Soutrik commited on
Commit
cf754af
1 Parent(s): cbce8d4

added: hydra config copied from template

Browse files
.gitignore CHANGED
@@ -20,4 +20,4 @@ app/core/__pycache__/
20
  src/__pycache__/test_infra.cpython-310.pyc
21
  app/core/__pycache__/config.cpython-310.pyc
22
  data/
23
- /data
 
20
  src/__pycache__/test_infra.cpython-310.pyc
21
  app/core/__pycache__/config.cpython-310.pyc
22
  data/
23
+ !configs/data/
.project-root ADDED
File without changes
configs/callbacks/default.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - model_checkpoint
3
+ - early_stopping
4
+ - model_summary
5
+ - rich_progress_bar
6
+ - _self_
7
+
8
+ model_checkpoint:
9
+ dirpath: ${paths.ckpt_dir}
10
+ monitor: "val_loss"
11
+ mode: "min"
12
+ save_last: False
13
+ auto_insert_metric_name: False
14
+
15
+ early_stopping:
16
+ monitor: "val_loss"
17
+ patience: 3
18
+ mode: "min"
19
+
20
+ model_summary:
21
+ max_depth: -1
configs/callbacks/early_stopping.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html
2
+
3
+ early_stopping:
4
+ _target_: lightning.pytorch.callbacks.EarlyStopping
5
+ monitor: val_loss # quantity to be monitored, must be specified !!!
6
+ min_delta: 0. # minimum change in the monitored quantity to qualify as an improvement
7
+ patience: 3 # number of checks with no improvement after which training will be stopped
8
+ verbose: False # verbosity mode
9
+ mode: "min" # "max" means higher metric value is better, can be also "min"
10
+ strict: True # whether to crash the training if monitor is not found in the validation metrics
11
+ check_finite: True # when set True, stops training when the monitor becomes NaN or infinite
12
+ stopping_threshold: null # stop training immediately once the monitored quantity reaches this threshold
13
+ divergence_threshold: null # stop training as soon as the monitored quantity becomes worse than this threshold
14
+ check_on_train_epoch_end: null # whether to run early stopping at the end of the training epoch
15
+ # log_rank_zero_only: False # this keyword argument isn't available in stable version
configs/callbacks/model_checkpoint.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
2
+
3
+ model_checkpoint:
4
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
5
+ dirpath: null # directory to save the model file
6
+ filename: best-checkpoint # checkpoint filename
7
+ monitor: val_loss # name of the logged metric which determines when model is improving
8
+ verbose: False # verbosity mode
9
+ save_last: False # additionally always save an exact copy of the last checkpoint to a file last.ckpt
10
+ save_top_k: 1 # save k best models (determined by above metric)
11
+ mode: "min" # "max" means higher metric value is better, can be also "min"
12
+ auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
13
+ save_weights_only: False # if True, then only the model’s weights will be saved
14
+ every_n_train_steps: null # number of training steps between checkpoints
15
+ train_time_interval: null # checkpoints are monitored at the specified time interval
16
+ every_n_epochs: null # number of epochs between checkpoints
17
+ save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
configs/callbacks/model_summary.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ model_summary:
2
+ _target_: lightning.pytorch.callbacks.RichModelSummary
3
+ max_depth: 2
configs/callbacks/rich_progress_bar.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ rich_progress_bar:
2
+ _target_: lightning.pytorch.callbacks.RichProgressBar
configs/data/catdog.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: src.datamodules.catdog_datamodule.CatDogImageDataModule
2
+
3
+ data_dir: ${paths.data_dir}
4
+ url: ${paths.data_url}
5
+ num_workers: 4
6
+ batch_size: 32
7
+ train_val_split: [0.8, 0.2]
8
+ pin_memory: False
9
+ image_size: 160
10
+ dataset_url: "https://download.pytorch.org/tutorials/cats_and_dogs_filtered.zip"
configs/experiment/catdog_experiment.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=catdog_experiment
5
+
6
+ defaults:
7
+ - override /paths: catdog
8
+ - override /data: catdog
9
+ - override /model: catdog_classifier
10
+ - override /callbacks: default
11
+ - override /logger: default
12
+ - override /trainer: default
13
+
14
+ # all parameters below will be merged with parameters from default configurations set above
15
+ # this allows you to overwrite only specified parameters
16
+
17
+ seed: 42
18
+ name: "catdog_experiment"
19
+
20
+ data:
21
+ batch_size: 64
22
+ num_workers: 8
23
+ pin_memory: True
24
+ image_size: 160
25
+
26
+ model:
27
+ lr: 1e-3
28
+ weight_decay: 1e-5
29
+ factor: 0.1
30
+ patience: 10
31
+ min_lr: 1e-6
32
+ num_classes: 2
33
+ patch_size: 16
34
+ embed_dim: 64
35
+ depth: 6
36
+ num_heads: 2
37
+ mlp_ratio: 3
38
+
39
+ trainer:
40
+ min_epochs: 1
41
+ max_epochs: 6
42
+
43
+ callbacks:
44
+ model_checkpoint:
45
+ monitor: "val_acc"
46
+ mode: "max"
47
+ save_top_k: 1
48
+ save_last: True
49
+
50
+ early_stopping:
51
+ monitor: "val_acc"
52
+ patience: 10
53
+ mode: "max"
configs/experiment/catdog_experiment_convnext.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=catdog_experiment_convnext
5
+
6
+ defaults:
7
+ - override /paths: catdog
8
+ - override /data: catdog
9
+ - override /model: catdog_classifier_convnext
10
+ - override /callbacks: default
11
+ - override /logger: default
12
+ - override /trainer: default
13
+
14
+ # all parameters below will be merged with parameters from default configurations set above
15
+ # this allows you to overwrite only specified parameters
16
+
17
+ seed: 42
18
+ name: "catdog_experiment_convnext"
19
+
20
+ # Logger-specific configurations
21
+ logger:
22
+ aim:
23
+ experiment: ${name}
24
+ mlflow:
25
+ experiment_name: ${name}
26
+ tags:
27
+ model_type: "timm_classify"
28
+
29
+ data:
30
+ batch_size: 64
31
+ num_workers: 8
32
+ pin_memory: True
33
+ image_size: 160
34
+
35
+ model:
36
+ base_model: convnext_tiny.fb_in22k_ft_in1k
37
+ pretrained: True
38
+ lr: 1e-3
39
+ weight_decay: 1e-5
40
+ factor: 0.1
41
+ patience: 5
42
+ min_lr: 1e-6
43
+ num_classes: 2
44
+ kernel_sizes: 7
45
+
46
+ trainer:
47
+ min_epochs: 1
48
+ max_epochs: 3
49
+
50
+ callbacks:
51
+ model_checkpoint:
52
+ monitor: "val_acc"
53
+ mode: "max"
54
+ save_top_k: 1
55
+ save_last: True
56
+
57
+ early_stopping:
58
+ monitor: "val_acc"
59
+ patience: 3
60
+ mode: "max"
configs/hydra/default.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://hydra.cc/docs/configure_hydra/intro/
2
+
3
+ # enable color logging
4
+ defaults:
5
+ - override hydra_logging: colorlog
6
+ - override job_logging: colorlog
7
+
8
+ # output directory, generated dynamically on each run
9
+ run:
10
+ dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
11
+ sweep:
12
+ dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
13
+ subdir: ${hydra.job.num}
14
+
15
+ job_logging:
16
+ handlers:
17
+ file:
18
+ # Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
19
+ filename: ${hydra.runtime.output_dir}/${task_name}.log
configs/infer.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # specify here default configuration
4
+ # order of defaults determines the order in which configs override each other
5
+ defaults:
6
+ - _self_
7
+ - data: dogbreed
8
+ - model: dogbreed_classifier
9
+ - callbacks: default
10
+ - logger: null # set logger here or use command line (e.g. `python train.py logger=tensorboard`)
11
+ - trainer: default
12
+ - paths: dogbreed
13
+ - hydra: default
14
+ # experiment configs allow for version control of specific hyperparameters
15
+ # e.g. best hyperparameters for given model and datamodule
16
+ - experiment: dogbreed_experiment
17
+ # debugging config (enable through command line, e.g. `python train.py debug=default`)
18
+ - debug: null
19
+
20
+ # task name, determines output directory path
21
+ task_name: "infer"
22
+
23
+ # tags to help you identify your experiments
24
+ # you can overwrite this in experiment configs
25
+ # overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
26
+ tags: ["dev"]
27
+
28
+ # set False to skip model training
29
+ train: False
30
+
31
+ # evaluate on test set, using best model weights achieved during training
32
+ # lightning chooses best weights based on the metric specified in checkpoint callback
33
+ test: False
34
+
35
+ # simply provide checkpoint path to resume training
36
+ ckpt_path: ${paths.ckpt_dir}/best-checkpoint.ckpt
37
+
38
+ # seed for random number generators in pytorch, numpy and python.random
39
+ seed: 42
40
+
41
+ # name of the experiment
42
+ name: "dogbreed_experiment"
configs/logger/aim.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ aim:
2
+ _target_: aim.pytorch_lightning.AimLogger
3
+ experiment: ${name}
4
+ train_metric_prefix: train_
5
+ test_metric_prefix: test_
6
+ val_metric_prefix: val_
configs/logger/csv.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # csv logger built in lightning
2
+
3
+ csv:
4
+ _target_: lightning.pytorch.loggers.csv_logs.CSVLogger
5
+ save_dir: "${paths.output_dir}"
6
+ name: "csv/"
7
+ prefix: ""
configs/logger/default.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # train with many loggers at once
2
+
3
+ defaults:
4
+ - csv
5
+ - tensorboard
6
+ - aim
7
+ - mlflow
configs/logger/mlflow.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # MLflow logger configuration
2
+
3
+ mlflow:
4
+ _target_: lightning.pytorch.loggers.MLFlowLogger
5
+ experiment_name: ${name}
6
+ tracking_uri: file:${paths.log_dir}/mlruns
7
+ save_dir: ${paths.log_dir}/mlruns
8
+ log_model: False
9
+ prefix: ""
configs/logger/tensorboard.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.tensorflow.org/tensorboard/
2
+
3
+ tensorboard:
4
+ _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
5
+ save_dir: "${paths.output_dir}/tensorboard/"
6
+ name: null
7
+ log_graph: False
8
+ default_hp_metric: True
9
+ prefix: ""
10
+ # version: ""
configs/model/catdog_classifier.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ _target_: src.models.catdog_classifier.ViTTinyClassifier
3
+
4
+ # model params
5
+ img_size: 160
6
+ patch_size: 16
7
+ num_classes: 2
8
+ embed_dim: 64
9
+ depth: 6
10
+ num_heads: 2
11
+ mlp_ratio: 3.0
12
+ pre_norm: False
13
+
14
+ # optimizer params
15
+ lr: 1e-3
16
+ weight_decay: 1e-5
17
+
18
+ # scheduler params
19
+ factor: 0.1
20
+ patience: 10
21
+ min_lr: 1e-6
configs/model/catdog_classifier_convnext.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ _target_: src.models.catdog_classifier_convnext.ConvNextClassifier
3
+
4
+ # model params
5
+ base_model: convnext_tiny.in12k_ft_in1k
6
+ pretrained: True
7
+ num_classes: 2
8
+ kernel_sizes: 7
9
+ # optimizer params
10
+ lr: 1e-3
11
+ weight_decay: 1e-5
12
+
13
+ # scheduler params
14
+ factor: 0.1
15
+ patience: 10
16
+ min_lr: 1e-6
configs/paths/catdog.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # path to root directory
2
+ # this requires PROJECT_ROOT environment variable to exist
3
+ # you can replace it with "." if you want the root to be the current working directory
4
+ root_dir: ${oc.env:PROJECT_ROOT}
5
+
6
+ # path to data directory
7
+ data_dir: ${paths.root_dir}/data/
8
+
9
+ # path to logging directory
10
+ log_dir: ${paths.root_dir}/logs/
11
+
12
+ # path to checkpoint directory
13
+ ckpt_dir: ${paths.root_dir}/checkpoints
14
+
15
+ # path to artifact directory
16
+ artifact_dir: ${paths.root_dir}/artifacts/
17
+
18
+ # download url for the dataset
19
+ data_url: "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip"
20
+
21
+ # path to output directory, created dynamically by hydra
22
+ # path generation pattern is specified in `configs/hydra/default.yaml`
23
+ # use it to store all files generated during the run, like ckpts and metrics
24
+ output_dir: ${hydra:runtime.output_dir}
25
+
26
+ # path to working directory
27
+ work_dir: ${hydra:runtime.cwd}
configs/test.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - _self_
3
+ - data: dogbreed
4
+ - model: dogbreed_classifier
5
+ - callbacks: default
6
+ - logger: null
7
+ - trainer: default
8
+ - paths: default # This should map to another config file if using hydra to merge
9
+
10
+ task_name: train
11
+ tags:
12
+ - dev
13
+ train: true
14
+ test: true
15
+ ckpt_path: null
16
+ seed: 42
17
+
18
+ # Ensure paths section is present
19
+ paths:
20
+ root_dir: ./ # Project root directory
21
+ data_dir: ./data # Path to your dataset
22
+ log_dir: ./logs # Path to logs directory
23
+ ckpt_dir: ./checkpoints # Path to checkpoints
24
+ artifact_dir: ./artifacts # Path to save artifacts
25
+ kaggle_dir: khushikhushikhushi/dog-breed-image-dataset # Path for Kaggle dataset
26
+
27
+ # Ensure data section is present
28
+ data:
29
+ num_workers: 4
30
+ batch_size: 32
31
+ image_size: 224
32
+ train_split: 0.8
33
+ val_split: 0.1
34
+ test_split: 0.1
configs/train.yaml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # specify here default configuration
4
+ # order of defaults determines the order in which configs override each other
5
+ defaults:
6
+ - _self_
7
+ - data: catdog
8
+ - model: catdog_classifier
9
+ - callbacks: default
10
+ - logger: null # set logger here or use command line (e.g. `python train.py logger=tensorboard`)
11
+ - trainer: default
12
+ - paths: catdog
13
+ - hydra: default
14
+
15
+ - experiment: catdog_experiment
16
+ # debugging config (enable through command line, e.g. `python train.py debug=default`)
17
+ - debug: null
18
+
19
+ # task name, determines output directory path
20
+ task_name: "train"
21
+
22
+ # tags to help you identify your experiments
23
+ # you can overwrite this in experiment configs
24
+ # overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
25
+ tags: ["dev"]
26
+
27
+ # set False to skip model training
28
+ train: True
29
+
30
+ # evaluate on test set, using best model weights achieved during training
31
+ # lightning chooses best weights based on the metric specified in checkpoint callback
32
+ test: False
33
+
34
+ # simply provide checkpoint path to resume training
35
+ ckpt_path: ${paths.ckpt_dir}/best-checkpoint.ckpt
36
+
37
+ # seed for random number generators in pytorch, numpy and python.random
38
+ seed: 42
39
+
40
+ # name of the experiment
41
+ name: "dogbreed_experiment"
configs/trainer/default.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: lightning.Trainer
2
+
3
+ default_root_dir: ${paths.output_dir}
4
+ min_epochs: 1
5
+ max_epochs: 6
6
+
7
+ accelerator: auto
8
+ devices: auto
9
+
10
+ # mixed precision for extra speed-up
11
+ # precision: 16
12
+
13
+ # set True to ensure deterministic results
14
+ # makes training slower but gives more reproducibility than just setting seeds
15
+ deterministic: True
16
+
17
+ # Log every N steps in training and validation
18
+ log_every_n_steps: 10
19
+ fast_dev_run: False
logs/train/runs/2024-11-08_14-29-05/.hydra/config.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task_name: train
2
+ tags:
3
+ - dev
4
+ train: true
5
+ test: false
6
+ ckpt_path: ${paths.ckpt_dir}/best-checkpoint.ckpt
7
+ seed: 42
8
+ name: catdog_experiment
9
+ data:
10
+ _target_: src.datamodules.catdog_datamodule.CatDogImageDataModule
11
+ data_dir: ${paths.data_dir}
12
+ url: ${paths.data_url}
13
+ num_workers: 8
14
+ batch_size: 64
15
+ splits:
16
+ - 0.8
17
+ - 0.2
18
+ pin_memory: true
19
+ image_size: 160
20
+ model:
21
+ _target_: src.models.catdog_classifier.ViTTinyClassifier
22
+ img_size: 160
23
+ patch_size: 16
24
+ num_classes: 2
25
+ embed_dim: 64
26
+ depth: 6
27
+ num_heads: 2
28
+ mlp_ratio: 3
29
+ pre_norm: false
30
+ lr: 0.001
31
+ weight_decay: 1.0e-05
32
+ factor: 0.1
33
+ patience: 10
34
+ min_lr: 1.0e-06
35
+ callbacks:
36
+ model_checkpoint:
37
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
38
+ dirpath: ${paths.ckpt_dir}
39
+ filename: best-checkpoint
40
+ monitor: val_acc
41
+ verbose: false
42
+ save_last: true
43
+ save_top_k: 1
44
+ mode: max
45
+ auto_insert_metric_name: false
46
+ save_weights_only: false
47
+ every_n_train_steps: null
48
+ train_time_interval: null
49
+ every_n_epochs: null
50
+ save_on_train_epoch_end: null
51
+ early_stopping:
52
+ _target_: lightning.pytorch.callbacks.EarlyStopping
53
+ monitor: val_acc
54
+ min_delta: 0.0
55
+ patience: 10
56
+ verbose: false
57
+ mode: max
58
+ strict: true
59
+ check_finite: true
60
+ stopping_threshold: null
61
+ divergence_threshold: null
62
+ check_on_train_epoch_end: null
63
+ model_summary:
64
+ _target_: lightning.pytorch.callbacks.RichModelSummary
65
+ max_depth: -1
66
+ rich_progress_bar:
67
+ _target_: lightning.pytorch.callbacks.RichProgressBar
68
+ logger:
69
+ csv:
70
+ _target_: lightning.pytorch.loggers.csv_logs.CSVLogger
71
+ save_dir: ${paths.output_dir}
72
+ name: csv/
73
+ prefix: ''
74
+ tensorboard:
75
+ _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
76
+ save_dir: ${paths.output_dir}/tensorboard/
77
+ name: null
78
+ log_graph: false
79
+ default_hp_metric: true
80
+ prefix: ''
81
+ aim:
82
+ __target__: aim.pytorch_lightning.AimLogger
83
+ experiment: ${name}
84
+ train_metric_prefix: train_
85
+ test_metric_prefix: test_
86
+ val_metric_prefix: val_
87
+ mlflow:
88
+ _target_: lightning.pytorch.loggers.MLFlowLogger
89
+ experiment_name: ${name}
90
+ tracking_uri: file:${paths.log_dir}/mlruns
91
+ save_dir: ${paths.log_dir}/mlruns
92
+ log_model: false
93
+ prefix: ''
94
+ trainer:
95
+ _target_: lightning.Trainer
96
+ default_root_dir: ${paths.output_dir}
97
+ min_epochs: 1
98
+ max_epochs: 6
99
+ accelerator: auto
100
+ devices: auto
101
+ deterministic: true
102
+ log_every_n_steps: 10
103
+ fast_dev_run: false
104
+ paths:
105
+ root_dir: ${oc.env:PROJECT_ROOT}
106
+ data_dir: ${paths.root_dir}/data/
107
+ log_dir: ${paths.root_dir}/logs/
108
+ ckpt_dir: ${paths.root_dir}/checkpoints
109
+ artifact_dir: ${paths.root_dir}/artifacts/
110
+ data_url: https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
111
+ output_dir: ${hydra:runtime.output_dir}
112
+ work_dir: ${hydra:runtime.cwd}
logs/train/runs/2024-11-08_14-29-05/.hydra/hydra.yaml ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
4
+ sweep:
5
+ dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ colorlog:
72
+ (): colorlog.ColoredFormatter
73
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
74
+ handlers:
75
+ console:
76
+ class: logging.StreamHandler
77
+ formatter: colorlog
78
+ stream: ext://sys.stdout
79
+ root:
80
+ level: INFO
81
+ handlers:
82
+ - console
83
+ disable_existing_loggers: false
84
+ job_logging:
85
+ version: 1
86
+ formatters:
87
+ simple:
88
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
89
+ colorlog:
90
+ (): colorlog.ColoredFormatter
91
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
92
+ - %(message)s'
93
+ log_colors:
94
+ DEBUG: purple
95
+ INFO: green
96
+ WARNING: yellow
97
+ ERROR: red
98
+ CRITICAL: red
99
+ handlers:
100
+ console:
101
+ class: logging.StreamHandler
102
+ formatter: colorlog
103
+ stream: ext://sys.stdout
104
+ file:
105
+ class: logging.FileHandler
106
+ formatter: simple
107
+ filename: ${hydra.runtime.output_dir}/${task_name}.log
108
+ root:
109
+ level: INFO
110
+ handlers:
111
+ - console
112
+ - file
113
+ disable_existing_loggers: false
114
+ env: {}
115
+ mode: RUN
116
+ searchpath: []
117
+ callbacks: {}
118
+ output_subdir: .hydra
119
+ overrides:
120
+ hydra:
121
+ - hydra.mode=RUN
122
+ task: []
123
+ job:
124
+ name: catdog_datamodule
125
+ chdir: null
126
+ override_dirname: ''
127
+ id: ???
128
+ num: ???
129
+ config_name: train
130
+ env_set: {}
131
+ env_copy: []
132
+ config:
133
+ override_dirname:
134
+ kv_sep: '='
135
+ item_sep: ','
136
+ exclude_keys: []
137
+ runtime:
138
+ version: 1.3.2
139
+ version_base: '1.3'
140
+ cwd: /mnt/batch/tasks/shared/LS_root/mounts/clusters/soutrik-vm-dev/code/Users/Soutrik.Chowdhury/pytorch-template-aws
141
+ config_sources:
142
+ - path: hydra.conf
143
+ schema: pkg
144
+ provider: hydra
145
+ - path: /mnt/batch/tasks/shared/LS_root/mounts/clusters/soutrik-vm-dev/code/Users/Soutrik.Chowdhury/pytorch-template-aws/configs
146
+ schema: file
147
+ provider: main
148
+ - path: hydra_plugins.hydra_colorlog.conf
149
+ schema: pkg
150
+ provider: hydra-colorlog
151
+ - path: ''
152
+ schema: structured
153
+ provider: schema
154
+ output_dir: /mnt/batch/tasks/shared/LS_root/mounts/clusters/soutrik-vm-dev/code/Users/Soutrik.Chowdhury/pytorch-template-aws/logs/train/runs/2024-11-08_14-29-05
155
+ choices:
156
+ debug: null
157
+ experiment: catdog_experiment
158
+ hydra: default
159
+ paths: catdog
160
+ trainer: default
161
+ logger: default
162
+ callbacks: default
163
+ model: catdog_classifier
164
+ data: catdog
165
+ hydra/env: default
166
+ hydra/callbacks: null
167
+ hydra/job_logging: colorlog
168
+ hydra/hydra_logging: colorlog
169
+ hydra/hydra_help: default
170
+ hydra/help: default
171
+ hydra/sweeper: basic
172
+ hydra/launcher: basic
173
+ hydra/output: default
174
+ verbose: false
logs/train/runs/2024-11-08_14-29-05/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
logs/train/runs/2024-11-08_14-29-05/train.log ADDED
File without changes
logs/train/runs/2024-11-08_14-32-38/.hydra/config.yaml ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task_name: train
2
+ tags:
3
+ - dev
4
+ train: true
5
+ test: false
6
+ ckpt_path: ${paths.ckpt_dir}/best-checkpoint.ckpt
7
+ seed: 42
8
+ name: catdog_experiment
9
+ data:
10
+ _target_: src.datamodules.catdog_datamodule.CatDogImageDataModule
11
+ data_dir: ${paths.data_dir}
12
+ url: ${paths.data_url}
13
+ num_workers: 8
14
+ batch_size: 64
15
+ train_val_split:
16
+ - 0.8
17
+ - 0.2
18
+ pin_memory: true
19
+ image_size: 160
20
+ dataset_url: https://download.pytorch.org/tutorials/cats_and_dogs_filtered.zip
21
+ model:
22
+ _target_: src.models.catdog_classifier.ViTTinyClassifier
23
+ img_size: 160
24
+ patch_size: 16
25
+ num_classes: 2
26
+ embed_dim: 64
27
+ depth: 6
28
+ num_heads: 2
29
+ mlp_ratio: 3
30
+ pre_norm: false
31
+ lr: 0.001
32
+ weight_decay: 1.0e-05
33
+ factor: 0.1
34
+ patience: 10
35
+ min_lr: 1.0e-06
36
+ callbacks:
37
+ model_checkpoint:
38
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
39
+ dirpath: ${paths.ckpt_dir}
40
+ filename: best-checkpoint
41
+ monitor: val_acc
42
+ verbose: false
43
+ save_last: true
44
+ save_top_k: 1
45
+ mode: max
46
+ auto_insert_metric_name: false
47
+ save_weights_only: false
48
+ every_n_train_steps: null
49
+ train_time_interval: null
50
+ every_n_epochs: null
51
+ save_on_train_epoch_end: null
52
+ early_stopping:
53
+ _target_: lightning.pytorch.callbacks.EarlyStopping
54
+ monitor: val_acc
55
+ min_delta: 0.0
56
+ patience: 10
57
+ verbose: false
58
+ mode: max
59
+ strict: true
60
+ check_finite: true
61
+ stopping_threshold: null
62
+ divergence_threshold: null
63
+ check_on_train_epoch_end: null
64
+ model_summary:
65
+ _target_: lightning.pytorch.callbacks.RichModelSummary
66
+ max_depth: -1
67
+ rich_progress_bar:
68
+ _target_: lightning.pytorch.callbacks.RichProgressBar
69
+ logger:
70
+ csv:
71
+ _target_: lightning.pytorch.loggers.csv_logs.CSVLogger
72
+ save_dir: ${paths.output_dir}
73
+ name: csv/
74
+ prefix: ''
75
+ tensorboard:
76
+ _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
77
+ save_dir: ${paths.output_dir}/tensorboard/
78
+ name: null
79
+ log_graph: false
80
+ default_hp_metric: true
81
+ prefix: ''
82
+ aim:
83
+ __target__: aim.pytorch_lightning.AimLogger
84
+ experiment: ${name}
85
+ train_metric_prefix: train_
86
+ test_metric_prefix: test_
87
+ val_metric_prefix: val_
88
+ mlflow:
89
+ _target_: lightning.pytorch.loggers.MLFlowLogger
90
+ experiment_name: ${name}
91
+ tracking_uri: file:${paths.log_dir}/mlruns
92
+ save_dir: ${paths.log_dir}/mlruns
93
+ log_model: false
94
+ prefix: ''
95
+ trainer:
96
+ _target_: lightning.Trainer
97
+ default_root_dir: ${paths.output_dir}
98
+ min_epochs: 1
99
+ max_epochs: 6
100
+ accelerator: auto
101
+ devices: auto
102
+ deterministic: true
103
+ log_every_n_steps: 10
104
+ fast_dev_run: false
105
+ paths:
106
+ root_dir: ${oc.env:PROJECT_ROOT}
107
+ data_dir: ${paths.root_dir}/data/
108
+ log_dir: ${paths.root_dir}/logs/
109
+ ckpt_dir: ${paths.root_dir}/checkpoints
110
+ artifact_dir: ${paths.root_dir}/artifacts/
111
+ data_url: https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
112
+ output_dir: ${hydra:runtime.output_dir}
113
+ work_dir: ${hydra:runtime.cwd}
logs/train/runs/2024-11-08_14-32-38/.hydra/hydra.yaml ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
4
+ sweep:
5
+ dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ colorlog:
72
+ (): colorlog.ColoredFormatter
73
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
74
+ handlers:
75
+ console:
76
+ class: logging.StreamHandler
77
+ formatter: colorlog
78
+ stream: ext://sys.stdout
79
+ root:
80
+ level: INFO
81
+ handlers:
82
+ - console
83
+ disable_existing_loggers: false
84
+ job_logging:
85
+ version: 1
86
+ formatters:
87
+ simple:
88
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
89
+ colorlog:
90
+ (): colorlog.ColoredFormatter
91
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
92
+ - %(message)s'
93
+ log_colors:
94
+ DEBUG: purple
95
+ INFO: green
96
+ WARNING: yellow
97
+ ERROR: red
98
+ CRITICAL: red
99
+ handlers:
100
+ console:
101
+ class: logging.StreamHandler
102
+ formatter: colorlog
103
+ stream: ext://sys.stdout
104
+ file:
105
+ class: logging.FileHandler
106
+ formatter: simple
107
+ filename: ${hydra.runtime.output_dir}/${task_name}.log
108
+ root:
109
+ level: INFO
110
+ handlers:
111
+ - console
112
+ - file
113
+ disable_existing_loggers: false
114
+ env: {}
115
+ mode: RUN
116
+ searchpath: []
117
+ callbacks: {}
118
+ output_subdir: .hydra
119
+ overrides:
120
+ hydra:
121
+ - hydra.mode=RUN
122
+ task: []
123
+ job:
124
+ name: catdog_datamodule
125
+ chdir: null
126
+ override_dirname: ''
127
+ id: ???
128
+ num: ???
129
+ config_name: train
130
+ env_set: {}
131
+ env_copy: []
132
+ config:
133
+ override_dirname:
134
+ kv_sep: '='
135
+ item_sep: ','
136
+ exclude_keys: []
137
+ runtime:
138
+ version: 1.3.2
139
+ version_base: '1.3'
140
+ cwd: /mnt/batch/tasks/shared/LS_root/mounts/clusters/soutrik-vm-dev/code/Users/Soutrik.Chowdhury/pytorch-template-aws
141
+ config_sources:
142
+ - path: hydra.conf
143
+ schema: pkg
144
+ provider: hydra
145
+ - path: /mnt/batch/tasks/shared/LS_root/mounts/clusters/soutrik-vm-dev/code/Users/Soutrik.Chowdhury/pytorch-template-aws/configs
146
+ schema: file
147
+ provider: main
148
+ - path: hydra_plugins.hydra_colorlog.conf
149
+ schema: pkg
150
+ provider: hydra-colorlog
151
+ - path: ''
152
+ schema: structured
153
+ provider: schema
154
+ output_dir: /mnt/batch/tasks/shared/LS_root/mounts/clusters/soutrik-vm-dev/code/Users/Soutrik.Chowdhury/pytorch-template-aws/logs/train/runs/2024-11-08_14-32-38
155
+ choices:
156
+ debug: null
157
+ experiment: catdog_experiment
158
+ hydra: default
159
+ paths: catdog
160
+ trainer: default
161
+ logger: default
162
+ callbacks: default
163
+ model: catdog_classifier
164
+ data: catdog
165
+ hydra/env: default
166
+ hydra/callbacks: null
167
+ hydra/job_logging: colorlog
168
+ hydra/hydra_logging: colorlog
169
+ hydra/hydra_help: default
170
+ hydra/help: default
171
+ hydra/sweeper: basic
172
+ hydra/launcher: basic
173
+ hydra/output: default
174
+ verbose: false
logs/train/runs/2024-11-08_14-32-38/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
logs/train/runs/2024-11-08_14-32-38/train.log ADDED
File without changes