File size: 14,092 Bytes
ce190ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# Base output location. `output_path / checkpoints / latest_ckpt.pth` is the
# fallback checkpoint whenever the `load_paths` entries below are left at none.
output_path: /miniscratch/_groups/ccai/trash

# How `load_paths` is resolved:
#   1. Any path that leads to a directory is loaded as
#      `path / checkpoints / latest_ckpt.pth`.
#   2. To load one specific checkpoint, the path MUST be a `.pth` file.
#   3. When resuming a P OR an M model, only one of `load_paths.p` OR
#      `load_paths.m` may be specified. BOTH may also be left at none, in which
#      case `output_path / checkpoints / latest_ckpt.pth` will be used.
#   4. When resuming a P+M model, specify (`p` AND `m`) OR `pm`, OR leave all
#      at none, in which case `output_path / checkpoints / latest_ckpt.pth`
#      will be used to load from a single checkpoint.
# NOTE: the sentinel is the literal string "none" (a bare `none` is not YAML
# null), so it is quoted to make the type explicit.
load_paths:
  p: "none" # Painter weights
  m: "none" # Masker weights
  pm: "none" # Painter and Masker weights

# -------------------
# -----  Tasks  -----
# -------------------
# Tasks enabled for this run. The original note listed `[p]` and `[m, s, d]`
# next to the full list (presumably the other supported combinations -- confirm).
tasks:
  - d # depth estimation (see gen.d)
  - s # semantic segmentation (see gen.s)
  - m # mask generation (see gen.m)
  - p # SPADE painter (see gen.p)

# ----------------
# ----- Data -----
# ----------------
# Dataset files, loader settings and the transform pipeline.
data:
  # -1 for all, otherwise set to an int to crop the training data size
  max_samples: -1
  # if one is not none it will override the dirs location
  files:
    base: /miniscratch/_groups/ccai/data/jsons
    train:
      r: train_r_full.json
      s: train_s_fixedholes.json
      rf: train_rf.json
      kitti: train_kitti.json
    val:
      r: val_r_full.json
      s: val_s_fixedholes.json
      rf: val_rf_labelbox.json
      kitti: val_kitti.json
  check_samples: false
  loaders:
    batch_size: 6
    num_workers: 6
  # Can be "default" or "HRNet" for now.
  #   default: mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]
  #   HRNet:   mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
  normalization: default
  # Transforms are listed in order. `ignore` is either the boolean false or the
  # string `val` (presumably "skip this transform in validation mode" --
  # confirm against the transform loader).
  transforms:
    - name: hflip
      ignore: val
      p: 0.5
    - name: resize
      ignore: false
      new_size: 640
      # smallest dimension will be `new_size` and the other will be computed
      # to keep aspect ratio
      keep_aspect_ratio: true
    - name: crop
      ignore: false
      # disable randomness, crop around the image's center
      center: val
      height: 600
      width: 600
    - name: brightness
      ignore: val
    - name: saturation
      ignore: val
    - name: contrast
      ignore: val
    # Second resize: a `default` size plus overrides for the `d` and `s` keys.
    - name: resize
      ignore: false
      new_size:
        default: 640
        d: 160
        s: 160

# ---------------------
# ----- Generator -----
# ---------------------
# Generator configuration: optimizer, shared defaults (anchored as
# `default-gen` and merged into each sub-section), encoder backbones and one
# decoder section per task (d, s, p, m).
gen:
  opt:
    optimizer: ExtraAdam # one of [Adam, ExtraAdam]; default: Adam
    beta1: 0.9
    lr:
      default: 0.00005 # 0.00001 for dlv2, 0.00005 for dlv3
    # lr_policy can be constant, step or multi_step.
    #   step: specify lr_step_size and lr_gamma.
    #   multi_step: specify lr_step_size, lr_gamma and lr_milestones:
    #     if lr_milestones is a list:
    #       the learning rate will be multiplied by gamma each time the epoch
    #       reaches an item in the list (no need for lr_step_size).
    #     if lr_milestones is an int:
    #       a list of milestones is created from
    #       `range(lr_milestones, train.epochs, lr_step_size)`
    lr_policy: step
    lr_step_size: 5 # for linear decay: period of learning rate decay (epochs)
    lr_milestones: 15
    lr_gamma: 0.5 # multiplicative factor of learning rate decay

  # Default parameters for the generator (encoder and decoders).
  default: &default-gen
    activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh]
    init_gain: 0.02
    init_type: xavier
    n_res: 1 # number of residual blocks before upsampling
    # number of downsampling layers in encoder
    # | dim 32 + down 3 => z = 256 x 32 x 32
    n_downsample: &n_downsample 3
    # upsampling in spade decoder; should match encoder.n_downsample
    n_upsample: *n_downsample
    pad_type: reflect # padding type [zero/reflect]
    # ResBlock normalization; one of
    # {"batch", "instance", "layer", "adain", "spectral", "none"}
    norm: spectral
    proj_dim: 32 # dim of projection from latent space

  # Specific params for the encoder.
  encoder:
    <<: *default-gen
    dim: 32
    # deeplabv2/v3 resnet -> res_dim=2048 | dlv3 mobilenet -> res_dim=320
    architecture: deeplabv3
    input_dim: 3 # input number of channels
    n_res: 0 # number of residual blocks in content encoder/decoder
    # ConvBlock normalization; one of
    # {"batch", "instance", "layer", "adain", "spectral", "none"}
    norm: spectral

  #! Don't change!!!
  deeplabv2:
    nblocks: [3, 4, 23, 3]
    use_pretrained: true
    pretrained_model: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv2/DeepLab_resnet_pretrained_imagenet.pth"

  deeplabv3:
    backbone: resnet # resnet or mobilenet
    output_stride: 8 # 8 or 16
    use_pretrained: true
    # one checkpoint path per supported backbone
    pretrained_model:
      mobilenet: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv3/deeplabv3_plus_mobilenetv2_segmentron.pth"
      resnet: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv3/model_CoinCheungDeepLab-v3-plus.pth"

  # Specific params for the depth estimation decoder.
  d:
    <<: *default-gen
    output_dim: 1
    norm: batch
    loss: sigm # dada or sigm | /!\ ignored if classify.enable
    # upsamples from 80x80 to 160x160 intermediate feature maps
    upsample_featuremaps: true
    architecture: dada # dada or base | must be base for classif
    # classify log-depth instead of regression
    classify:
      enable: false
      linspace:
        min: 0.35
        max: 6.95
        buckets: 256

  # Specific params for the semantic segmentation decoder.
  s:
    <<: *default-gen
    num_classes: 11
    output_dim: 11
    use_advent: true
    use_minent: true
    architecture: deeplabv3
    # upsamples from 80x80 to 160x160 intermediate feature maps
    upsample_featuremaps: false
    use_dada: true

  # Specific params for the SPADE painter.
  p:
    <<: *default-gen
    latent_dim: 640
    loss: gan # gan or hinge
    no_z: true # <=> use_vae=False in the SPADE repo
    output_dim: 3 # output dimension
    pad_type: reflect # padding type [zero/reflect]
    # only select the water painted to backprop through the network, not the
    # whole generated image: fake_flooded = masked_x + m * fake_flooded
    paste_original_content: true
    # epoch from which we introduce a new loss to the masker: the painter's
    # discriminator's loss
    pl4m_epoch: 49
    spade_kernel_size: 3 # kernel size within SPADE norm layers
    # number of upsampling layers in the translation decoder is equal to number
    # of downsamplings in the encoder. output's h and w are z's h and w
    # x 2^spade_num_upsampling_layers | z:32 and spade_n_up:4 => output 512
    spade_n_up: 7
    # what param-free normalization to apply in SPADE normalization
    spade_param_free_norm: instance
    spade_use_spectral_norm: true
    # if true, the last spade block does not get the masked input as
    # conditioning but the prediction of the previous layer (passed through a
    # conv to match dims) in order to lighten the masking restrictions and
    # have smoother edges
    use_final_shortcut: false
    diff_aug:
      use: false
      do_color_jittering: false
      do_cutout: false
      cutout_ratio: 0.5
      do_translation: false
      translation_ratio: 0.125

  # Specific params for the mask-generation decoder.
  m:
    <<: *default-gen
    use_spade: false
    output_dim: 1
    use_minent: true # directly minimize the entropy of the image
    # add variance of entropy map in the measure of entropy for a certain
    # picture
    use_minent_var: true
    # minimize the entropy of the image by adversarial training
    use_advent: true
    use_ground_intersection: true
    use_proj: true
    proj_dim: 64
    use_pl4m: false
    n_res: 3
    use_low_level_feats: true
    use_dada: false
    spade:
      latent_dim: 128
      detach: false # detach s_pred and d_pred conditioning tensors
      cond_nc: 15 # 12 without x, 15 with x
      spade_use_spectral_norm: true
      spade_param_free_norm: batch
      num_layers: 3
      activations:
        all_lrelu: true

# -------------------------
# ----- Discriminator -----
# -------------------------
# Discriminator configuration: label noise, optimizer, shared defaults
# (anchored as `default-dis`) and per-task overrides (p, m, s).
dis:
  # label smoothing: real in U(1-soft_shift, 1), fake in U(0, soft_shift)
  # ! one-sided label smoothing
  soft_shift: 0.2
  flip_prob: 0.05 # label flipping
  opt:
    optimizer: ExtraAdam # one of [Adam, ExtraAdam]; default: Adam
    beta1: 0.5
    lr:
      default: 0.00002 # 0.0001 for dlv2, 0.00002 for dlv3
    # lr_policy can be constant, step or multi_step.
    #   step: specify lr_step_size and lr_gamma.
    #   multi_step: specify lr_step_size, lr_gamma and lr_milestones:
    #     if lr_milestones is a list:
    #       the learning rate will be multiplied by gamma each time the epoch
    #       reaches an item in the list (no need for lr_step_size).
    #     if lr_milestones is an int:
    #       a list of milestones is created from
    #       `range(lr_milestones, train.epochs, lr_step_size)`
    lr_policy: step
    # NOTE(review): gen.opt uses lr_step_size 5 / lr_milestones 15, the reverse
    # of the two values below -- confirm this is intentional.
    lr_step_size: 15 # for linear decay: period of learning rate decay (epochs)
    lr_milestones: 5
    lr_gamma: 0.5 # multiplicative factor of learning rate decay

  # Default setting for discriminators
  # (there are 4 of them for rn rf sn sf).
  default: &default-dis
    input_nc: 3
    ndf: 64
    n_layers: 4
    norm: instance
    init_type: xavier
    init_gain: 0.02
    use_sigmoid: false
    num_D: 1 # number of discriminators to use (>1 means multi-scale)
    get_intermediate_features: false

  p:
    <<: *default-dis
    num_D: 3
    get_intermediate_features: true
    use_local_discriminator: false
    # ttur: false # two time-scale update rule (see SPADE repo)

  m:
    <<: *default-dis
    multi_level: false
    architecture: base # can be [base | OmniDiscriminator]
    gan_type: WGAN_norm # can be [GAN | WGAN | WGAN_gp | WGAN_norm]
    # used in WGAN: the params in dis are clamped to
    # [wgan_clamp_lower, wgan_clamp_upper] for every update
    wgan_clamp_lower: -0.01
    wgan_clamp_upper: 0.01 # used in WGAN

  s:
    <<: *default-dis
    gan_type: WGAN_norm # can be [GAN | WGAN | WGAN_gp | WGAN_norm]
    # used in WGAN: the params in dis are clamped to
    # [wgan_clamp_lower, wgan_clamp_upper] for every update
    wgan_clamp_lower: -0.01
    wgan_clamp_upper: 0.01 # used in WGAN
# -------------------------------
# -----  Domain Classifier  -----
# -------------------------------
# Domain classifier: optimizer settings, loss and layer layout.
classifier:
  opt:
    optimizer: ExtraAdam # one of [Adam, ExtraAdam]; default: Adam
    beta1: 0.5
    lr:
      default: 0.0005
    lr_policy: step # constant or step; if step, specify lr_step_size and lr_gamma
    lr_step_size: 30 # for linear decay
    lr_gamma: 0.5
  loss: l2 # loss can be l1, l2 or cross_entropy; default: cross_entropy
  layers: [100, 100, 20, 20, 4] # number of units per hidden layer; the last number is output_dim
  dropout: 0.4 # probability of being set to 0
  init_type: kaiming
  init_gain: 0.2
  proj_dim: 128 # dim of projection from latent space

# ------------------------
# ----- Train Params -----
# ------------------------
# Training schedule, FID settings, loss weights and checkpointing.
train:
  kitti:
    pretrain: false
    epochs: 10
    batch_size: 6
  amp: false
  pseudo:
    # list of tasks for which to use pseudo labels (empty list to disable)
    tasks: []
    # disable pseudo training after n epochs (set to -1 to never disable)
    epochs: 10
  epochs: 300
  fid:
    # val_rf.json has 57 images
    # NOTE(review): data.files.val.rf points at val_rf_labelbox.json -- confirm
    # the count still matches that file.
    n_images: 57
    batch_size: 50 # inception inference batch size, not painter's
    # what Inception block to compute the stats from
    # (see BLOCK_INDEX_BY_DIM in fid.py)
    dims: 2048
  # whether or not to do domain adaptation on the latent vectors
  # Needs to be turned off if use_advent is true
  latent_domain_adaptation: false
  # scaling factors in the total loss
  lambdas:
    G:
      d:
        main: 1
        gml: 0.5
      s:
        crossent: 1
        crossent_pseudo: 0.001
        minent: 0.001
        advent: 0.001
      m:
        bce: 1 # main prediction loss, i.e. GAN or BCE
        tv: 1 # total variational loss (for smoothing)
        gi: 0.05
        pl4m: 1 # painter loss for the masker (end-to-end)
      p:
        context: 0
        dm: 1 # depth matching
        featmatch: 10
        gan: 1 # gan loss
        reconstruction: 0
        tv: 0
        vgg: 10
      classifier: 1
    C: 1
    advent:
      # coefficient of the MinEnt loss that directly minimizes the entropy of
      # the image
      ent_main: 0.5
      # the corresponding coefficient of the MinEnt loss of second output
      ent_aux: 0.0
      # the proportion of variance of entropy map in the entropy measure for a
      # certain picture
      ent_var: 0.1
      # coefficient of the AdvEnt loss that minimizes the entropy of the image
      # by adversarial training
      adv_main: 1.0
      # the corresponding coefficient of the AdvEnt loss of second output
      adv_aux: 0.0
      # the discriminator takes care of the first output in the adversarial
      # training
      dis_main: 1.0
      # the discriminator takes care of the second output in the adversarial
      # training
      dis_aux: 0.0
      # used in WGAN_gp: the hyperparameter for the gradient penalty
      WGAN_gp: 10
  log_level: 2 # 0: no log, 1: only aggregated losses, >1 detailed losses
  # Save `latest_ckpt.pth` every epoch, `epoch_{epoch}_ckpt.pth` model every n
  # epochs if epoch >= min_save_epoch
  save_n_epochs: 25
  # Save extra intermediate checkpoints when epoch > min_save_epoch
  # NOTE(review): the comment on save_n_epochs says `>=`, this one says `>` --
  # confirm which comparison the trainer uses.
  min_save_epoch: 28
  # Load latest_ckpt.pth checkpoint from `output_path`
  # TODO: make this the path of the checkpoint to load
  resume: false
  # automatically looks for similar output paths and exact same jobID to
  # resume training automatically even if resume is false.
  auto_resume: true

# -----------------------------
# ----- Validation Params -----
# -----------------------------
# Validation settings.
val:
  store_images: false # write to disk on top of comet logging
  # Path to a painter checkpoint (`latest_ckpt.pth`).
  # NOTE(review): presumably the painter used during validation -- confirm
  # against the code that reads `val.val_painter`.
  val_painter: /miniscratch/_groups/ccai/checkpoints/painter/victor/good_large_lr/checkpoints/latest_ckpt.pth
# -----------------------------
# ----- Comet Params ----------
# -----------------------------
# Layout of the image grids logged to Comet.
comet:
  display_size: 20
  # number of samples (rows) in a logged grid image.
  # Number of total logged images: display_size // rows_per_log
  rows_per_log: 5
  # how many columns (3 = x, target, pred); one entry per key p, m, s, d
  im_per_row:
    p: 4
    m: 6
    s: 4
    d: 4