output_path: /miniscratch/_groups/ccai/trash
# README on load_paths
# 1/ any path that points to a dir will be loaded as `path / checkpoints / latest_ckpt.pth`
# 2/ if you want to specify a specific checkpoint, it MUST be a `.pth` file
# 3/ when resuming a P OR an M model, you may only specify 1 of `load_paths.p` OR `load_paths.m`.
# You may also leave BOTH at none, in which case `output_path / checkpoints / latest_ckpt.pth`
# will be used
# 4/ when resuming a P+M model, you may specify (`p` AND `m`) OR `pm`, OR leave all at none,
# in which case `output_path / checkpoints / latest_ckpt.pth` will be used to load from
# a single checkpoint (see the commented example after the block below)
load_paths:
p: none # Painter weights: none will use `output_path / checkpoints / latest_ckpt.pth`
m: none # Masker weights: none will use `output_path / checkpoints / latest_ckpt.pth`
pm: none # Painter and Masker weights: none will use `output_path / checkpoints / latest_ckpt.pth`
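# Illustrative example (the .pth path below is hypothetical): to resume a P+M model from a
# single explicit checkpoint, leave `p` and `m` at none and point `pm` to a `.pth` file:
# load_paths:
#   p: none
#   m: none
#   pm: /path/to/run/checkpoints/epoch_100_ckpt.pth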
# -------------------
# ----- Tasks -----
# -------------------
tasks: [d, s, m, p] # e.g. [p] to train the painter only, or [m, s, d] to train the masker only
# ----------------
# ----- Data -----
# ----------------
data:
max_samples: -1 # -1 for all, otherwise set to an int to crop the training data size
files: # if an entry is not none, it overrides the corresponding dirs location
base: /miniscratch/_groups/ccai/data/jsons
train:
r: train_r_full.json
s: train_s_fixedholes.json
rf: train_rf.json
kitti: train_kitti.json
val:
r: val_r_full.json
s: val_s_fixedholes.json
rf: val_rf_labelbox.json
kitti: val_kitti.json
check_samples: False
loaders:
batch_size: 6
num_workers: 6
normalization: default # can be "default" or "HRNet" for now. # default: mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]; HRNet: mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
transforms:
- name: hflip
ignore: val
p: 0.5
- name: resize
ignore: false
new_size: 640
keep_aspect_ratio: true # smallest dimension will be `new_size` and the other will be computed to keep aspect ratio
- name: crop
ignore: false
center: val # disable randomness, crop around the image's center
height: 600
width: 600
- name: brightness
ignore: val
- name: saturation
ignore: val
- name: contrast
ignore: val
- name: resize
ignore: false
new_size:
default: 640
d: 160
s: 160
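# Illustrative walk-through of the pipeline above, assuming a 1920 x 1280 training image:
# hflip with probability 0.5, resize so the smallest side is 640 (-> 960 x 640), random
# 600 x 600 crop (center crop for val), color jittering (train only), then a final resize
# to 640 x 640 for the default tensors and 160 x 160 for the d and s targets
# (as the per-task new_size entries above suggest).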
# ---------------------
# ----- Generator -----
# ---------------------
gen:
opt:
optimizer: ExtraAdam # one of [Adam, ExtraAdam]; default: Adam
beta1: 0.9
lr:
default: 0.00005 # 0.00001 for dlv2, 0.00005 for dlv3
lr_policy: step
# lr_policy can be constant, step or multi_step; if step, specify lr_step_size and lr_gamma;
# if multi_step, specify lr_step_size, lr_gamma and lr_milestones:
# if lr_milestones is a list:
# the learning rate will be multiplied by gamma each time the epoch reaches an
# item in the list (no need for lr_step_size).
# if lr_milestones is an int:
# a list of milestones is created from `range(lr_milestones, train.epochs, lr_step_size)`
# (see the worked example after this block)
lr_step_size: 5 # period of learning rate decay (epochs) for the step / multi_step policies
lr_milestones: 15
lr_gamma: 0.5 # Multiplicative factor of learning rate decay
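# Worked example of the int case (assuming lr_policy: multi_step and train.epochs: 300):
# with lr_milestones: 15 and lr_step_size: 5, the milestones are range(15, 300, 5),
# i.e. epochs 15, 20, 25, ..., 295, and the lr is halved (lr_gamma: 0.5) at each of them.
# With the step policy configured here, only lr_step_size and lr_gamma are used.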
default:
&default-gen # default parameters for the generator (encoder and decoders)
activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh]
init_gain: 0.02
init_type: xavier
n_res: 1 # number of residual blocks before upsampling
n_downsample: &n_downsample 3 # number of downsampling layers in encoder | dim 32 + down 3 => z = 256 x 32 x 32
n_upsample: *n_downsample # upsampling in spade decoder ; should match encoder.n_downsample
pad_type: reflect # padding type [zero/reflect]
norm: spectral # ResBlock normalization ; one of {"batch", "instance", "layer", "adain", "spectral", "none"}
proj_dim: 32 # Dim of projection from latent space
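# Reading the n_downsample note above (assuming each downsampling doubles the channel count,
# as the 32 -> 256 figure in that comment suggests): bottleneck channels = dim * 2^n_downsample
# = 32 * 2^3 = 256, and the spatial size is the input size divided by 2^n_downsample.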
encoder: # specific params for the encoder
<<: *default-gen
dim: 32
architecture: deeplabv3 # deeplabv2/v3 with a resnet backbone -> res_dim=2048 | dlv3 with mobilenet -> res_dim=320
input_dim: 3 # input number of channels
n_res: 0 # number of residual blocks in content encoder/decoder
norm: spectral # ConvBlock normalization ; one of {"batch", "instance", "layer", "adain", "spectral", "none"}
#! Don't change!!!
deeplabv2:
nblocks: [3, 4, 23, 3]
use_pretrained: True
pretrained_model: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv2/DeepLab_resnet_pretrained_imagenet.pth"
deeplabv3:
backbone: resnet # resnet or mobilenet
output_stride: 8 # 8 or 16
use_pretrained: true
pretrained_model:
mobilenet: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv3/deeplabv3_plus_mobilenetv2_segmentron.pth"
resnet: "/miniscratch/_groups/ccai/data/pretrained_models/deeplabv3/model_CoinCheungDeepLab-v3-plus.pth"
d: # specific params for the depth estimation decoder
<<: *default-gen
output_dim: 1
norm: batch
loss: sigm # dada or sigm | /!\ ignored if classify.enable
upsample_featuremaps: True # upsamples from 80x80 to 160x160 intermediate feature maps
architecture: dada # dada or base | must be base for classif
classify: # classify log-depth instead of regression
enable: False
linspace:
min: 0.35
max: 6.95
buckets: 256
s: # specific params for the semantic segmentation decoder
<<: *default-gen
num_classes: 11
output_dim: 11
use_advent: True
use_minent: True
architecture: deeplabv3
upsample_featuremaps: False # upsamples from 80x80 to 160x160 intermediate feature maps
use_dada: True
p: # specific params for the SPADE painter
<<: *default-gen
latent_dim: 640
loss: gan # gan or hinge
no_z: true # <=> use_vae=False in the SPADE repo
output_dim: 3 # output dimension
pad_type: reflect # padding type [zero/reflect]
paste_original_content: True # backprop only through the painted water region, not the whole generated image: fake_flooded = masked_x + m * fake_flooded
pl4m_epoch: 49 # epoch from which we introduce a new loss to the masker: the painter's discriminator's loss
spade_kernel_size: 3 # kernel size within SPADE norm layers
spade_n_up: 7 # number of upsampling layers in the translation decoder (intended to equal the number of downsamplings in the encoder). The output's h and w are z's h and w x 2^spade_n_up | e.g. z: 32 with spade_n_up: 4 => output 512
spade_param_free_norm: instance # what param-free normalization to apply in SPADE normalization
spade_use_spectral_norm: true
use_final_shortcut: False # if true, the last spade block does not get the masked input as conditioning but the prediction of the previous layer (passed through a conv to match dims) in order to lighten the masking restrictions and have smoother edges
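# Worked example of the spade_n_up formula above (illustrative arithmetic only): with
# spade_n_up: 7 as configured, a 640 x 640 painted output implies a z of spatial size
# 640 / 2^7 = 5 x 5.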
diff_aug:
use: False
do_color_jittering: false
do_cutout: false
cutout_ratio: 0.5
do_translation: false
translation_ratio: 0.125
m: # specific params for the mask-generation decoder
<<: *default-gen
use_spade: False
output_dim: 1
use_minent: True # directly minimize the entropy of the image
use_minent_var: True # add variance of entropy map in the measure of entropy for a certain picture
use_advent: True # minimize the entropy of the image by adversarial training
use_ground_intersection: True
use_proj: True
proj_dim: 64
use_pl4m: False
n_res: 3
use_low_level_feats: True
use_dada: False
spade:
latent_dim: 128
detach: false # detach s_pred and d_pred conditioning tensors
cond_nc: 15 # 12 without x, 15 with x
spade_use_spectral_norm: True
spade_param_free_norm: batch
num_layers: 3
activations:
all_lrelu: True
# -------------------------
# ----- Discriminator -----
# -------------------------
dis:
soft_shift: 0.2 # label smoothing: real in U(1-soft_shift, 1), fake in U(0, soft_shift) # ! one-sided label smoothing
flip_prob: 0.05 # label flipping
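# Worked example following the comments above: with soft_shift: 0.2, real labels are drawn
# from U(0.8, 1.0) and fake labels from U(0.0, 0.2); each label is flipped with probability 0.05.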
opt:
optimizer: ExtraAdam # one of [Adam, ExtraAdam]; default: Adam
beta1: 0.5
lr:
default: 0.00002 # 0.0001 for dlv2, 0.00002 for dlv3
lr_policy: step
# lr_policy can be constant, step or multi_step; if step, specify lr_step_size and lr_gamma;
# if multi_step, specify lr_step_size, lr_gamma and lr_milestones:
# if lr_milestones is a list:
# the learning rate will be multiplied by gamma each time the epoch reaches an
# item in the list (no need for lr_step_size).
# if lr_milestones is an int:
# a list of milestones is created from `range(lr_milestones, train.epochs, lr_step_size)`
lr_step_size: 15 # period of learning rate decay (epochs) for the step / multi_step policies
lr_milestones: 5
lr_gamma: 0.5 # Multiplicative factor of learning rate decay
default:
&default-dis # default setting for discriminators (there are 4 of them for rn rf sn sf)
input_nc: 3
ndf: 64
n_layers: 4
norm: instance
init_type: xavier
init_gain: 0.02
use_sigmoid: false
num_D: 1 # number of discriminators to use (>1 means multi-scale)
get_intermediate_features: false
p:
<<: *default-dis
num_D: 3
get_intermediate_features: true
use_local_discriminator: false
# ttur: false # two time-scale update rule (see SPADE repo)
m:
<<: *default-dis
multi_level: false
architecture: base # can be [base | OmniDiscriminator]
gan_type: WGAN_norm # can be [GAN | WGAN | WGAN_gp | WGAN_norm]
wgan_clamp_lower: -0.01 # used in WGAN: the discriminator's params are clamped to [wgan_clamp_lower, wgan_clamp_upper] after every update
wgan_clamp_upper: 0.01 # used in WGAN
s:
<<: *default-dis
gan_type: WGAN_norm # can be [GAN | WGAN | WGAN_gp | WGAN_norm]
wgan_clamp_lower: -0.01 # used in WGAN: the discriminator's params are clamped to [wgan_clamp_lower, wgan_clamp_upper] after every update
wgan_clamp_upper: 0.01 # used in WGAN
# -------------------------------
# ----- Domain Classifier -----
# -------------------------------
classifier:
opt:
optimizer: ExtraAdam # one of [Adam, ExtraAdam]; default: Adam
beta1: 0.5
lr:
default: 0.0005
lr_policy: step # constant or step; if step, specify lr_step_size and lr_gamma
lr_step_size: 30 # period of learning rate decay (epochs)
lr_gamma: 0.5
loss: l2 # loss can be l1, l2 or cross_entropy; default: cross_entropy
layers: [100, 100, 20, 20, 4] # number of units per hidden layer; last number is output_dim
dropout: 0.4 # probability of being set to 0
init_type: kaiming
init_gain: 0.2
proj_dim: 128 # dim of projection from latent space
# ------------------------
# ----- Train Params -----
# ------------------------
train:
kitti:
pretrain: False
epochs: 10
batch_size: 6
amp: False
pseudo:
tasks: [] # list of tasks for which to use pseudo labels (empty list to disable)
epochs: 10 # disable pseudo training after n epochs (set to -1 to never disable)
epochs: 300
fid:
n_images: 57 # val_rf.json has 57 images
batch_size: 50 # inception inference batch size, not painter's
dims: 2048 # which Inception block to compute the stats from (see BLOCK_INDEX_BY_DIM in fid.py)
latent_domain_adaptation: False # whether or not to do domain adaptation on the latent vectors # Needs to be turned off if use_advent is True
lambdas: # scaling factors in the total loss
G:
d:
main: 1
gml: 0.5
s:
crossent: 1
crossent_pseudo: 0.001
minent: 0.001
advent: 0.001
m:
bce: 1 # Main prediction loss, i.e. GAN or BCE
tv: 1 # total variation loss (for smoothing)
gi: 0.05
pl4m: 1 # painter loss for the masker (end-to-end)
p:
context: 0
dm: 1 # depth matching
featmatch: 10
gan: 1 # gan loss
reconstruction: 0
tv: 0
vgg: 10
classifier: 1
C: 1
advent:
ent_main: 0.5 # coefficient of the MinEnt loss that directly minimizes the entropy of the image
ent_aux: 0.0 # corresponding coefficient of the MinEnt loss on the second output
ent_var: 0.1 # proportion of the entropy map's variance in the entropy measure for a given picture
adv_main: 1.0 # coefficient of the AdvEnt loss that minimizes the entropy of the image through adversarial training
adv_aux: 0.0 # corresponding coefficient of the AdvEnt loss on the second output
dis_main: 1.0 # the discriminator takes care of the first output in the adversarial training
dis_aux: 0.0 # the discriminator takes care of the second output in the adversarial training
WGAN_gp: 10 # used in WGAN_gp: coefficient of the gradient penalty
log_level: 2 # 0: no log, 1: only aggregated losses, >1 detailed losses
save_n_epochs: 25 # Save `latest_ckpt.pth` every epoch, `epoch_{epoch}_ckpt.pth` model every n epochs if epoch >= min_save_epoch
min_save_epoch: 28 # Save extra intermediate checkpoints when epoch > min_save_epoch
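# Illustrative reading of the two settings above (assuming the periodic check is
# `epoch % save_n_epochs == 0`): latest_ckpt.pth is refreshed every epoch, while extra
# epoch_{epoch}_ckpt.pth files are only written at epochs 50, 75, 100, ... since they must
# also satisfy epoch >= min_save_epoch (28).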
resume: false # load the latest_ckpt.pth checkpoint from `output_path` # TODO: make this the path of the checkpoint to load
auto_resume: true # automatically look for similar output paths and the exact same jobID to resume training even if resume is false
# -----------------------------
# ----- Validation Params -----
# -----------------------------
val:
store_images: false # write to disk on top of comet logging
val_painter: /miniscratch/_groups/ccai/checkpoints/painter/victor/good_large_lr/checkpoints/latest_ckpt.pth
# -----------------------------
# ----- Comet Params ----------
# -----------------------------
comet:
display_size: 20
rows_per_log: 5 # number of samples (rows) in a logged grid image. Number of total logged images: display_size // rows_per_log
im_per_row: # how many columns (3 = x, target, pred)
p: 4
m: 6
s: 4
d: 4
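# Worked example with the values above: display_size // rows_per_log = 20 // 5 = 4 grid
# images are logged, each grid holding 5 sample rows; the number of columns per row is
# task-dependent (4 for p, 6 for m, 4 for s and d).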