Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,989 Bytes
9d0d223 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# @package __global__
# Hydra defaults list: the configs composed into this one, in the order given.
defaults:
  - /solver/default
  - /augmentations/default
  - override /dset: audio/example
  - _self_

solver: watermarking  # standard name to load the solver using builders

# `???` is the OmegaConf marker for a mandatory value that must be set elsewhere.
sample_rate: ???
channels: ???
# all the defaults from compression
# One scalar per loss term (presumably the weight of each term; confirm
# against the solver). Several names match hyperparameter sections later in
# this file, so the entries must be nested here to avoid key collisions.
losses:
  adv: 4.
  feat: 4.
  l1: 0.1
  mel: 0.0
  msspec: 2.0
  sisnr: 0.0
  wm_detection: 1.0  # loss for first 2 bits cannot be 0
  wm_mb: 1.0  # loss for the rest of the bits (wm message)
  tf_loudnessratio: 10.0
# Loss balancer settings (exact semantics are defined by the consumer).
balancer:
  balance_grads: true
  ema_decay: 0.999
  per_batch_item: true
  total_norm: 1.
# Crop settings. The three probabilities below must not sum to more than 1.
crop:
  prob: 0.4
  shuffle_prob: 0.2
  pad_prob: 0.2  # shuffle_prob + pad_prob + prob <= 1
  size: 0.5
  max_n_windows: 5
# Adversarial training settings. Names in `adversaries` presumably refer to
# the same-named hyperparameter sections of this file (e.g. `msstftd`).
adversarial:
  every: 1
  adversaries: [msstftd]
  adv_loss: hinge
  feat_loss: l1
# Hyperparameters of the `tf_loudnessratio` term listed under `losses`.
tf_loudnessratio:
  sample_rate: ${sample_rate}  # interpolated from the top-level sample_rate
  segment: 0.5
  overlap: 0.5
  n_bands: 16
  temperature: 1.0
# watermarking: audioseal
# losses hyperparameters
# `l1` and `l2` take no hyperparameters (explicit empty mappings).
l1: {}
l2: {}

wm_detection:
  p_weight: 1
  n_weight: 1

wm_mb:
  loss_type: bce  # loss between decoded and original
  temperature: 0.1  # decoded is divided by temperature before loss computation
# Two sections with identical settings; each keeps its own nested copy.
spec_range:
  n_fft: 2048
  min_frequency: 300.0
  max_frequency: 15000.0
  sample_rate: ${sample_rate}

spec_entropy_range:
  n_fft: 2048
  min_frequency: 300.0
  max_frequency: 15000.0
  sample_rate: ${sample_rate}
# Hyperparameters for the `mrstft` section.
mrstft:
  factor_sc: .5
  factor_mag: .5
  normalized: false
# Hyperparameters of the `mel` term listed under `losses`.
mel:
  sample_rate: ${sample_rate}
  n_fft: 1024
  hop_length: 256
  win_length: 1024
  n_mels: 64
  f_min: 64
  f_max: null
  normalized: false
  # Written with a decimal point so YAML 1.1 loaders also resolve it as a float.
  floor_level: 1.0e-5
# Hyperparameters of the `sisnr` term listed under `losses`.
sisnr:
  sample_rate: ${sample_rate}
  segment: 5.
# Hyperparameters of the `msspec` term listed under `losses`.
msspec:
  sample_rate: ${sample_rate}
  range_start: 6
  range_end: 11
  n_mels: 64
  f_min: 64
  f_max: null
  normalized: true
  alphas: false
  # Written with a decimal point so YAML 1.1 loaders also resolve it as a float.
  floor_level: 1.0e-5
# metrics
# Metric backends. `visqol` is nested one level down so that its `bin` and
# `model` settings stay attached to it.
metrics:
  visqol:
    mode: audio
    bin: null  # path to visqol install
    model: tcdaudio14_aacvopus_coresv_svrnsim_n.68_g.01_c1.model  # visqol v3
# adversaries hyperparameters
# Hyperparameters for the `msstftd` adversary (the one enabled in
# `adversarial.adversaries`). The three list settings have five entries each.
msstftd:
  in_channels: 1
  out_channels: 1
  filters: 32
  norm: weight_norm
  n_ffts: [1024, 2048, 512, 256, 128]
  hop_lengths: [256, 512, 128, 64, 32]
  win_lengths: [1024, 2048, 512, 256, 128]
  activation: LeakyReLU
  activation_params: { negative_slope: 0.3 }
# Hyperparameters for the `msd` adversary.
msd:
  in_channels: 1
  out_channels: 1
  scale_norms: [spectral_norm, weight_norm, weight_norm]
  kernel_sizes: [5, 3]
  filters: 16
  max_filters: 1024
  downsample_scales: [4, 4, 4, 4]
  inner_kernel_sizes: null
  groups: [4, 4, 4, 4]
  strides: null
  paddings: null
  activation: LeakyReLU
  activation_params: { negative_slope: 0.3 }
# Hyperparameters for the `mpd` adversary.
mpd:
  in_channels: 1
  out_channels: 1
  periods: [2, 3, 5, 7, 11]
  n_layers: 5
  kernel_size: 5
  stride: 3
  filters: 8
  filter_scales: 4
  max_filters: 1024
  activation: LeakyReLU
  activation_params: { negative_slope: 0.3 }
  norm: weight_norm
# data hyperparameters
# Data loading settings. The first three keys are the shared values; the
# nested `train`/`valid`/`evaluate`/`generate` mappings are presumably
# per-split overrides (confirm against the data loader builder). The nested
# `evaluate`/`generate` here are distinct from the top-level solver sections
# of the same name.
dataset:
  batch_size: 16
  num_workers: 10
  segment_duration: 1
  train:
    num_samples: 500000
  valid:
    num_samples: 10000
  evaluate:
    batch_size: 16
    num_samples: 10000
    segment_duration: 10
  generate:
    batch_size: 16
    num_samples: 50
    segment_duration: 30
# solver hyperparameters
# Evaluation stage settings. `metrics` switches individual metrics on or off.
evaluate:
  every: 10
  num_workers: 5
  metrics:
    visqol: false
    sisnr: true
# Generation stage settings.
generate:
  every: 10
  num_workers: 5
  audio:
    sample_rate: ${sample_rate}  # interpolated from the top-level sample_rate
# checkpointing schedule
# Checkpoint saving and retention settings.
checkpoint:
  save_last: true
  save_every: 25
  keep_last: 10
  keep_every_states: null
# optimization hyperparameters
# Optimizer settings. `adam` holds the options for the optimizer selected by
# `optimizer`; `ema` configures the exponential moving average.
optim:
  epochs: 300
  updates_per_epoch: 2000
  # Written with a decimal point so YAML 1.1 loaders also resolve it as a float.
  lr: 5.0e-5
  max_norm: 3.0
  optimizer: adam
  adam:
    betas: [0.5, 0.9]
    weight_decay: 0.
  ema:
    use: true  # whether to use EMA or not
    updates: 1  # update at every step
    device: ${device}  # device for EMA, can be put on GPU if more frequent updates
    decay: 0.99  # EMA decay value, if null, no EMA is used
# Learning-rate schedule. `cosine` holds the options for the scheduler
# selected by `lr_scheduler`.
schedule:
  lr_scheduler: "cosine"
  cosine:
    warmup: 4000
    lr_min_ratio: 0.0
    cycle_length: 1.0
|