|
model: |
|
base_learning_rate: 1.0e-4 |
|
target: sgm.models.diffusion.DiffusionEngine |
|
params: |
|
scale_factor: 0.13025 |
|
disable_first_stage_autocast: True |
|
log_keys: |
|
- cls |
|
|
|
scheduler_config: |
|
target: sgm.lr_scheduler.LambdaLinearScheduler |
|
params: |
|
warm_up_steps: [10000] |
|
cycle_lengths: [10000000000000] |
|
f_start: [1.e-6] |
|
f_max: [1.] |
|
f_min: [1.] |
|
|
|
denoiser_config: |
|
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser |
|
params: |
|
num_idx: 1000 |
|
|
|
scaling_config: |
|
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling |
|
discretization_config: |
|
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization |
|
|
|
network_config: |
|
target: sgm.modules.diffusionmodules.openaimodel.UNetModel |
|
params: |
|
use_checkpoint: True |
|
in_channels: 4 |
|
out_channels: 4 |
|
model_channels: 256 |
|
attention_resolutions: [1, 2, 4] |
|
num_res_blocks: 2 |
|
channel_mult: [1, 2, 4] |
|
num_head_channels: 64 |
|
num_classes: sequential |
|
adm_in_channels: 1024 |
|
transformer_depth: 1 |
|
context_dim: 1024 |
|
spatial_transformer_attn_type: softmax-xformers |
|
|
|
conditioner_config: |
|
target: sgm.modules.GeneralConditioner |
|
params: |
|
emb_models: |
|
- is_trainable: True |
|
input_key: cls |
|
ucg_rate: 0.2 |
|
target: sgm.modules.encoders.modules.ClassEmbedder |
|
params: |
|
add_sequence_dim: True |
|
embed_dim: 1024 |
|
n_classes: 1000 |
|
|
|
- is_trainable: False |
|
ucg_rate: 0.2 |
|
input_key: original_size_as_tuple |
|
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND |
|
params: |
|
outdim: 256 |
|
|
|
- is_trainable: False |
|
input_key: crop_coords_top_left |
|
ucg_rate: 0.2 |
|
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND |
|
params: |
|
outdim: 256 |
|
|
|
first_stage_config: |
|
target: sgm.models.autoencoder.AutoencoderKL |
|
params: |
|
ckpt_path: CKPT_PATH |
|
embed_dim: 4 |
|
monitor: val/rec_loss |
|
ddconfig: |
|
attn_type: vanilla-xformers |
|
double_z: true |
|
z_channels: 4 |
|
resolution: 256 |
|
in_channels: 3 |
|
out_ch: 3 |
|
ch: 128 |
|
ch_mult: [1, 2, 4, 4] |
|
num_res_blocks: 2 |
|
attn_resolutions: [] |
|
dropout: 0.0 |
|
lossconfig: |
|
target: torch.nn.Identity |
|
|
|
loss_fn_config: |
|
target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss |
|
params: |
|
loss_weighting_config: |
|
target: sgm.modules.diffusionmodules.loss_weighting.EpsWeighting |
|
sigma_sampler_config: |
|
target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling |
|
params: |
|
num_idx: 1000 |
|
|
|
discretization_config: |
|
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization |
|
|
|
sampler_config: |
|
target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler |
|
params: |
|
num_steps: 50 |
|
|
|
discretization_config: |
|
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization |
|
|
|
guider_config: |
|
target: sgm.modules.diffusionmodules.guiders.VanillaCFG |
|
params: |
|
scale: 5.0 |
|
|
|
data: |
|
target: sgm.data.dataset.StableDataModuleFromConfig |
|
params: |
|
train: |
|
datapipeline: |
|
urls: |
|
|
|
- DATA_PATH |
|
pipeline_config: |
|
shardshuffle: 10000 |
|
sample_shuffle: 10000 |
|
|
|
decoders: |
|
- pil |
|
|
|
postprocessors: |
|
- target: sdata.mappers.TorchVisionImageTransforms |
|
params: |
|
key: jpg |
|
transforms: |
|
- target: torchvision.transforms.Resize |
|
params: |
|
size: 256 |
|
interpolation: 3 |
|
- target: torchvision.transforms.ToTensor |
|
- target: sdata.mappers.Rescaler |
|
|
|
- target: sdata.mappers.AddOriginalImageSizeAsTupleAndCropToSquare |
|
params: |
|
h_key: height |
|
w_key: width |
|
|
|
loader: |
|
batch_size: 64 |
|
num_workers: 6 |
|
|
|
lightning: |
|
modelcheckpoint: |
|
params: |
|
every_n_train_steps: 5000 |
|
|
|
callbacks: |
|
metrics_over_trainsteps_checkpoint: |
|
params: |
|
every_n_train_steps: 25000 |
|
|
|
image_logger: |
|
target: main.ImageLogger |
|
params: |
|
disabled: False |
|
enable_autocast: False |
|
batch_frequency: 1000 |
|
max_images: 8 |
|
increase_log_steps: True |
|
log_first_step: False |
|
log_images_kwargs: |
|
use_ema_scope: False |
|
N: 8 |
|
n_rows: 2 |
|
|
|
trainer: |
|
devices: 0, |
|
benchmark: True |
|
num_sanity_val_steps: 0 |
|
accumulate_grad_batches: 1 |
|
max_epochs: 1000 |