---
model:
  base_learning_rate: 1.0e-4
  target: sgm.models.diffusion.DiffusionEngine
  params:
    scale_factor: 0.13025
    disable_first_stage_autocast: True
    trainkeys: pose
    multiplier: 0.05
    loss_rgb_lambda: 5
    loss_fg_lambda: 10
    loss_bg_lambda: 10
    log_keys:
      - txt

    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
      params:
        num_idx: 1000

        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    network_config:
      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: False
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_linear_in_transformer: True
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        image_cross_blocks: [0, 2, 4, 6, 8, 10]
        rgb: True
        far: 2
        num_samples: 24
        not_add_context_in_triplane: False
        rgb_predict: True
        add_lora: False
        average: False
        use_prev_weights_imp_sample: True
        stratified: True
        imp_sampling_percent: 0.9

    conditioner_config:
      target: sgm.modules.GeneralConditioner
      params:
        emb_models:
          # crossattn cond
          - is_trainable: False
            input_keys: txt,txt_ref
            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
            params:
              layer: hidden
              layer_idx: 11
              modifier_token: <new1>
          # crossattn and vector cond
          - is_trainable: False
            input_keys: txt,txt_ref
            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder
            params:
              arch: ViT-bigG-14
              version: laion2b_s39b_b160k
              layer: penultimate
              always_return_pooled: True
              legacy: False
              modifier_token: <new1>
          # vector cond
          - is_trainable: False
            input_keys: original_size_as_tuple,original_size_as_tuple_ref
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: False
            input_keys: crop_coords_top_left,crop_coords_top_left_ref
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: False
            input_keys: target_size_as_tuple,target_size_as_tuple_ref
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two

    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        ckpt_path: pretrained-models/sdxl_vae.safetensors
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    loss_fn_config:
      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLossImgRef
      params:
        sigma_sampler_config:
          target: sgm.modules.diffusionmodules.sigma_sampling.CubicSampling
          params:
            num_idx: 1000
            discretization_config:
              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        sigma_sampler_config_ref:
          target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
          params:
            num_idx: 50
            discretization_config:
              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    sampler_config:
      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
      params:
        num_steps: 50
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        guider_config:
          target: sgm.modules.diffusionmodules.guiders.VanillaCFGImgRef
          params:
            scale: 7.5
data:
  target: sgm.data.data_co3d.CustomDataDictLoader
  params:
    batch_size: 1
    num_workers: 4
    category: teddybear
    img_size: 512
    skip: 2
    num_images: 5
    mask_images: True
    single_id: 0
    bbox: True
    addreg: True
    drop_ratio: 0.25
    drop_txt: 0.1
    modifier_token: <new1>
lightning:
  modelcheckpoint:
    params:
      every_n_train_steps: 1600
      save_top_k: -1
      save_on_train_epoch_end: False

  callbacks:
    metrics_over_trainsteps_checkpoint:
      params:
        every_n_train_steps: 25000

    image_logger:
      target: main.ImageLogger
      params:
        disabled: False
        enable_autocast: False
        batch_frequency: 5000
        max_images: 8
        increase_log_steps: False
        log_first_step: False
        log_images_kwargs:
          use_ema_scope: False
          N: 1
          n_rows: 2

  trainer:
    devices: 0,1,2,3
    benchmark: True
    num_sanity_val_steps: 0
    accumulate_grad_batches: 1
    max_steps: 1610
    # val_check_interval: 400