# Latent-diffusion model configuration.
#
# Active sections: a conditioner with two embedders (keyed on `caption` and
# `img-c`), a loss function wrapping a transport config, and a guider.
# The scheduler/denoiser stanzas and a third embedder are kept commented out.
#
# NOTE(review): each section pairs a dotted `target` path with `params`;
# presumably the loader imports `target` and passes `params` as keyword
# arguments - confirm against the code that consumes this file.
# NOTE(review): the nesting below was reconstructed from key order; confirm
# that `loss_fn_config` and `guider_config` sit directly under `ldm_configs`.
ldm_configs:

  # scheduler_config:
  #   target: sgm.lr_scheduler.LambdaLinearScheduler
  #   params:
  #     warm_up_steps: [10000]
  #     cycle_lengths: [10000000000000]
  #     f_start: [1.e-6]
  #     f_max: [1.]
  #     f_min: [1.]

  # denoiser_config:
  #   target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
  #   params:
  #     num_idx: 1000
  #
  #     scaling_config:
  #       target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
  #     discretization_config:
  #       target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

  conditioner_config:
    target: sgm.modules.GeneralConditioner
    params:
      emb_models:
        # Embedder for the `caption` input; not trainable.
        - is_trainable: False
          input_key: caption
          ucg_rate: 0.1
          target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
          params:
            always_return_pooled: True
            legacy: False
            arch: 'ViT-L-14'
            version: 'openai'

        # Embedder for the `img-c` input; trainable (note `freeze: False`).
        - is_trainable: True
          input_key: 'img-c'
          ucg_rate: 0.1
          # legacy_ucg_value: None
          target: sgm.modules.encoders.modules.FrozenDinov2ImageEmbedderMVPlucker
          params:
            freeze: False
            enable_bf16: True
            output_cls: False  # return pooling
            # arch: vits
            arch: vitb
            inp_size: 280
            n_cond_frames: 4  # first 4 views as cond
            modLN: False
            aug_c: True

        # - is_trainable: False
        #   input_key: 'fps-xyz'
        #   ucg_rate: 0.0
        #   target: sgm.modules.encoders.modules.PCD_Scaler
        #   params:
        #     scaling_factor: 0.45
        #     perturb_pcd_scale: 0.015
        #     # perturb_pcd_scale: 0.0

  loss_fn_config:
    target: sgm.modules.diffusionmodules.loss.FMLoss
    params:
      transport_config:
        target: transport.create_transport
        params:  # all follow default
          snr_type: uniform
          path_type: GVP

  guider_config:
    target: sgm.modules.diffusionmodules.guiders.VanillaCFG
    params:
      # scale: 1.0
      scale: 5.0