StableDiffusionVideoTo3D

Runtime error

StableDiffusionVideoTo3D/scripts/pub/configs/V3D_512.yaml

heheyas

fix

e1cbd75 8 months ago

4.69 kB

	model:
	base_learning_rate: 1.0e-04
	target: sgm.models.video_diffusion.DiffusionEngine
	params:
	ckpt_path: ./ckpts/V3D_512.ckpt
	scale_factor: 0.18215
	disable_first_stage_autocast: true
	input_key: latents
	log_keys: []
	scheduler_config:
	target: sgm.lr_scheduler.LambdaLinearScheduler
	params:
	warm_up_steps:
	- 1
	cycle_lengths:
	- 10000000000000
	f_start:
	- 1.0e-06
	f_max:
	- 1.0
	f_min:
	- 1.0
	denoiser_config:
	target: sgm.modules.diffusionmodules.denoiser.Denoiser
	params:
	scaling_config:
	target: sgm.modules.diffusionmodules.denoiser_scaling.VScalingWithEDMcNoise
	network_config:
	target: sgm.modules.diffusionmodules.video_model.VideoUNet
	params:
	adm_in_channels: 768
	num_classes: sequential
	use_checkpoint: true
	in_channels: 8
	out_channels: 4
	model_channels: 320
	attention_resolutions:
	- 4
	- 2
	- 1
	num_res_blocks: 2
	channel_mult:
	- 1
	- 2
	- 4
	- 4
	num_head_channels: 64
	use_linear_in_transformer: true
	transformer_depth: 1
	context_dim: 1024
	spatial_transformer_attn_type: softmax-xformers
	extra_ff_mix_layer: true
	use_spatial_context: true
	merge_strategy: learned_with_images
	video_kernel_size:
	- 3
	- 1
	- 1
	conditioner_config:
	target: sgm.modules.GeneralConditioner
	params:
	emb_models:
	- is_trainable: false
	ucg_rate: 0.2
	input_key: cond_frames_without_noise
	target: sgm.modules.encoders.modules.IdentityEncoder
	- input_key: fps_id
	is_trainable: true
	target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
	params:
	outdim: 256
	- input_key: motion_bucket_id
	is_trainable: true
	target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
	params:
	outdim: 256
	- input_key: cond_frames
	is_trainable: false
	ucg_rate: 0.2
	target: sgm.modules.encoders.modules.IdentityEncoder
	- input_key: cond_aug
	is_trainable: true
	target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
	params:
	outdim: 256
	first_stage_config:
	target: sgm.models.autoencoder.AutoencodingEngine
	params:
	loss_config:
	target: torch.nn.Identity
	regularizer_config:
	target: sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer
	encoder_config:
	target: sgm.modules.diffusionmodules.model.Encoder
	params:
	attn_type: vanilla
	double_z: true
	z_channels: 4
	resolution: 256
	in_channels: 3
	out_ch: 3
	ch: 128
	ch_mult:
	- 1
	- 2
	- 4
	- 4
	num_res_blocks: 2
	attn_resolutions: []
	dropout: 0.0
	decoder_config:
	target: sgm.modules.autoencoding.temporal_ae.VideoDecoder
	params:
	attn_type: vanilla
	double_z: true
	z_channels: 4
	resolution: 256
	in_channels: 3
	out_ch: 3
	ch: 128
	ch_mult:
	- 1
	- 2
	- 4
	- 4
	num_res_blocks: 2
	attn_resolutions: []
	dropout: 0.0
	video_kernel_size:
	- 3
	- 1
	- 1
	sampler_config:
	target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
	params:
	num_steps: 30
	discretization_config:
	target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
	params:
	sigma_max: 700.0
	guider_config:
	target: sgm.modules.diffusionmodules.guiders.LinearPredictionGuider
	params:
	max_scale: 3.5
	min_scale: 3.5
	num_frames: 18
	loss_fn_config:
	target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
	params:
	batch2model_keys:
	- num_video_frames
	- image_only_indicator
	loss_weighting_config:
	target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
	params:
	sigma_data: 1.0
	sigma_sampler_config:
	target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling
	params:
	p_mean: 1.5
	p_std: 2.0