# Base model selection and global switches.
pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-unclip
pretrained_unet_path: null
revision: null
# Referenced as ${with_smpl} by data_common, train_dataset and
# validation_train_dataset.
with_smpl: true
# Dataset settings shared by the dataset sections below through
# ${data_common.*} interpolation.
data_common:
  # NOTE(review): absolute, machine-specific path; adjust per environment.
  root_dir: /aifs4su/mmcode/lipeng/human_8view_with_smplx/
  predict_relative_views: [0, 1, 2, 4, 6, 7]
  num_validation_samples: 8
  img_wh: [768, 768]
  read_normal: true
  read_color: true
  read_depth: false
  exten: .png
  prompt_embeds_path: mvdiffusion/data/fixed_prompt_embeds_7view
  object_list:
    - data_lists/human_only_scan_with_smplx.json
  # A single empty entry, which YAML loads as [null].
  # NOTE(review): presumably means "no exclusion list" — confirm with the loader.
  invalid_list:
    -
  with_smpl: ${with_smpl}

# Training dataset; most values are inherited from data_common.
train_dataset:
  root_dir: ${data_common.root_dir}
  # NOTE(review): presumably the azimuth step in degrees between views — confirm.
  azi_interval: 45.0
  random_views: 0
  predict_relative_views: ${data_common.predict_relative_views}
  bg_color: three_choices
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  side_views_rate: 0.3
  elevation_list: null
  with_smpl: ${with_smpl}

# Validation dataset read from a local examples directory rather than
# data_common.root_dir.
validation_dataset:
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  root_dir: examples/debug
  num_views: ${num_views}
  bg_color: white
  img_wh: ${data_common.img_wh}
  num_validation_samples: 1000
  margin_size: 10

# Validation pass over the training data root: same source as train_dataset,
# but with a fixed white background and a capped sample count.
validation_train_dataset:
  root_dir: ${data_common.root_dir}
  azi_interval: 45.0
  random_views: 0
  predict_relative_views: ${data_common.predict_relative_views}
  bg_color: white
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  # NOTE(review): `validation` is false here as well as in train_dataset —
  # confirm this is intended for a validation split.
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  num_samples: ${data_common.num_validation_samples}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  elevation_list: null
  with_smpl: ${with_smpl}

# Run output locations and reproducibility.
output_dir: output/unit-unclip-768-6view-onlyscan-onlyortho-faceinself-scale0.5-smplx
checkpoint_prefix: ../human_checkpoint_backup/
seed: 42

# Batch sizes and schedule length.
train_batch_size: 2
validation_batch_size: 1
validation_train_batch_size: 1
max_train_steps: 30000
gradient_accumulation_steps: 2
gradient_checkpointing: true

# Learning-rate schedule.
learning_rate: 0.0001
scale_lr: false
lr_scheduler: piecewise_constant
# Quoted so the value is always loaded as a string.
# NOTE(review): presumably "multiplier:until_step,...,final_multiplier" —
# confirm against the scheduler implementation.
step_rules: "1:2000,0.5"
lr_warmup_steps: 10
snr_gamma: 5.0

# Optimizer and runtime options.
use_8bit_adam: false
allow_tf32: true
use_ema: true
dataloader_num_workers: 32
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
prediction_type: null

# Logging, precision and checkpointing.
logging_dir: logs
vis_dir: vis
mixed_precision: fp16
report_to: wandb
local_rank: 0
checkpointing_steps: 5000
checkpoints_total_limit: 2
resume_from_checkpoint: latest
enable_xformers_memory_efficient_attention: true

# Validation cadence and experiment tracking.
validation_steps: 2500
validation_sanity_check: true
tracker_project_name: PSHuman
trainable_modules: null

# Conditioning and auxiliary-loss switches.
use_classifier_free_guidance: true
condition_drop_rate: 0.05
scale_input_latents: true
# The two regress_* flags are also forwarded to unet_from_pretrained_kwargs
# via interpolation.
regress_elevation: false
regress_focal_length: false
elevation_loss_weight: 1.0
focal_loss_weight: 0.0

# Keyword-argument groups.
# NOTE(review): presumably passed to the pipeline constructor and to
# validation-time pipeline calls respectively — confirm in the training script.
pipe_kwargs:
  num_views: ${num_views}
pipe_validation_kwargs:
  eta: 1.0

# UNet keyword arguments.
# NOTE(review): the name suggests these are forwarded to the UNet's
# from_pretrained-style constructor — confirm in the training script.
unet_from_pretrained_kwargs:
  unclip: true
  num_views: ${num_views}
  # NOTE(review): 96 equals 768 / 8, so this presumably is the latent
  # resolution for img_wh 768 — confirm.
  sample_size: 96
  zero_init_conv_in: true
  regress_elevation: ${regress_elevation}
  regress_focal_length: ${regress_focal_length}
  num_regress_blocks: 2
  camera_embedding_type: e_de_da_sincos
  projection_camera_embeddings_input_dim: 2
  zero_init_camera_projection: true
  init_mvattn_with_selfattn: false
  cd_attention_last: false
  cd_attention_mid: false
  multiview_attention: true
  sparse_mv_attention: true
  selfattn_block: self_rowwise
  mvcd_attention: true
  addition_downsample: false
  use_face_adapter: false
  in_channels: 12

# Validation rendering options and remaining global settings.
validation_guidance_scales:
  - 3.0
validation_grid_nrow: ${num_views}
camera_embedding_lr_mult: 1.0
plot_pose_acc: false
# Target of every ${num_views} interpolation in this file.
num_views: 7
pred_type: joint
drop_type: drop_as_a_whole
|