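# Training configuration for the PSHuman multi-view diffusion model:
# joint color/normal generation of 7 views at 768x768 from a single image,
# fine-tuned from SD-2.1-unCLIP with SMPL-X conditioning.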
pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-unclip
pretrained_unet_path: null
revision: null
with_smpl: true
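# Settings shared by the dataset blocks below, referenced via ${data_common.*}.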
data_common:
  root_dir: /aifs4su/mmcode/lipeng/human_8view_with_smplx/
  predict_relative_views: [0, 1, 2, 4, 6, 7]
  num_validation_samples: 8
  img_wh: [768, 768]
  read_normal: true
  read_color: true
  read_depth: false
  exten: .png
  prompt_embeds_path: mvdiffusion/data/fixed_prompt_embeds_7view
  object_list:
    - data_lists/human_only_scan_with_smplx.json # modified
  invalid_list:
    -
  with_smpl: ${with_smpl}
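# Training split: subjects are rendered every 45 degrees of azimuth (8 views);
# bg_color 'three_choices' randomizes the training background color.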
train_dataset:
  root_dir: ${data_common.root_dir}
  azi_interval: 45.0
  random_views: 0
  predict_relative_views: ${data_common.predict_relative_views}
  bg_color: three_choices
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  side_views_rate: 0.3
  elevation_list: null
  with_smpl: ${with_smpl}
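# Single-image validation inputs (inference-style) taken from examples/debug.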
validation_dataset:
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  root_dir: examples/debug
  num_views: ${num_views}
  bg_color: white
  img_wh: ${data_common.img_wh}
  num_validation_samples: 1000
  margin_size: 10
  # crop_size: 720
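# Validation on renders from the training distribution, with a fixed white background.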
validation_train_dataset:
  root_dir: ${data_common.root_dir}
  azi_interval: 45.0
  random_views: 0
  predict_relative_views: ${data_common.predict_relative_views}
  bg_color: white
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  num_samples: ${data_common.num_validation_samples}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  elevation_list: null
  with_smpl: ${with_smpl}
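# Run outputs and optimization hyperparameters.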
output_dir: output/unit-unclip-768-6view-onlyscan-onlyortho-faceinself-scale0.5-smplx
checkpoint_prefix: ../human_checkpoint_backup/
seed: 42
train_batch_size: 2
validation_batch_size: 1
validation_train_batch_size: 1
max_train_steps: 30000
gradient_accumulation_steps: 2
gradient_checkpointing: true
learning_rate: 0.0001
scale_lr: false
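# diffusers piecewise_constant schedule: '1:2000,0.5' keeps the LR multiplier
# at 1.0 for the first 2000 steps, then 0.5 for the remainder of training.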
lr_scheduler: piecewise_constant
step_rules: 1:2000,0.5
lr_warmup_steps: 10
snr_gamma: 5.0
use_8bit_adam: false
allow_tf32: true
use_ema: true
dataloader_num_workers: 32
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
prediction_type: null
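# Logging, checkpointing, and validation cadence.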
logging_dir: logs
vis_dir: vis
mixed_precision: fp16
report_to: wandb
local_rank: 0
checkpointing_steps: 5000
checkpoints_total_limit: 2
resume_from_checkpoint: latest
enable_xformers_memory_efficient_attention: true
validation_steps: 2500
validation_sanity_check: true
tracker_project_name: PSHuman
trainable_modules: null
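# Classifier-free guidance training: conditioning is dropped for 5% of samples
# (condition_drop_rate, drop_type below) so the unconditional branch is learned too.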
use_classifier_free_guidance: true
condition_drop_rate: 0.05
scale_input_latents: true
regress_elevation: false
regress_focal_length: false
elevation_loss_weight: 1.0
focal_loss_weight: 0.0
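# Keyword arguments forwarded to the diffusion pipeline and the multi-view UNet.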
pipe_kwargs:
  num_views: ${num_views}
pipe_validation_kwargs:
  eta: 1.0
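# Multi-view UNet constructor arguments. sample_size 96 is the latent resolution
# (768 px / 8x VAE downsampling); in_channels 12 presumably concatenates the noisy
# latents with conditioning latents (interpretation, not verified against the code).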
unet_from_pretrained_kwargs:
  unclip: true
  num_views: ${num_views}
  sample_size: 96
  zero_init_conv_in: true
  regress_elevation: ${regress_elevation}
  regress_focal_length: ${regress_focal_length}
  num_regress_blocks: 2
  camera_embedding_type: e_de_da_sincos
  projection_camera_embeddings_input_dim: 2
  zero_init_camera_projection: true # modified
  init_mvattn_with_selfattn: false
  cd_attention_last: false
  cd_attention_mid: false
  multiview_attention: true
  sparse_mv_attention: true
  selfattn_block: self_rowwise
  mvcd_attention: true
  addition_downsample: false
  use_face_adapter: false
  in_channels: 12
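# Classifier-free guidance scales evaluated during validation.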
validation_guidance_scales:
  - 3.0
validation_grid_nrow: ${num_views}
camera_embedding_lr_mult: 1.0
plot_pose_acc: false
num_views: 7
pred_type: joint
drop_type: drop_as_a_whole
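# The ${...} references above are OmegaConf interpolations; a minimal sketch of
# loading this file (path is illustrative):
#   from omegaconf import OmegaConf
#   cfg = OmegaConf.load("configs/train-768-6view-smplx.yaml")
#   print(cfg.train_dataset.root_dir)  # resolves to data_common.root_dir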