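# Training configuration for the PSHuman multiview diffusion model.
# Values written as ${...} are variable interpolations, presumably resolved
# by OmegaConf when the config is loaded.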
pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-unclip
pretrained_unet_path: null
revision: null
with_smpl: true
data_common:
  root_dir: /aifs4su/mmcode/lipeng/human_8view_with_smplx/
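  # Indices of the target views to predict. With the 45-degree azimuth interval
  # used by the datasets below, [0, 1, 2, 4, 6, 7] plausibly corresponds to
  # azimuths 0/45/90/180/270/315 degrees relative to the input view (an
  # assumption inferred from azi_interval, not stated here).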
  predict_relative_views: [0, 1, 2, 4, 6, 7]
  num_validation_samples: 8
  img_wh: [768, 768]
  read_normal: true
  read_color: true
  read_depth: false
  exten: .png
  prompt_embeds_path: mvdiffusion/data/fixed_prompt_embeds_7view
  object_list:
  - data_lists/human_only_scan_with_smplx.json  # modified
  invalid_list:
  - 
  with_smpl: ${with_smpl}

train_dataset:
  root_dir: ${data_common.root_dir}
  azi_interval: 45.0
  random_views: 0
  predict_relative_views: ${data_common.predict_relative_views}
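  # 'three_choices' presumably samples the background color from a small fixed
  # palette per item as augmentation; the exact palette is an assumption from
  # the name, not stated in this file.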
  bg_color: three_choices
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  side_views_rate: 0.3
  elevation_list: null
  with_smpl: ${with_smpl}

validation_dataset:
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  root_dir: examples/debug
  num_views: ${num_views}
  bg_color: white
  img_wh: ${data_common.img_wh}
  num_validation_samples: 1000
  margin_size: 10
  # crop_size: 720

validation_train_dataset:
  root_dir: ${data_common.root_dir}
  azi_interval: 45.0
  random_views: 0
  predict_relative_views: ${data_common.predict_relative_views}
  bg_color: white
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  num_samples: ${data_common.num_validation_samples}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  elevation_list: null
  with_smpl: ${with_smpl}

output_dir: output/unit-unclip-768-6view-onlyscan-onlyortho-faceinself-scale0.5-smplx
checkpoint_prefix: ../human_checkpoint_backup/
seed: 42
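# Effective batch size per process: train_batch_size x gradient_accumulation_steps = 2 x 2 = 4.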
train_batch_size: 2
validation_batch_size: 1
validation_train_batch_size: 1
max_train_steps: 30000
gradient_accumulation_steps: 2
gradient_checkpointing: true
learning_rate: 0.0001
scale_lr: false
lr_scheduler: piecewise_constant
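# Rule string for diffusers' piecewise_constant schedule, formatted as
# 'multiplier:step,...,final_multiplier': keep the base learning rate for the
# first 2000 steps, then scale it by 0.5 for the remainder of training.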
step_rules: "1:2000,0.5"
lr_warmup_steps: 10
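# Min-SNR loss weighting (Hang et al., 2023); gamma = 5.0 is the value
# recommended in that paper.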
snr_gamma: 5.0
use_8bit_adam: false
allow_tf32: true
use_ema: true
dataloader_num_workers: 32
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
prediction_type: null
logging_dir: logs
vis_dir: vis
mixed_precision: fp16
report_to: wandb
local_rank: 0
checkpointing_steps: 5000
checkpoints_total_limit: 2
resume_from_checkpoint: latest
enable_xformers_memory_efficient_attention: true
validation_steps: 2500
validation_sanity_check: true
tracker_project_name: PSHuman
trainable_modules: null

use_classifier_free_guidance: true
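# Probability of dropping the conditioning during training so the model also
# learns the unconditional branch required for classifier-free guidance.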
condition_drop_rate: 0.05
scale_input_latents: true
regress_elevation: false
regress_focal_length: false
elevation_loss_weight: 1.0
focal_loss_weight: 0.0
pipe_kwargs:
  num_views: ${num_views}
pipe_validation_kwargs:
  eta: 1.0

unet_from_pretrained_kwargs:
  unclip: true
  num_views: ${num_views}
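  # Latent spatial size: 768-px images divided by the SD VAE's 8x downsampling = 96.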
  sample_size: 96
  zero_init_conv_in: true
  regress_elevation: ${regress_elevation}
  regress_focal_length: ${regress_focal_length}
  num_regress_blocks: 2
  camera_embedding_type: e_de_da_sincos
  projection_camera_embeddings_input_dim: 2
  zero_init_camera_projection: true # modified
  init_mvattn_with_selfattn: false
  cd_attention_last: false
  cd_attention_mid: false
  multiview_attention: true
  sparse_mv_attention: true
  selfattn_block: self_rowwise
  mvcd_attention: true
  addition_downsample: false
  use_face_adapter: false
  in_channels: 12

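# Classifier-free guidance scale(s) used when sampling validation grids.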
validation_guidance_scales:
- 3.0
validation_grid_nrow: ${num_views}
camera_embedding_lr_mult: 1.0
plot_pose_acc: false
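# 7 = the 6 views in data_common.predict_relative_views plus the input view
# (an assumption from the counts, not stated explicitly here).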
num_views: 7
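# 'joint' presumably means color and normal domains are predicted jointly
# (cf. mvcd_attention above); 'drop_as_a_whole' presumably drops a sample's
# entire condition set together for CFG. Both readings are assumptions from
# the names.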
pred_type: joint
drop_type: drop_as_a_whole