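# Training configuration for PSHuman: multi-view (7-view) diffusion fine-tuning
# of stabilityai/stable-diffusion-2-1-unclip at 768x768.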
pretrained_model_name_or_path: stabilityai/stable-diffusion-2-1-unclip
pretrained_unet_path: null
revision: null
with_smpl: false
data_common:
  root_dir: /aifs4su/mmcode/lipeng/human_8view_new/
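  # These indices appear to select which of the 360/azi_interval = 8 azimuth
  # slots are predicted; with azi_interval 45.0 below, [0, 1, 2, 4, 6, 7] would
  # map to azimuths 0/45/90/180/270/315 degrees (an inference, not stated here).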
  predict_relative_views: [0, 1, 2, 4, 6, 7]
  num_validation_samples: 8
  img_wh: [768, 768]
  read_normal: true
  read_color: true
  read_depth: false
  exten: .png
  prompt_embeds_path: mvdiffusion/data/fixed_prompt_embeds_7view
  object_list:
  - data_lists/human_only_scan.json
  invalid_list: []
train_dataset:
  root_dir: ${data_common.root_dir}
  azi_interval: 45.0
  random_views: 3
  predict_relative_views: ${data_common.predict_relative_views}
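  # three_choices presumably samples the background color per item from a small
  # fixed set (e.g. white/black/gray) as augmentation; validation uses plain white.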
  bg_color: three_choices
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  side_views_rate: 0.3
  elevation_list: null
validation_dataset:
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  root_dir: examples/debug
  num_views: ${num_views}
  bg_color: white
  img_wh: ${data_common.img_wh}
  num_validation_samples: 1000
  crop_size: 740
validation_train_dataset:
  root_dir: ${data_common.root_dir}
  azi_interval: 45.0
  random_views: 3
  predict_relative_views: ${data_common.predict_relative_views}
  bg_color: white
  object_list: ${data_common.object_list}
  invalid_list: ${data_common.invalid_list}
  img_wh: ${data_common.img_wh}
  validation: false
  num_validation_samples: ${data_common.num_validation_samples}
  read_normal: ${data_common.read_normal}
  read_color: ${data_common.read_color}
  read_depth: ${data_common.read_depth}
  num_samples: ${data_common.num_validation_samples}
  load_cache: false
  exten: ${data_common.exten}
  prompt_embeds_path: ${data_common.prompt_embeds_path}
  elevation_list: null
output_dir: output/unit-unclip-768-6view-onlyscan-onlyortho-faceinself-scale0.5
checkpoint_prefix: ../human_checkpoint_backup/
seed: 42
train_batch_size: 2
validation_batch_size: 1
validation_train_batch_size: 1
max_train_steps: 30000
gradient_accumulation_steps: 2
gradient_checkpointing: true
learning_rate: 0.0001
scale_lr: false
lr_scheduler: piecewise_constant
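# step_rules follows diffusers' piecewise_constant format: "multiplier:step"
# pairs plus a final bare multiplier, so the LR is scaled by 1.0 for the first
# 2000 steps and by 0.5 thereafter.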
step_rules: "1:2000,0.5"
lr_warmup_steps: 10
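# Min-SNR loss weighting (Hang et al., 2023); 5.0 is the commonly used value.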
snr_gamma: 5.0
use_8bit_adam: false
allow_tf32: true
use_ema: true
dataloader_num_workers: 32
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
prediction_type: null
logging_dir: logs
vis_dir: vis
mixed_precision: fp16
report_to: wandb
local_rank: 0
checkpointing_steps: 2500
checkpoints_total_limit: 2
resume_from_checkpoint: latest
enable_xformers_memory_efficient_attention: true
validation_steps: 2500
validation_sanity_check: true
tracker_project_name: PSHuman
trainable_modules: null


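# Classifier-free guidance training: conditioning is dropped for 5% of samples;
# drop_type (bottom of file) controls how that drop is applied across views.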
use_classifier_free_guidance: true
condition_drop_rate: 0.05
scale_input_latents: true
regress_elevation: false
regress_focal_length: false
elevation_loss_weight: 1.0
focal_loss_weight: 0.0
pipe_kwargs:
  num_views: ${num_views}
pipe_validation_kwargs:
  eta: 1.0

unet_from_pretrained_kwargs:
  unclip: true
  num_views: ${num_views}
  sample_size: 96
  zero_init_conv_in: true
  regress_elevation: ${regress_elevation}
  regress_focal_length: ${regress_focal_length}
  num_regress_blocks: 2
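  # e_de_da_sincos presumably denotes elevation plus per-view elevation/azimuth
  # deltas, each embedded as sin/cos pairs.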
  camera_embedding_type: e_de_da_sincos
  projection_camera_embeddings_input_dim: 2
  zero_init_camera_projection: true # modified
  init_mvattn_with_selfattn: false
  cd_attention_last: false
  cd_attention_mid: false
  multiview_attention: true
  sparse_mv_attention: true
  selfattn_block: self_rowwise
  mvcd_attention: true
  addition_downsample: false
  use_face_adapter: false

validation_guidance_scales:
- 3.0
validation_grid_nrow: ${num_views}
camera_embedding_lr_mult: 1.0
plot_pose_acc: false
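# 7 is presumably the 6 views in predict_relative_views plus one extra (face)
# view, matching the fixed_prompt_embeds_7view path above.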
num_views: 7
pred_type: joint
drop_type: drop_as_a_whole
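
# A minimal sketch of how a config like this is typically loaded, assuming
# OmegaConf (whose ${...} interpolation syntax this file uses); the file path
# below is hypothetical:
#
#   from omegaconf import OmegaConf
#
#   cfg = OmegaConf.load("configs/train-768-6view.yaml")        # hypothetical path
#   cfg = OmegaConf.merge(cfg, OmegaConf.from_cli())            # optional CLI overrides
#   resolved = OmegaConf.to_container(cfg, resolve=True)        # expand ${...} refs
#   print(resolved["train_dataset"]["img_wh"])                  # -> [768, 768]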