# File: LASA/configs/finetune_triplane_diffusion.yaml
# Uploaded by: HaolinLiu
# Commit message: update files for demo
# Commit: 18bb538
# Model configuration: an `ae` section and a `dm` section.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped (every key sat at column 0, which made
# keys such as `type`, `unet` and `plane_reso` collide). Confirm the hierarchy
# against the code that consumes this config -- in particular whether each
# `output_dim` belongs inside its `unet` mapping or beside it.
model:
  # Original note here read "ae model is loaded to" (sentence appears to be
  # truncated in the source).
  ae:
    type: TriVAE
    point_emb_dim: 48
    padding: 0.1
    encoder:
      plane_reso: 128
      plane_latent_dim: 32
      latent_dim: 32
      unet:
        depth: 4
        merge_mode: concat
        start_filts: 32
        output_dim: 64
    decoder:
      plane_reso: 128
      latent_dim: 32
      n_blocks: 5
      query_emb_dim: 48
      hidden_dim: 128
      unet:
        depth: 4
        merge_mode: concat
        start_filts: 64
        output_dim: 32
  dm:
    type: triplane_diff_multiimg_cond
    backbone: resunet_multiimg_direct_atten
    diff_reso: 64
    input_channel: 32
    output_channel: 32
    triplane_padding: 0.1  # should be consistent with `padding` in `ae`
    use_par: true
    par_channel: 32
    par_emb_dim: 48
    norm: "batch"
    img_in_channels: 1280
    vit_reso: 16
    # Only use category embedding when all categories are trained.
    use_cat_embedding: false
    block_type: multiview_local
    par_point_encoder:
      plane_reso: 64
      plane_latent_dim: 32
      n_blocks: 5
      unet:
        depth: 3
        merge_mode: concat
        start_filts: 32
        output_dim: 32
# Loss / criterion selection.
# NOTE(review): indentation was reconstructed; `type` and `use_par` are
# assumed to be children of `criterion` because they directly follow it.
criterion:
  type: EDMLoss_MultiImgCond
  use_par: true
# Dataset configuration.
# NOTE(review): indentation was reconstructed; every key below is assumed to
# be a child of `dataset` because it follows it with no other section header.
dataset:
  type: Occ_Par_MultiImg_Finetune
  # Placeholder value; presumably must be replaced with the real data
  # location before use -- confirm how the loader treats "???".
  data_path: "???"
  surface_size: 20000
  par_pc_size: 2048
  load_proj_mat: true
  load_image: true
  par_point_aug: 0.5
  par_prefix: "aug7_"
  # Use lowres arkitscene or highres to train; the lowres scene is more
  # user accessible.
  keyword: lowres
  jitter_partial_pretrain: 0.02
  jitter_partial_finetune: 0.02
  jitter_partial_val: 0.0
  use_pretrain_data: false