{
  "_name_or_path": "morphablediffusion/config.json",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "attention_dropout": 0.0,
  "attention_window": 512,
  "bos_token_id": 0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "data": {
    "params": {
      "batch_size": 70,
      "data_dir": "/cluster/scratch/xiychen/data/facescape_color_calibrated",
      "mesh_topology": "flame",
      "num_workers": 1,
      "shuffled_expression": true
    },
    "target": "ldm.data.facescape.FaceScapeDataset"
  },
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "lightning": {
    "callbacks": {},
    "modelcheckpoint": {
      "params": {
        "every_n_train_steps": 2000
      }
    },
    "trainer": {
      "accumulate_grad_batches": 1,
      "benchmark": true,
      "check_val_every_n_epoch": null,
      "max_steps": 6000,
      "num_sanity_val_steps": 0,
      "precision": 32,
      "val_check_interval": 250
    }
  },
  "max_decoder_position_embeddings": 1024,
  "max_encoder_position_embeddings": 16384,
  "model": {
    "base_learning_rate": "5e-5",
    "params": {
      "batch_view_num": 4,
      "cfg_scale": 2.0,
      "clip_image_encoder_path": "./ckpt/ViT-L-14.pt",
      "drop_conditions": false,
      "finetune_unet": true,
      "image_size": 256,
      "output_num": 8,
      "projection": "perspective",
      "scheduler_config": {
        "params": {
          "cycle_lengths": [
            100000
          ],
          "f_max": [
            1.0
          ],
          "f_min": [
            1.0
          ],
          "f_start": [
            0.02
          ],
          "warm_up_steps": [
            100
          ]
        },
        "target": "ldm.lr_scheduler.LambdaLinearScheduler"
      },
      "target_elevation": 0,
      "unet_config": {
        "params": {
          "attention_resolutions": [
            4,
            2,
            1
          ],
          "channel_mult": [
            1,
            2,
            4,
            4
          ],
          "context_dim": 768,
          "image_size": 32,
          "in_channels": 8,
          "legacy": false,
          "model_channels": 320,
          "num_heads": 8,
          "num_res_blocks": 2,
          "out_channels": 4,
          "transformer_depth": 1,
          "use_checkpoint": true,
          "use_spatial_transformer": true,
          "volume_dims": [
            64,
            128,
            256,
            512
          ]
        },
        "target": "ldm.models.diffusion.attention.DepthWiseAttention"
      },
      "use_spatial_volume": false,
      "view_num": 16
    },
    "target": "ldm.models.diffusion.morphable_diffusion.SyncMultiviewDiffusion"
  },
  "model_type": "led",
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "transformers_version": "4.42.4",
  "use_cache": true,
  "vocab_size": 50265
}