{
  "num_attention_heads": 16,
  "attention_head_dim": 72,
  "in_channels": 4,
  "cond_channels": 9,
  "out_channels": 8,
  "num_layers": 28,
  "dropout": 0.0,
  "norm_num_groups": 32,
  "cross_attention_dim": 1152,
  "attention_bias": true,
  "sample_size": 128,
  "patch_size": 2,
  "activation_fn": "gelu-approximate",
  "num_embeds_ada_norm": 1000,
  "upcast_attention": false,
  "norm_type": "ada_norm_single",
  "norm_elementwise_affine": false,
  "norm_eps": 1e-06,
  "caption_channels": 4096,
  "attention_type": "default"
}