{
  "_class_name": "MaskGiTUViT",
  "_version": "0.0.1",
  "add_cond_embeds": true,
  "add_cross_attention": true,
  "add_micro_cond_embeds": true,
  "attention_dropout": 0.0,
  "block_has_attention": [
    true
  ],
  "block_num_heads": [
    12
  ],
  "block_out_channels": [
    768
  ],
  "codebook_size": 8192,
  "cond_embed_dim": 768,
  "encoder_hidden_size": 768,
  "ffn_type": "glu",
  "hidden_dropout": 0.0,
  "hidden_size": 1024,
  "in_channels": 768,
  "initializer_range": 0.02,
  "intermediate_size": 2816,
  "layer_norm_before_mlm": false,
  "layer_norm_embedddings": false,
  "layer_norm_eps": 0.000001,
  "learn_uncond_embeds": false,
  "ln_elementwise_affine": true,
  "mask_token_id": 8255,
  "max_position_embeddings": 256,
  "micro_cond_embed_dim": 1280,
  "micro_cond_encode_dim": 256,
  "norm_type": "rmsnorm",
  "num_attention_heads": 16,
  "num_classes": null,
  "num_hidden_layers": 22,
  "num_res_blocks": 3,
  "num_vq_tokens": 256,
  "patch_size": 1,
  "project_encoder_hidden_states": true,
  "res_ffn_factor": 4,
  "use_bias": false,
  "use_codebook_size_for_output": true,
  "use_empty_embeds_for_uncond": true,
  "use_encoder_layernorm": false,
  "use_normformer": false,
  "use_position_embeddings": false,
  "use_vannilla_resblock": false,
  "vocab_size": 8256,
  "xavier_init_embed": true
}