{ "architectures": [ "HARTForT2I" ], "attn_drop_rate": 0.0, "attn_l2_norm": false, "attn_type": "llama", "cond_drop_rate": 0.1, "context_dim": 1536, "context_norm_scale": 1.0, "context_token": 300, "depth": 24, "diff_depth": 6, "diff_width": 1024, "diffusion_batch_mul": 4, "diffusion_head_repeats": 1, "disable_aln": true, "drop_path_rate": 0.10000000000000002, "drop_rate": 0.0, "embed_dim": 1536, "flash_if_available": true, "fused_if_available": true, "mlp_ratio": 4.0, "mlp_type": "llama", "model_type": "hart_transformer_t2i", "norm_eps": 1e-06, "num_heads": 24, "num_sampling_steps": "8", "patch_nums": [ 1, 2, 3, 4, 5, 7, 9, 12, 16, 21, 27, 36, 48, 64 ], "sampler": "iddpm", "sep_aln_pooling_mode": "max", "shared_aln": false, "torch_dtype": "float32", "transformers_version": "4.42.2", "use_context_norm": true, "use_cross_attn": false, "use_timestep_embed": true }