{ "architectures": [ "HARTForT2I" ], "attn_drop_rate": 0.0, "attn_l2_norm": false, "attn_type": "llama", "cond_drop_rate": 0.1, "context_dim": 1536, "context_norm_scale": 1.0, "context_token": 300, "depth": 24, "diff_depth": 6, "diff_width": 1024, "diffusion_batch_mul": 4, "diffusion_head_repeats": 1, "disable_aln": true, "drop_path_rate": 0.10000000000000002, "drop_rate": 0.0, "embed_dim": 1536, "flash_if_available": true, "fused_if_available": true, "mlp_ratio": 4.0, "mlp_type": "llama", "model_type": "hart_transformer_t2i", "norm_eps": 1e-06, "num_heads": 24, "num_sampling_steps": "8", "patch_nums": [ 1, 2, 3, 4, 5, 7, 9, 12, 16, 21, 27, 36, 48, 64 ], "sampler": "iddpm", "sep_aln_pooling_mode": "max", "shared_aln": false, "torch_dtype": "float32", "transformers_version": "4.42.2", "use_context_norm": true, "use_cross_attn": false, "use_timestep_embed": true }