{ "activation": "silu", "attention_type": "k_is_v", "attn_type": "mla", "bias": false, "d_model": 1024, "dropout": 0.2, "head_dim": 64, "hidden_dim": 1536, "kv_lora_rank": null, "mlp": "glu", "num_heads": 21, "num_kv_heads": 0, "num_layers": 16, "rope_head_dim": 64, "seq_len": 256, "vocab_size": 50257, "weight_tying": true }