{
  "_name_or_path": "checkpoints/mtgv/MobileLLaMA-1.4B-Chat",
  "anyprec": {
    "arch_config": {
      "layers_name": "layers",
      "model_name": "model",
      "module_names": [
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj"
      ]
    },
    "group_count": 1,
    "parent_precision": 4,
    "seed_precision": 2,
    "sparse_numvals": {
      "model.layers.0.mlp.down_proj": 391081,
      "model.layers.0.mlp.gate_proj": 386698,
      "model.layers.0.mlp.up_proj": 378886,
      "model.layers.0.self_attn.k_proj": 274937,
      "model.layers.0.self_attn.o_proj": 146763,
      "model.layers.0.self_attn.q_proj": 253983,
      "model.layers.0.self_attn.v_proj": 160101,
      "model.layers.1.mlp.down_proj": 385358,
      "model.layers.1.mlp.gate_proj": 387502,
      "model.layers.1.mlp.up_proj": 379544,
      "model.layers.1.self_attn.k_proj": 473298,
      "model.layers.1.self_attn.o_proj": 214534,
      "model.layers.1.self_attn.q_proj": 474665,
      "model.layers.1.self_attn.v_proj": 173411,
      "model.layers.10.mlp.down_proj": 384965,
      "model.layers.10.mlp.gate_proj": 418178,
      "model.layers.10.mlp.up_proj": 394578,
      "model.layers.10.self_attn.k_proj": 293514,
      "model.layers.10.self_attn.o_proj": 161452,
      "model.layers.10.self_attn.q_proj": 275451,
      "model.layers.10.self_attn.v_proj": 169149,
      "model.layers.11.mlp.down_proj": 386407,
      "model.layers.11.mlp.gate_proj": 418151,
      "model.layers.11.mlp.up_proj": 396416,
      "model.layers.11.self_attn.k_proj": 283696,
      "model.layers.11.self_attn.o_proj": 153623,
      "model.layers.11.self_attn.q_proj": 278095,
      "model.layers.11.self_attn.v_proj": 165117,
      "model.layers.12.mlp.down_proj": 395321,
      "model.layers.12.mlp.gate_proj": 434671,
      "model.layers.12.mlp.up_proj": 406955,
      "model.layers.12.self_attn.k_proj": 265587,
      "model.layers.12.self_attn.o_proj": 147530,
      "model.layers.12.self_attn.q_proj": 254328,
      "model.layers.12.self_attn.v_proj": 164366,
      "model.layers.13.mlp.down_proj": 402235,
      "model.layers.13.mlp.gate_proj": 462668,
      "model.layers.13.mlp.up_proj": 412243,
      "model.layers.13.self_attn.k_proj": 268726,
      "model.layers.13.self_attn.o_proj": 155370,
      "model.layers.13.self_attn.q_proj": 264605,
      "model.layers.13.self_attn.v_proj": 179360,
      "model.layers.14.mlp.down_proj": 411161,
      "model.layers.14.mlp.gate_proj": 475909,
      "model.layers.14.mlp.up_proj": 421549,
      "model.layers.14.self_attn.k_proj": 276528,
      "model.layers.14.self_attn.o_proj": 155414,
      "model.layers.14.self_attn.q_proj": 263462,
      "model.layers.14.self_attn.v_proj": 174496,
      "model.layers.15.mlp.down_proj": 407797,
      "model.layers.15.mlp.gate_proj": 466950,
      "model.layers.15.mlp.up_proj": 422795,
      "model.layers.15.self_attn.k_proj": 277968,
      "model.layers.15.self_attn.o_proj": 158232,
      "model.layers.15.self_attn.q_proj": 277897,
      "model.layers.15.self_attn.v_proj": 178970,
      "model.layers.16.mlp.down_proj": 406442,
      "model.layers.16.mlp.gate_proj": 458305,
      "model.layers.16.mlp.up_proj": 423591,
      "model.layers.16.self_attn.k_proj": 255441,
      "model.layers.16.self_attn.o_proj": 159098,
      "model.layers.16.self_attn.q_proj": 250977,
      "model.layers.16.self_attn.v_proj": 175023,
      "model.layers.17.mlp.down_proj": 402755,
      "model.layers.17.mlp.gate_proj": 442715,
      "model.layers.17.mlp.up_proj": 417427,
      "model.layers.17.self_attn.k_proj": 271682,
      "model.layers.17.self_attn.o_proj": 160200,
      "model.layers.17.self_attn.q_proj": 268211,
      "model.layers.17.self_attn.v_proj": 170915,
      "model.layers.18.mlp.down_proj": 393810,
      "model.layers.18.mlp.gate_proj": 425845,
      "model.layers.18.mlp.up_proj": 412307,
      "model.layers.18.self_attn.k_proj": 251695,
      "model.layers.18.self_attn.o_proj": 160815,
      "model.layers.18.self_attn.q_proj": 270035,
      "model.layers.18.self_attn.v_proj": 170143,
      "model.layers.19.mlp.down_proj": 391018,
      "model.layers.19.mlp.gate_proj": 414782,
      "model.layers.19.mlp.up_proj": 407914,
      "model.layers.19.self_attn.k_proj": 250159,
      "model.layers.19.self_attn.o_proj": 180878,
      "model.layers.19.self_attn.q_proj": 248349,
      "model.layers.19.self_attn.v_proj": 185533,
      "model.layers.2.mlp.down_proj": 377206,
      "model.layers.2.mlp.gate_proj": 373395,
      "model.layers.2.mlp.up_proj": 376499,
      "model.layers.2.self_attn.k_proj": 364956,
      "model.layers.2.self_attn.o_proj": 163255,
      "model.layers.2.self_attn.q_proj": 313191,
      "model.layers.2.self_attn.v_proj": 155599,
      "model.layers.20.mlp.down_proj": 390830,
      "model.layers.20.mlp.gate_proj": 407028,
      "model.layers.20.mlp.up_proj": 404880,
      "model.layers.20.self_attn.k_proj": 231168,
      "model.layers.20.self_attn.o_proj": 169556,
      "model.layers.20.self_attn.q_proj": 226735,
      "model.layers.20.self_attn.v_proj": 167471,
      "model.layers.21.mlp.down_proj": 388610,
      "model.layers.21.mlp.gate_proj": 396068,
      "model.layers.21.mlp.up_proj": 394762,
      "model.layers.21.self_attn.k_proj": 233144,
      "model.layers.21.self_attn.o_proj": 202859,
      "model.layers.21.self_attn.q_proj": 224758,
      "model.layers.21.self_attn.v_proj": 197088,
      "model.layers.22.mlp.down_proj": 410384,
      "model.layers.22.mlp.gate_proj": 411952,
      "model.layers.22.mlp.up_proj": 405958,
      "model.layers.22.self_attn.k_proj": 246798,
      "model.layers.22.self_attn.o_proj": 184618,
      "model.layers.22.self_attn.q_proj": 244163,
      "model.layers.22.self_attn.v_proj": 180896,
      "model.layers.23.mlp.down_proj": 504810,
      "model.layers.23.mlp.gate_proj": 452978,
      "model.layers.23.mlp.up_proj": 477128,
      "model.layers.23.self_attn.k_proj": 205122,
      "model.layers.23.self_attn.o_proj": 204087,
      "model.layers.23.self_attn.q_proj": 196846,
      "model.layers.23.self_attn.v_proj": 193336,
      "model.layers.3.mlp.down_proj": 374150,
      "model.layers.3.mlp.gate_proj": 372350,
      "model.layers.3.mlp.up_proj": 375109,
      "model.layers.3.self_attn.k_proj": 290489,
      "model.layers.3.self_attn.o_proj": 143544,
      "model.layers.3.self_attn.q_proj": 247264,
      "model.layers.3.self_attn.v_proj": 148063,
      "model.layers.4.mlp.down_proj": 381223,
      "model.layers.4.mlp.gate_proj": 376904,
      "model.layers.4.mlp.up_proj": 378407,
      "model.layers.4.self_attn.k_proj": 289375,
      "model.layers.4.self_attn.o_proj": 140505,
      "model.layers.4.self_attn.q_proj": 237856,
      "model.layers.4.self_attn.v_proj": 146613,
      "model.layers.5.mlp.down_proj": 379920,
      "model.layers.5.mlp.gate_proj": 377189,
      "model.layers.5.mlp.up_proj": 377591,
      "model.layers.5.self_attn.k_proj": 282178,
      "model.layers.5.self_attn.o_proj": 155071,
      "model.layers.5.self_attn.q_proj": 251705,
      "model.layers.5.self_attn.v_proj": 164087,
      "model.layers.6.mlp.down_proj": 374917,
      "model.layers.6.mlp.gate_proj": 384595,
      "model.layers.6.mlp.up_proj": 378445,
      "model.layers.6.self_attn.k_proj": 285864,
      "model.layers.6.self_attn.o_proj": 166756,
      "model.layers.6.self_attn.q_proj": 254119,
      "model.layers.6.self_attn.v_proj": 181957,
      "model.layers.7.mlp.down_proj": 376459,
      "model.layers.7.mlp.gate_proj": 390927,
      "model.layers.7.mlp.up_proj": 385830,
      "model.layers.7.self_attn.k_proj": 250179,
      "model.layers.7.self_attn.o_proj": 150186,
      "model.layers.7.self_attn.q_proj": 220969,
      "model.layers.7.self_attn.v_proj": 158929,
      "model.layers.8.mlp.down_proj": 382800,
      "model.layers.8.mlp.gate_proj": 413009,
      "model.layers.8.mlp.up_proj": 396348,
      "model.layers.8.self_attn.k_proj": 267251,
      "model.layers.8.self_attn.o_proj": 173103,
      "model.layers.8.self_attn.q_proj": 235754,
      "model.layers.8.self_attn.v_proj": 191066,
      "model.layers.9.mlp.down_proj": 381195,
      "model.layers.9.mlp.gate_proj": 405387,
      "model.layers.9.mlp.up_proj": 395238,
      "model.layers.9.self_attn.k_proj": 285793,
      "model.layers.9.self_attn.o_proj": 172217,
      "model.layers.9.self_attn.q_proj": 269692,
      "model.layers.9.self_attn.v_proj": 186773
    }
  },
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 16,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 32000
}