{
  "_name_or_path": "checkpoints/mtgv/MobileLLaMA-1.4B-Chat",
  "anyprec": {
    "arch_config": {
      "layers_name": "layers",
      "model_name": "model",
      "module_names": [
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj"
      ]
    },
    "group_count": 1,
    "parent_precision": 4,
    "seed_precision": 2,
    "sparse_numvals": {
      "model.layers.0.mlp.down_proj": 191326,
      "model.layers.0.mlp.gate_proj": 189994,
      "model.layers.0.mlp.up_proj": 180517,
      "model.layers.0.self_attn.k_proj": 176786,
      "model.layers.0.self_attn.o_proj": 72994,
      "model.layers.0.self_attn.q_proj": 158594,
      "model.layers.0.self_attn.v_proj": 81335,
      "model.layers.1.mlp.down_proj": 185829,
      "model.layers.1.mlp.gate_proj": 191585,
      "model.layers.1.mlp.up_proj": 180912,
      "model.layers.1.self_attn.k_proj": 355737,
      "model.layers.1.self_attn.o_proj": 125591,
      "model.layers.1.self_attn.q_proj": 351572,
      "model.layers.1.self_attn.v_proj": 92291,
      "model.layers.10.mlp.down_proj": 185125,
      "model.layers.10.mlp.gate_proj": 212731,
      "model.layers.10.mlp.up_proj": 191684,
      "model.layers.10.self_attn.k_proj": 192285,
      "model.layers.10.self_attn.o_proj": 85016,
      "model.layers.10.self_attn.q_proj": 176785,
      "model.layers.10.self_attn.v_proj": 88862,
      "model.layers.11.mlp.down_proj": 187377,
      "model.layers.11.mlp.gate_proj": 214387,
      "model.layers.11.mlp.up_proj": 193449,
      "model.layers.11.self_attn.k_proj": 183041,
      "model.layers.11.self_attn.o_proj": 79355,
      "model.layers.11.self_attn.q_proj": 179551,
      "model.layers.11.self_attn.v_proj": 85991,
      "model.layers.12.mlp.down_proj": 193165,
      "model.layers.12.mlp.gate_proj": 228979,
      "model.layers.12.mlp.up_proj": 202530,
      "model.layers.12.self_attn.k_proj": 168033,
      "model.layers.12.self_attn.o_proj": 74731,
      "model.layers.12.self_attn.q_proj": 159784,
      "model.layers.12.self_attn.v_proj": 85258,
      "model.layers.13.mlp.down_proj": 199578,
      "model.layers.13.mlp.gate_proj": 254937,
      "model.layers.13.mlp.up_proj": 205412,
      "model.layers.13.self_attn.k_proj": 168574,
      "model.layers.13.self_attn.o_proj": 80155,
      "model.layers.13.self_attn.q_proj": 166589,
      "model.layers.13.self_attn.v_proj": 96907,
      "model.layers.14.mlp.down_proj": 206123,
      "model.layers.14.mlp.gate_proj": 267261,
      "model.layers.14.mlp.up_proj": 213029,
      "model.layers.14.self_attn.k_proj": 175625,
      "model.layers.14.self_attn.o_proj": 79709,
      "model.layers.14.self_attn.q_proj": 165058,
      "model.layers.14.self_attn.v_proj": 92779,
      "model.layers.15.mlp.down_proj": 202519,
      "model.layers.15.mlp.gate_proj": 259676,
      "model.layers.15.mlp.up_proj": 213468,
      "model.layers.15.self_attn.k_proj": 178037,
      "model.layers.15.self_attn.o_proj": 82764,
      "model.layers.15.self_attn.q_proj": 178982,
      "model.layers.15.self_attn.v_proj": 95999,
      "model.layers.16.mlp.down_proj": 201854,
      "model.layers.16.mlp.gate_proj": 250550,
      "model.layers.16.mlp.up_proj": 213411,
      "model.layers.16.self_attn.k_proj": 160056,
      "model.layers.16.self_attn.o_proj": 83224,
      "model.layers.16.self_attn.q_proj": 157716,
      "model.layers.16.self_attn.v_proj": 92808,
      "model.layers.17.mlp.down_proj": 198730,
      "model.layers.17.mlp.gate_proj": 237767,
      "model.layers.17.mlp.up_proj": 209909,
      "model.layers.17.self_attn.k_proj": 171644,
      "model.layers.17.self_attn.o_proj": 83754,
      "model.layers.17.self_attn.q_proj": 172707,
      "model.layers.17.self_attn.v_proj": 90734,
      "model.layers.18.mlp.down_proj": 192702,
      "model.layers.18.mlp.gate_proj": 222867,
      "model.layers.18.mlp.up_proj": 206730,
      "model.layers.18.self_attn.k_proj": 156690,
      "model.layers.18.self_attn.o_proj": 84609,
      "model.layers.18.self_attn.q_proj": 174408,
      "model.layers.18.self_attn.v_proj": 89341,
      "model.layers.19.mlp.down_proj": 191205,
      "model.layers.19.mlp.gate_proj": 213948,
      "model.layers.19.mlp.up_proj": 201900,
      "model.layers.19.self_attn.k_proj": 155221,
      "model.layers.19.self_attn.o_proj": 100111,
      "model.layers.19.self_attn.q_proj": 154902,
      "model.layers.19.self_attn.v_proj": 102790,
      "model.layers.2.mlp.down_proj": 180930,
      "model.layers.2.mlp.gate_proj": 178939,
      "model.layers.2.mlp.up_proj": 178176,
      "model.layers.2.self_attn.k_proj": 254302,
      "model.layers.2.self_attn.o_proj": 85627,
      "model.layers.2.self_attn.q_proj": 208855,
      "model.layers.2.self_attn.v_proj": 78918,
      "model.layers.20.mlp.down_proj": 189531,
      "model.layers.20.mlp.gate_proj": 208030,
      "model.layers.20.mlp.up_proj": 200150,
      "model.layers.20.self_attn.k_proj": 136537,
      "model.layers.20.self_attn.o_proj": 91412,
      "model.layers.20.self_attn.q_proj": 136465,
      "model.layers.20.self_attn.v_proj": 87525,
      "model.layers.21.mlp.down_proj": 189005,
      "model.layers.21.mlp.gate_proj": 197507,
      "model.layers.21.mlp.up_proj": 192699,
      "model.layers.21.self_attn.k_proj": 140343,
      "model.layers.21.self_attn.o_proj": 117585,
      "model.layers.21.self_attn.q_proj": 134785,
      "model.layers.21.self_attn.v_proj": 111843,
      "model.layers.22.mlp.down_proj": 204005,
      "model.layers.22.mlp.gate_proj": 210880,
      "model.layers.22.mlp.up_proj": 200959,
      "model.layers.22.self_attn.k_proj": 150879,
      "model.layers.22.self_attn.o_proj": 101330,
      "model.layers.22.self_attn.q_proj": 151577,
      "model.layers.22.self_attn.v_proj": 98921,
      "model.layers.23.mlp.down_proj": 277332,
      "model.layers.23.mlp.gate_proj": 245277,
      "model.layers.23.mlp.up_proj": 258698,
      "model.layers.23.self_attn.k_proj": 118274,
      "model.layers.23.self_attn.o_proj": 114643,
      "model.layers.23.self_attn.q_proj": 113038,
      "model.layers.23.self_attn.v_proj": 107870,
      "model.layers.3.mlp.down_proj": 178845,
      "model.layers.3.mlp.gate_proj": 177565,
      "model.layers.3.mlp.up_proj": 178331,
      "model.layers.3.self_attn.k_proj": 187462,
      "model.layers.3.self_attn.o_proj": 70922,
      "model.layers.3.self_attn.q_proj": 152011,
      "model.layers.3.self_attn.v_proj": 72948,
      "model.layers.4.mlp.down_proj": 183192,
      "model.layers.4.mlp.gate_proj": 181505,
      "model.layers.4.mlp.up_proj": 180662,
      "model.layers.4.self_attn.k_proj": 186191,
      "model.layers.4.self_attn.o_proj": 68503,
      "model.layers.4.self_attn.q_proj": 143279,
      "model.layers.4.self_attn.v_proj": 71540,
      "model.layers.5.mlp.down_proj": 182326,
      "model.layers.5.mlp.gate_proj": 180629,
      "model.layers.5.mlp.up_proj": 180722,
      "model.layers.5.self_attn.k_proj": 179644,
      "model.layers.5.self_attn.o_proj": 78534,
      "model.layers.5.self_attn.q_proj": 155296,
      "model.layers.5.self_attn.v_proj": 84819,
      "model.layers.6.mlp.down_proj": 179180,
      "model.layers.6.mlp.gate_proj": 187118,
      "model.layers.6.mlp.up_proj": 179770,
      "model.layers.6.self_attn.k_proj": 181747,
      "model.layers.6.self_attn.o_proj": 86711,
      "model.layers.6.self_attn.q_proj": 156780,
      "model.layers.6.self_attn.v_proj": 97342,
      "model.layers.7.mlp.down_proj": 179455,
      "model.layers.7.mlp.gate_proj": 190350,
      "model.layers.7.mlp.up_proj": 185410,
      "model.layers.7.self_attn.k_proj": 152715,
      "model.layers.7.self_attn.o_proj": 75703,
      "model.layers.7.self_attn.q_proj": 130349,
      "model.layers.7.self_attn.v_proj": 81380,
      "model.layers.8.mlp.down_proj": 184587,
      "model.layers.8.mlp.gate_proj": 206660,
      "model.layers.8.mlp.up_proj": 192351,
      "model.layers.8.self_attn.k_proj": 167562,
      "model.layers.8.self_attn.o_proj": 90781,
      "model.layers.8.self_attn.q_proj": 142073,
      "model.layers.8.self_attn.v_proj": 103015,
      "model.layers.9.mlp.down_proj": 183099,
      "model.layers.9.mlp.gate_proj": 202416,
      "model.layers.9.mlp.up_proj": 192308,
      "model.layers.9.self_attn.k_proj": 184713,
      "model.layers.9.self_attn.o_proj": 92257,
      "model.layers.9.self_attn.q_proj": 170487,
      "model.layers.9.self_attn.v_proj": 101971
    }
  },
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 16,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 32000
}