{
"_name_or_path": "checkpoints/mtgv/MobileLLaMA-1.4B-Chat",
"anyprec": {
"arch_config": {
"layers_name": "layers",
"model_name": "model",
"module_names": [
"self_attn.q_proj",
"self_attn.k_proj",
"self_attn.v_proj",
"self_attn.o_proj",
"mlp.gate_proj",
"mlp.up_proj",
"mlp.down_proj"
]
},
"group_count": 1,
"parent_precision": 4,
"seed_precision": 2,
"sparse_numvals": {
"model.layers.0.mlp.down_proj": 391081,
"model.layers.0.mlp.gate_proj": 386698,
"model.layers.0.mlp.up_proj": 378886,
"model.layers.0.self_attn.k_proj": 274937,
"model.layers.0.self_attn.o_proj": 146763,
"model.layers.0.self_attn.q_proj": 253983,
"model.layers.0.self_attn.v_proj": 160101,
"model.layers.1.mlp.down_proj": 385358,
"model.layers.1.mlp.gate_proj": 387502,
"model.layers.1.mlp.up_proj": 379544,
"model.layers.1.self_attn.k_proj": 473298,
"model.layers.1.self_attn.o_proj": 214534,
"model.layers.1.self_attn.q_proj": 474665,
"model.layers.1.self_attn.v_proj": 173411,
"model.layers.10.mlp.down_proj": 384965,
"model.layers.10.mlp.gate_proj": 418178,
"model.layers.10.mlp.up_proj": 394578,
"model.layers.10.self_attn.k_proj": 293514,
"model.layers.10.self_attn.o_proj": 161452,
"model.layers.10.self_attn.q_proj": 275451,
"model.layers.10.self_attn.v_proj": 169149,
"model.layers.11.mlp.down_proj": 386407,
"model.layers.11.mlp.gate_proj": 418151,
"model.layers.11.mlp.up_proj": 396416,
"model.layers.11.self_attn.k_proj": 283696,
"model.layers.11.self_attn.o_proj": 153623,
"model.layers.11.self_attn.q_proj": 278095,
"model.layers.11.self_attn.v_proj": 165117,
"model.layers.12.mlp.down_proj": 395321,
"model.layers.12.mlp.gate_proj": 434671,
"model.layers.12.mlp.up_proj": 406955,
"model.layers.12.self_attn.k_proj": 265587,
"model.layers.12.self_attn.o_proj": 147530,
"model.layers.12.self_attn.q_proj": 254328,
"model.layers.12.self_attn.v_proj": 164366,
"model.layers.13.mlp.down_proj": 402235,
"model.layers.13.mlp.gate_proj": 462668,
"model.layers.13.mlp.up_proj": 412243,
"model.layers.13.self_attn.k_proj": 268726,
"model.layers.13.self_attn.o_proj": 155370,
"model.layers.13.self_attn.q_proj": 264605,
"model.layers.13.self_attn.v_proj": 179360,
"model.layers.14.mlp.down_proj": 411161,
"model.layers.14.mlp.gate_proj": 475909,
"model.layers.14.mlp.up_proj": 421549,
"model.layers.14.self_attn.k_proj": 276528,
"model.layers.14.self_attn.o_proj": 155414,
"model.layers.14.self_attn.q_proj": 263462,
"model.layers.14.self_attn.v_proj": 174496,
"model.layers.15.mlp.down_proj": 407797,
"model.layers.15.mlp.gate_proj": 466950,
"model.layers.15.mlp.up_proj": 422795,
"model.layers.15.self_attn.k_proj": 277968,
"model.layers.15.self_attn.o_proj": 158232,
"model.layers.15.self_attn.q_proj": 277897,
"model.layers.15.self_attn.v_proj": 178970,
"model.layers.16.mlp.down_proj": 406442,
"model.layers.16.mlp.gate_proj": 458305,
"model.layers.16.mlp.up_proj": 423591,
"model.layers.16.self_attn.k_proj": 255441,
"model.layers.16.self_attn.o_proj": 159098,
"model.layers.16.self_attn.q_proj": 250977,
"model.layers.16.self_attn.v_proj": 175023,
"model.layers.17.mlp.down_proj": 402755,
"model.layers.17.mlp.gate_proj": 442715,
"model.layers.17.mlp.up_proj": 417427,
"model.layers.17.self_attn.k_proj": 271682,
"model.layers.17.self_attn.o_proj": 160200,
"model.layers.17.self_attn.q_proj": 268211,
"model.layers.17.self_attn.v_proj": 170915,
"model.layers.18.mlp.down_proj": 393810,
"model.layers.18.mlp.gate_proj": 425845,
"model.layers.18.mlp.up_proj": 412307,
"model.layers.18.self_attn.k_proj": 251695,
"model.layers.18.self_attn.o_proj": 160815,
"model.layers.18.self_attn.q_proj": 270035,
"model.layers.18.self_attn.v_proj": 170143,
"model.layers.19.mlp.down_proj": 391018,
"model.layers.19.mlp.gate_proj": 414782,
"model.layers.19.mlp.up_proj": 407914,
"model.layers.19.self_attn.k_proj": 250159,
"model.layers.19.self_attn.o_proj": 180878,
"model.layers.19.self_attn.q_proj": 248349,
"model.layers.19.self_attn.v_proj": 185533,
"model.layers.2.mlp.down_proj": 377206,
"model.layers.2.mlp.gate_proj": 373395,
"model.layers.2.mlp.up_proj": 376499,
"model.layers.2.self_attn.k_proj": 364956,
"model.layers.2.self_attn.o_proj": 163255,
"model.layers.2.self_attn.q_proj": 313191,
"model.layers.2.self_attn.v_proj": 155599,
"model.layers.20.mlp.down_proj": 390830,
"model.layers.20.mlp.gate_proj": 407028,
"model.layers.20.mlp.up_proj": 404880,
"model.layers.20.self_attn.k_proj": 231168,
"model.layers.20.self_attn.o_proj": 169556,
"model.layers.20.self_attn.q_proj": 226735,
"model.layers.20.self_attn.v_proj": 167471,
"model.layers.21.mlp.down_proj": 388610,
"model.layers.21.mlp.gate_proj": 396068,
"model.layers.21.mlp.up_proj": 394762,
"model.layers.21.self_attn.k_proj": 233144,
"model.layers.21.self_attn.o_proj": 202859,
"model.layers.21.self_attn.q_proj": 224758,
"model.layers.21.self_attn.v_proj": 197088,
"model.layers.22.mlp.down_proj": 410384,
"model.layers.22.mlp.gate_proj": 411952,
"model.layers.22.mlp.up_proj": 405958,
"model.layers.22.self_attn.k_proj": 246798,
"model.layers.22.self_attn.o_proj": 184618,
"model.layers.22.self_attn.q_proj": 244163,
"model.layers.22.self_attn.v_proj": 180896,
"model.layers.23.mlp.down_proj": 504810,
"model.layers.23.mlp.gate_proj": 452978,
"model.layers.23.mlp.up_proj": 477128,
"model.layers.23.self_attn.k_proj": 205122,
"model.layers.23.self_attn.o_proj": 204087,
"model.layers.23.self_attn.q_proj": 196846,
"model.layers.23.self_attn.v_proj": 193336,
"model.layers.3.mlp.down_proj": 374150,
"model.layers.3.mlp.gate_proj": 372350,
"model.layers.3.mlp.up_proj": 375109,
"model.layers.3.self_attn.k_proj": 290489,
"model.layers.3.self_attn.o_proj": 143544,
"model.layers.3.self_attn.q_proj": 247264,
"model.layers.3.self_attn.v_proj": 148063,
"model.layers.4.mlp.down_proj": 381223,
"model.layers.4.mlp.gate_proj": 376904,
"model.layers.4.mlp.up_proj": 378407,
"model.layers.4.self_attn.k_proj": 289375,
"model.layers.4.self_attn.o_proj": 140505,
"model.layers.4.self_attn.q_proj": 237856,
"model.layers.4.self_attn.v_proj": 146613,
"model.layers.5.mlp.down_proj": 379920,
"model.layers.5.mlp.gate_proj": 377189,
"model.layers.5.mlp.up_proj": 377591,
"model.layers.5.self_attn.k_proj": 282178,
"model.layers.5.self_attn.o_proj": 155071,
"model.layers.5.self_attn.q_proj": 251705,
"model.layers.5.self_attn.v_proj": 164087,
"model.layers.6.mlp.down_proj": 374917,
"model.layers.6.mlp.gate_proj": 384595,
"model.layers.6.mlp.up_proj": 378445,
"model.layers.6.self_attn.k_proj": 285864,
"model.layers.6.self_attn.o_proj": 166756,
"model.layers.6.self_attn.q_proj": 254119,
"model.layers.6.self_attn.v_proj": 181957,
"model.layers.7.mlp.down_proj": 376459,
"model.layers.7.mlp.gate_proj": 390927,
"model.layers.7.mlp.up_proj": 385830,
"model.layers.7.self_attn.k_proj": 250179,
"model.layers.7.self_attn.o_proj": 150186,
"model.layers.7.self_attn.q_proj": 220969,
"model.layers.7.self_attn.v_proj": 158929,
"model.layers.8.mlp.down_proj": 382800,
"model.layers.8.mlp.gate_proj": 413009,
"model.layers.8.mlp.up_proj": 396348,
"model.layers.8.self_attn.k_proj": 267251,
"model.layers.8.self_attn.o_proj": 173103,
"model.layers.8.self_attn.q_proj": 235754,
"model.layers.8.self_attn.v_proj": 191066,
"model.layers.9.mlp.down_proj": 381195,
"model.layers.9.mlp.gate_proj": 405387,
"model.layers.9.mlp.up_proj": 395238,
"model.layers.9.self_attn.k_proj": 285793,
"model.layers.9.self_attn.o_proj": 172217,
"model.layers.9.self_attn.q_proj": 269692,
"model.layers.9.self_attn.v_proj": 186773
}
},
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 5632,
"max_position_embeddings": 2048,
"max_sequence_length": 2048,
"model_type": "llama",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"num_key_value_heads": 16,
"pad_token_id": 0,
"pretraining_tp": 1,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.39.3",
"use_cache": true,
"vocab_size": 32000
}
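
The "anyprec" block above records an Any-Precision quantization setup: a 2-bit seed model ("seed_precision") nested inside a 4-bit parent ("parent_precision"), with "sparse_numvals" giving the per-module count of outlier weights kept outside the quantized grid. A minimal sketch of how one might inspect these fields with only the standard library follows; the local filename "config.json" is the standard name for this file on the Hub, and the per-layer weight-count formula is an assumption derived from the LLaMA shapes declared in this same config (q/k/v/o projections are hidden_size x hidden_size since num_key_value_heads equals num_attention_heads; gate/up/down are hidden_size x intermediate_size).

import json

# Load the quantization config shown above (standard Hub filename).
with open("config.json") as f:
    config = json.load(f)

anyprec = config["anyprec"]
print(f"seed precision:   {anyprec['seed_precision']}-bit")
print(f"parent precision: {anyprec['parent_precision']}-bit")

# Total sparse outliers stored separately from the quantized weights.
sparse_total = sum(anyprec["sparse_numvals"].values())

# Parameter count of the quantized projection modules, reconstructed
# from the model shapes in this config (assumption: standard LLaMA
# layer layout, no biases).
hidden = config["hidden_size"]        # 2048
inter = config["intermediate_size"]   # 5632
layers = config["num_hidden_layers"]  # 24
per_layer = 4 * hidden * hidden + 3 * hidden * inter
quantized_total = layers * per_layer

print(f"sparse values:     {sparse_total:,}")
print(f"quantized weights: {quantized_total:,}")
print(f"sparse fraction:   {sparse_total / quantized_total:.2%}")

Run against this config, the script reports roughly 7.2 million sparse values over about 1.2 billion quantized weights, i.e. on the order of half a percent of entries are treated as outliers.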