{ "_name_or_path": "checkpoints/mtgv/MobileLLaMA-1.4B-Chat", "anyprec": { "arch_config": { "layers_name": "layers", "model_name": "model", "module_names": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj", "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj" ] }, "group_count": 1, "parent_precision": 4, "seed_precision": 2, "sparse_numvals": { "model.layers.0.mlp.down_proj": 391081, "model.layers.0.mlp.gate_proj": 386698, "model.layers.0.mlp.up_proj": 378886, "model.layers.0.self_attn.k_proj": 274937, "model.layers.0.self_attn.o_proj": 146763, "model.layers.0.self_attn.q_proj": 253983, "model.layers.0.self_attn.v_proj": 160101, "model.layers.1.mlp.down_proj": 385358, "model.layers.1.mlp.gate_proj": 387502, "model.layers.1.mlp.up_proj": 379544, "model.layers.1.self_attn.k_proj": 473298, "model.layers.1.self_attn.o_proj": 214534, "model.layers.1.self_attn.q_proj": 474665, "model.layers.1.self_attn.v_proj": 173411, "model.layers.10.mlp.down_proj": 384965, "model.layers.10.mlp.gate_proj": 418178, "model.layers.10.mlp.up_proj": 394578, "model.layers.10.self_attn.k_proj": 293514, "model.layers.10.self_attn.o_proj": 161452, "model.layers.10.self_attn.q_proj": 275451, "model.layers.10.self_attn.v_proj": 169149, "model.layers.11.mlp.down_proj": 386407, "model.layers.11.mlp.gate_proj": 418151, "model.layers.11.mlp.up_proj": 396416, "model.layers.11.self_attn.k_proj": 283696, "model.layers.11.self_attn.o_proj": 153623, "model.layers.11.self_attn.q_proj": 278095, "model.layers.11.self_attn.v_proj": 165117, "model.layers.12.mlp.down_proj": 395321, "model.layers.12.mlp.gate_proj": 434671, "model.layers.12.mlp.up_proj": 406955, "model.layers.12.self_attn.k_proj": 265587, "model.layers.12.self_attn.o_proj": 147530, "model.layers.12.self_attn.q_proj": 254328, "model.layers.12.self_attn.v_proj": 164366, "model.layers.13.mlp.down_proj": 402235, "model.layers.13.mlp.gate_proj": 462668, "model.layers.13.mlp.up_proj": 412243, "model.layers.13.self_attn.k_proj": 268726, "model.layers.13.self_attn.o_proj": 155370, "model.layers.13.self_attn.q_proj": 264605, "model.layers.13.self_attn.v_proj": 179360, "model.layers.14.mlp.down_proj": 411161, "model.layers.14.mlp.gate_proj": 475909, "model.layers.14.mlp.up_proj": 421549, "model.layers.14.self_attn.k_proj": 276528, "model.layers.14.self_attn.o_proj": 155414, "model.layers.14.self_attn.q_proj": 263462, "model.layers.14.self_attn.v_proj": 174496, "model.layers.15.mlp.down_proj": 407797, "model.layers.15.mlp.gate_proj": 466950, "model.layers.15.mlp.up_proj": 422795, "model.layers.15.self_attn.k_proj": 277968, "model.layers.15.self_attn.o_proj": 158232, "model.layers.15.self_attn.q_proj": 277897, "model.layers.15.self_attn.v_proj": 178970, "model.layers.16.mlp.down_proj": 406442, "model.layers.16.mlp.gate_proj": 458305, "model.layers.16.mlp.up_proj": 423591, "model.layers.16.self_attn.k_proj": 255441, "model.layers.16.self_attn.o_proj": 159098, "model.layers.16.self_attn.q_proj": 250977, "model.layers.16.self_attn.v_proj": 175023, "model.layers.17.mlp.down_proj": 402755, "model.layers.17.mlp.gate_proj": 442715, "model.layers.17.mlp.up_proj": 417427, "model.layers.17.self_attn.k_proj": 271682, "model.layers.17.self_attn.o_proj": 160200, "model.layers.17.self_attn.q_proj": 268211, "model.layers.17.self_attn.v_proj": 170915, "model.layers.18.mlp.down_proj": 393810, "model.layers.18.mlp.gate_proj": 425845, "model.layers.18.mlp.up_proj": 412307, "model.layers.18.self_attn.k_proj": 251695, "model.layers.18.self_attn.o_proj": 160815, "model.layers.18.self_attn.q_proj": 270035, "model.layers.18.self_attn.v_proj": 170143, "model.layers.19.mlp.down_proj": 391018, "model.layers.19.mlp.gate_proj": 414782, "model.layers.19.mlp.up_proj": 407914, "model.layers.19.self_attn.k_proj": 250159, "model.layers.19.self_attn.o_proj": 180878, "model.layers.19.self_attn.q_proj": 248349, "model.layers.19.self_attn.v_proj": 185533, "model.layers.2.mlp.down_proj": 377206, "model.layers.2.mlp.gate_proj": 373395, "model.layers.2.mlp.up_proj": 376499, "model.layers.2.self_attn.k_proj": 364956, "model.layers.2.self_attn.o_proj": 163255, "model.layers.2.self_attn.q_proj": 313191, "model.layers.2.self_attn.v_proj": 155599, "model.layers.20.mlp.down_proj": 390830, "model.layers.20.mlp.gate_proj": 407028, "model.layers.20.mlp.up_proj": 404880, "model.layers.20.self_attn.k_proj": 231168, "model.layers.20.self_attn.o_proj": 169556, "model.layers.20.self_attn.q_proj": 226735, "model.layers.20.self_attn.v_proj": 167471, "model.layers.21.mlp.down_proj": 388610, "model.layers.21.mlp.gate_proj": 396068, "model.layers.21.mlp.up_proj": 394762, "model.layers.21.self_attn.k_proj": 233144, "model.layers.21.self_attn.o_proj": 202859, "model.layers.21.self_attn.q_proj": 224758, "model.layers.21.self_attn.v_proj": 197088, "model.layers.22.mlp.down_proj": 410384, "model.layers.22.mlp.gate_proj": 411952, "model.layers.22.mlp.up_proj": 405958, "model.layers.22.self_attn.k_proj": 246798, "model.layers.22.self_attn.o_proj": 184618, "model.layers.22.self_attn.q_proj": 244163, "model.layers.22.self_attn.v_proj": 180896, "model.layers.23.mlp.down_proj": 504810, "model.layers.23.mlp.gate_proj": 452978, "model.layers.23.mlp.up_proj": 477128, "model.layers.23.self_attn.k_proj": 205122, "model.layers.23.self_attn.o_proj": 204087, "model.layers.23.self_attn.q_proj": 196846, "model.layers.23.self_attn.v_proj": 193336, "model.layers.3.mlp.down_proj": 374150, "model.layers.3.mlp.gate_proj": 372350, "model.layers.3.mlp.up_proj": 375109, "model.layers.3.self_attn.k_proj": 290489, "model.layers.3.self_attn.o_proj": 143544, "model.layers.3.self_attn.q_proj": 247264, "model.layers.3.self_attn.v_proj": 148063, "model.layers.4.mlp.down_proj": 381223, "model.layers.4.mlp.gate_proj": 376904, "model.layers.4.mlp.up_proj": 378407, "model.layers.4.self_attn.k_proj": 289375, "model.layers.4.self_attn.o_proj": 140505, "model.layers.4.self_attn.q_proj": 237856, "model.layers.4.self_attn.v_proj": 146613, "model.layers.5.mlp.down_proj": 379920, "model.layers.5.mlp.gate_proj": 377189, "model.layers.5.mlp.up_proj": 377591, "model.layers.5.self_attn.k_proj": 282178, "model.layers.5.self_attn.o_proj": 155071, "model.layers.5.self_attn.q_proj": 251705, "model.layers.5.self_attn.v_proj": 164087, "model.layers.6.mlp.down_proj": 374917, "model.layers.6.mlp.gate_proj": 384595, "model.layers.6.mlp.up_proj": 378445, "model.layers.6.self_attn.k_proj": 285864, "model.layers.6.self_attn.o_proj": 166756, "model.layers.6.self_attn.q_proj": 254119, "model.layers.6.self_attn.v_proj": 181957, "model.layers.7.mlp.down_proj": 376459, "model.layers.7.mlp.gate_proj": 390927, "model.layers.7.mlp.up_proj": 385830, "model.layers.7.self_attn.k_proj": 250179, "model.layers.7.self_attn.o_proj": 150186, "model.layers.7.self_attn.q_proj": 220969, "model.layers.7.self_attn.v_proj": 158929, "model.layers.8.mlp.down_proj": 382800, "model.layers.8.mlp.gate_proj": 413009, "model.layers.8.mlp.up_proj": 396348, "model.layers.8.self_attn.k_proj": 267251, "model.layers.8.self_attn.o_proj": 173103, "model.layers.8.self_attn.q_proj": 235754, "model.layers.8.self_attn.v_proj": 191066, "model.layers.9.mlp.down_proj": 381195, "model.layers.9.mlp.gate_proj": 405387, "model.layers.9.mlp.up_proj": 395238, "model.layers.9.self_attn.k_proj": 285793, "model.layers.9.self_attn.o_proj": 172217, "model.layers.9.self_attn.q_proj": 269692, "model.layers.9.self_attn.v_proj": 186773 } }, "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5632, "max_position_embeddings": 2048, "max_sequence_length": 2048, "model_type": "llama", "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 16, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.39.3", "use_cache": true, "vocab_size": 32000 }