{
  "_name_or_path": "checkpoints/lmsys/vicuna-7b-v1.5",
  "anyprec": {
    "arch_config": {
      "layers_name": "layers",
      "model_name": "model",
      "module_names": [
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj"
      ]
    },
    "group_count": 1,
    "parent_precision": 4,
    "seed_precision": 2,
    "sparse_numvals": {
      "model.layers.0.mlp.down_proj": 865717,
      "model.layers.0.mlp.gate_proj": 891731,
      "model.layers.0.mlp.up_proj": 852167,
      "model.layers.0.self_attn.k_proj": 1666837,
      "model.layers.0.self_attn.o_proj": 1167262,
      "model.layers.0.self_attn.q_proj": 1863615,
      "model.layers.0.self_attn.v_proj": 969624,
      "model.layers.1.mlp.down_proj": 807017,
      "model.layers.1.mlp.gate_proj": 839914,
      "model.layers.1.mlp.up_proj": 822530,
      "model.layers.1.self_attn.k_proj": 1576204,
      "model.layers.1.self_attn.o_proj": 1519063,
      "model.layers.1.self_attn.q_proj": 1832663,
      "model.layers.1.self_attn.v_proj": 1032409,
      "model.layers.10.mlp.down_proj": 846612,
      "model.layers.10.mlp.gate_proj": 1101086,
      "model.layers.10.mlp.up_proj": 847383,
      "model.layers.10.self_attn.k_proj": 832812,
      "model.layers.10.self_attn.o_proj": 331226,
      "model.layers.10.self_attn.q_proj": 722725,
      "model.layers.10.self_attn.v_proj": 355123,
      "model.layers.11.mlp.down_proj": 835304,
      "model.layers.11.mlp.gate_proj": 1058635,
      "model.layers.11.mlp.up_proj": 839004,
      "model.layers.11.self_attn.k_proj": 894813,
      "model.layers.11.self_attn.o_proj": 362320,
      "model.layers.11.self_attn.q_proj": 841764,
      "model.layers.11.self_attn.v_proj": 389021,
      "model.layers.12.mlp.down_proj": 834675,
      "model.layers.12.mlp.gate_proj": 1032051,
      "model.layers.12.mlp.up_proj": 835628,
      "model.layers.12.self_attn.k_proj": 785556,
      "model.layers.12.self_attn.o_proj": 350763,
      "model.layers.12.self_attn.q_proj": 690419,
      "model.layers.12.self_attn.v_proj": 379603,
      "model.layers.13.mlp.down_proj": 838436,
      "model.layers.13.mlp.gate_proj": 1038017,
      "model.layers.13.mlp.up_proj": 843742,
      "model.layers.13.self_attn.k_proj": 731054,
      "model.layers.13.self_attn.o_proj": 346685,
      "model.layers.13.self_attn.q_proj": 671416,
      "model.layers.13.self_attn.v_proj": 372488,
      "model.layers.14.mlp.down_proj": 829572,
      "model.layers.14.mlp.gate_proj": 1003234,
      "model.layers.14.mlp.up_proj": 836142,
      "model.layers.14.self_attn.k_proj": 751100,
      "model.layers.14.self_attn.o_proj": 332196,
      "model.layers.14.self_attn.q_proj": 685869,
      "model.layers.14.self_attn.v_proj": 357686,
      "model.layers.15.mlp.down_proj": 838548,
      "model.layers.15.mlp.gate_proj": 1029194,
      "model.layers.15.mlp.up_proj": 845673,
      "model.layers.15.self_attn.k_proj": 705811,
      "model.layers.15.self_attn.o_proj": 334447,
      "model.layers.15.self_attn.q_proj": 625606,
      "model.layers.15.self_attn.v_proj": 352729,
      "model.layers.16.mlp.down_proj": 826342,
      "model.layers.16.mlp.gate_proj": 994497,
      "model.layers.16.mlp.up_proj": 827543,
      "model.layers.16.self_attn.k_proj": 753410,
      "model.layers.16.self_attn.o_proj": 323099,
      "model.layers.16.self_attn.q_proj": 665073,
      "model.layers.16.self_attn.v_proj": 347081,
      "model.layers.17.mlp.down_proj": 811167,
      "model.layers.17.mlp.gate_proj": 972726,
      "model.layers.17.mlp.up_proj": 812071,
      "model.layers.17.self_attn.k_proj": 659277,
      "model.layers.17.self_attn.o_proj": 317972,
      "model.layers.17.self_attn.q_proj": 600119,
      "model.layers.17.self_attn.v_proj": 333850,
      "model.layers.18.mlp.down_proj": 806090,
      "model.layers.18.mlp.gate_proj": 960664,
      "model.layers.18.mlp.up_proj": 808056,
      "model.layers.18.self_attn.k_proj": 625283,
      "model.layers.18.self_attn.o_proj": 319237,
      "model.layers.18.self_attn.q_proj": 566406,
      "model.layers.18.self_attn.v_proj": 340075,
      "model.layers.19.mlp.down_proj": 800026,
      "model.layers.19.mlp.gate_proj": 933181,
      "model.layers.19.mlp.up_proj": 800906,
      "model.layers.19.self_attn.k_proj": 672961,
      "model.layers.19.self_attn.o_proj": 323140,
      "model.layers.19.self_attn.q_proj": 607565,
      "model.layers.19.self_attn.v_proj": 344369,
      "model.layers.2.mlp.down_proj": 790912,
      "model.layers.2.mlp.gate_proj": 799857,
      "model.layers.2.mlp.up_proj": 801079,
      "model.layers.2.self_attn.k_proj": 1427644,
      "model.layers.2.self_attn.o_proj": 346883,
      "model.layers.2.self_attn.q_proj": 1150405,
      "model.layers.2.self_attn.v_proj": 363279,
      "model.layers.20.mlp.down_proj": 793927,
      "model.layers.20.mlp.gate_proj": 894220,
      "model.layers.20.mlp.up_proj": 794121,
      "model.layers.20.self_attn.k_proj": 714458,
      "model.layers.20.self_attn.o_proj": 307963,
      "model.layers.20.self_attn.q_proj": 647779,
      "model.layers.20.self_attn.v_proj": 319545,
      "model.layers.21.mlp.down_proj": 791701,
      "model.layers.21.mlp.gate_proj": 878917,
      "model.layers.21.mlp.up_proj": 786352,
      "model.layers.21.self_attn.k_proj": 724078,
      "model.layers.21.self_attn.o_proj": 307127,
      "model.layers.21.self_attn.q_proj": 658484,
      "model.layers.21.self_attn.v_proj": 324002,
      "model.layers.22.mlp.down_proj": 789108,
      "model.layers.22.mlp.gate_proj": 864456,
      "model.layers.22.mlp.up_proj": 785534,
      "model.layers.22.self_attn.k_proj": 668520,
      "model.layers.22.self_attn.o_proj": 322998,
      "model.layers.22.self_attn.q_proj": 610997,
      "model.layers.22.self_attn.v_proj": 331764,
      "model.layers.23.mlp.down_proj": 785969,
      "model.layers.23.mlp.gate_proj": 844693,
      "model.layers.23.mlp.up_proj": 784126,
      "model.layers.23.self_attn.k_proj": 596794,
      "model.layers.23.self_attn.o_proj": 321698,
      "model.layers.23.self_attn.q_proj": 557578,
      "model.layers.23.self_attn.v_proj": 332073,
      "model.layers.24.mlp.down_proj": 783216,
      "model.layers.24.mlp.gate_proj": 850741,
      "model.layers.24.mlp.up_proj": 781510,
      "model.layers.24.self_attn.k_proj": 772877,
      "model.layers.24.self_attn.o_proj": 314960,
      "model.layers.24.self_attn.q_proj": 713560,
      "model.layers.24.self_attn.v_proj": 322067,
      "model.layers.25.mlp.down_proj": 789855,
      "model.layers.25.mlp.gate_proj": 866039,
      "model.layers.25.mlp.up_proj": 781967,
      "model.layers.25.self_attn.k_proj": 614644,
      "model.layers.25.self_attn.o_proj": 311631,
      "model.layers.25.self_attn.q_proj": 590696,
      "model.layers.25.self_attn.v_proj": 320576,
      "model.layers.26.mlp.down_proj": 794329,
      "model.layers.26.mlp.gate_proj": 902478,
      "model.layers.26.mlp.up_proj": 784671,
      "model.layers.26.self_attn.k_proj": 669669,
      "model.layers.26.self_attn.o_proj": 363895,
      "model.layers.26.self_attn.q_proj": 625906,
      "model.layers.26.self_attn.v_proj": 352631,
      "model.layers.27.mlp.down_proj": 804636,
      "model.layers.27.mlp.gate_proj": 940867,
      "model.layers.27.mlp.up_proj": 788268,
      "model.layers.27.self_attn.k_proj": 492336,
      "model.layers.27.self_attn.o_proj": 324286,
      "model.layers.27.self_attn.q_proj": 475451,
      "model.layers.27.self_attn.v_proj": 325662,
      "model.layers.28.mlp.down_proj": 817492,
      "model.layers.28.mlp.gate_proj": 972600,
      "model.layers.28.mlp.up_proj": 800454,
      "model.layers.28.self_attn.k_proj": 553908,
      "model.layers.28.self_attn.o_proj": 336761,
      "model.layers.28.self_attn.q_proj": 533911,
      "model.layers.28.self_attn.v_proj": 350670,
      "model.layers.29.mlp.down_proj": 845082,
      "model.layers.29.mlp.gate_proj": 1019804,
      "model.layers.29.mlp.up_proj": 812861,
      "model.layers.29.self_attn.k_proj": 637146,
      "model.layers.29.self_attn.o_proj": 320821,
      "model.layers.29.self_attn.q_proj": 602307,
      "model.layers.29.self_attn.v_proj": 333849,
      "model.layers.3.mlp.down_proj": 799046,
      "model.layers.3.mlp.gate_proj": 819762,
      "model.layers.3.mlp.up_proj": 807689,
      "model.layers.3.self_attn.k_proj": 1037087,
      "model.layers.3.self_attn.o_proj": 381601,
      "model.layers.3.self_attn.q_proj": 882621,
      "model.layers.3.self_attn.v_proj": 393971,
      "model.layers.30.mlp.down_proj": 915437,
      "model.layers.30.mlp.gate_proj": 1066678,
      "model.layers.30.mlp.up_proj": 833731,
      "model.layers.30.self_attn.k_proj": 513532,
      "model.layers.30.self_attn.o_proj": 321348,
      "model.layers.30.self_attn.q_proj": 497984,
      "model.layers.30.self_attn.v_proj": 325219,
      "model.layers.31.mlp.down_proj": 1046449,
      "model.layers.31.mlp.gate_proj": 1064742,
      "model.layers.31.mlp.up_proj": 913077,
      "model.layers.31.self_attn.k_proj": 587052,
      "model.layers.31.self_attn.o_proj": 362137,
      "model.layers.31.self_attn.q_proj": 515769,
      "model.layers.31.self_attn.v_proj": 359219,
      "model.layers.4.mlp.down_proj": 809715,
      "model.layers.4.mlp.gate_proj": 877702,
      "model.layers.4.mlp.up_proj": 816908,
      "model.layers.4.self_attn.k_proj": 903251,
      "model.layers.4.self_attn.o_proj": 319964,
      "model.layers.4.self_attn.q_proj": 761555,
      "model.layers.4.self_attn.v_proj": 338525,
      "model.layers.5.mlp.down_proj": 818173,
      "model.layers.5.mlp.gate_proj": 940627,
      "model.layers.5.mlp.up_proj": 822212,
      "model.layers.5.self_attn.k_proj": 814611,
      "model.layers.5.self_attn.o_proj": 348464,
      "model.layers.5.self_attn.q_proj": 684653,
      "model.layers.5.self_attn.v_proj": 377853,
      "model.layers.6.mlp.down_proj": 816090,
      "model.layers.6.mlp.gate_proj": 979907,
      "model.layers.6.mlp.up_proj": 825004,
      "model.layers.6.self_attn.k_proj": 894466,
      "model.layers.6.self_attn.o_proj": 370829,
      "model.layers.6.self_attn.q_proj": 806875,
      "model.layers.6.self_attn.v_proj": 408275,
      "model.layers.7.mlp.down_proj": 817051,
      "model.layers.7.mlp.gate_proj": 1011373,
      "model.layers.7.mlp.up_proj": 825160,
      "model.layers.7.self_attn.k_proj": 870797,
      "model.layers.7.self_attn.o_proj": 392151,
      "model.layers.7.self_attn.q_proj": 815554,
      "model.layers.7.self_attn.v_proj": 426945,
      "model.layers.8.mlp.down_proj": 824092,
      "model.layers.8.mlp.gate_proj": 1018165,
      "model.layers.8.mlp.up_proj": 827172,
      "model.layers.8.self_attn.k_proj": 820539,
      "model.layers.8.self_attn.o_proj": 345716,
      "model.layers.8.self_attn.q_proj": 757234,
      "model.layers.8.self_attn.v_proj": 378191,
      "model.layers.9.mlp.down_proj": 834891,
      "model.layers.9.mlp.gate_proj": 1037677,
      "model.layers.9.mlp.up_proj": 835887,
      "model.layers.9.self_attn.k_proj": 739574,
      "model.layers.9.self_attn.o_proj": 346792,
      "model.layers.9.self_attn.q_proj": 655396,
      "model.layers.9.self_attn.v_proj": 385099
    }
  },
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 32000
}