{ "_name_or_path": "checkpoints/lmsys/vicuna-7b-v1.5", "anyprec": { "arch_config": { "layers_name": "layers", "model_name": "model", "module_names": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj", "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj" ] }, "group_count": 1, "parent_precision": 4, "seed_precision": 2, "sparse_numvals": { "model.layers.0.mlp.down_proj": 865717, "model.layers.0.mlp.gate_proj": 891731, "model.layers.0.mlp.up_proj": 852167, "model.layers.0.self_attn.k_proj": 1666837, "model.layers.0.self_attn.o_proj": 1167262, "model.layers.0.self_attn.q_proj": 1863615, "model.layers.0.self_attn.v_proj": 969624, "model.layers.1.mlp.down_proj": 807017, "model.layers.1.mlp.gate_proj": 839914, "model.layers.1.mlp.up_proj": 822530, "model.layers.1.self_attn.k_proj": 1576204, "model.layers.1.self_attn.o_proj": 1519063, "model.layers.1.self_attn.q_proj": 1832663, "model.layers.1.self_attn.v_proj": 1032409, "model.layers.10.mlp.down_proj": 846612, "model.layers.10.mlp.gate_proj": 1101086, "model.layers.10.mlp.up_proj": 847383, "model.layers.10.self_attn.k_proj": 832812, "model.layers.10.self_attn.o_proj": 331226, "model.layers.10.self_attn.q_proj": 722725, "model.layers.10.self_attn.v_proj": 355123, "model.layers.11.mlp.down_proj": 835304, "model.layers.11.mlp.gate_proj": 1058635, "model.layers.11.mlp.up_proj": 839004, "model.layers.11.self_attn.k_proj": 894813, "model.layers.11.self_attn.o_proj": 362320, "model.layers.11.self_attn.q_proj": 841764, "model.layers.11.self_attn.v_proj": 389021, "model.layers.12.mlp.down_proj": 834675, "model.layers.12.mlp.gate_proj": 1032051, "model.layers.12.mlp.up_proj": 835628, "model.layers.12.self_attn.k_proj": 785556, "model.layers.12.self_attn.o_proj": 350763, "model.layers.12.self_attn.q_proj": 690419, "model.layers.12.self_attn.v_proj": 379603, "model.layers.13.mlp.down_proj": 838436, "model.layers.13.mlp.gate_proj": 1038017, "model.layers.13.mlp.up_proj": 843742, "model.layers.13.self_attn.k_proj": 731054, "model.layers.13.self_attn.o_proj": 346685, "model.layers.13.self_attn.q_proj": 671416, "model.layers.13.self_attn.v_proj": 372488, "model.layers.14.mlp.down_proj": 829572, "model.layers.14.mlp.gate_proj": 1003234, "model.layers.14.mlp.up_proj": 836142, "model.layers.14.self_attn.k_proj": 751100, "model.layers.14.self_attn.o_proj": 332196, "model.layers.14.self_attn.q_proj": 685869, "model.layers.14.self_attn.v_proj": 357686, "model.layers.15.mlp.down_proj": 838548, "model.layers.15.mlp.gate_proj": 1029194, "model.layers.15.mlp.up_proj": 845673, "model.layers.15.self_attn.k_proj": 705811, "model.layers.15.self_attn.o_proj": 334447, "model.layers.15.self_attn.q_proj": 625606, "model.layers.15.self_attn.v_proj": 352729, "model.layers.16.mlp.down_proj": 826342, "model.layers.16.mlp.gate_proj": 994497, "model.layers.16.mlp.up_proj": 827543, "model.layers.16.self_attn.k_proj": 753410, "model.layers.16.self_attn.o_proj": 323099, "model.layers.16.self_attn.q_proj": 665073, "model.layers.16.self_attn.v_proj": 347081, "model.layers.17.mlp.down_proj": 811167, "model.layers.17.mlp.gate_proj": 972726, "model.layers.17.mlp.up_proj": 812071, "model.layers.17.self_attn.k_proj": 659277, "model.layers.17.self_attn.o_proj": 317972, "model.layers.17.self_attn.q_proj": 600119, "model.layers.17.self_attn.v_proj": 333850, "model.layers.18.mlp.down_proj": 806090, "model.layers.18.mlp.gate_proj": 960664, "model.layers.18.mlp.up_proj": 808056, "model.layers.18.self_attn.k_proj": 625283, "model.layers.18.self_attn.o_proj": 319237, 
"model.layers.18.self_attn.q_proj": 566406, "model.layers.18.self_attn.v_proj": 340075, "model.layers.19.mlp.down_proj": 800026, "model.layers.19.mlp.gate_proj": 933181, "model.layers.19.mlp.up_proj": 800906, "model.layers.19.self_attn.k_proj": 672961, "model.layers.19.self_attn.o_proj": 323140, "model.layers.19.self_attn.q_proj": 607565, "model.layers.19.self_attn.v_proj": 344369, "model.layers.2.mlp.down_proj": 790912, "model.layers.2.mlp.gate_proj": 799857, "model.layers.2.mlp.up_proj": 801079, "model.layers.2.self_attn.k_proj": 1427644, "model.layers.2.self_attn.o_proj": 346883, "model.layers.2.self_attn.q_proj": 1150405, "model.layers.2.self_attn.v_proj": 363279, "model.layers.20.mlp.down_proj": 793927, "model.layers.20.mlp.gate_proj": 894220, "model.layers.20.mlp.up_proj": 794121, "model.layers.20.self_attn.k_proj": 714458, "model.layers.20.self_attn.o_proj": 307963, "model.layers.20.self_attn.q_proj": 647779, "model.layers.20.self_attn.v_proj": 319545, "model.layers.21.mlp.down_proj": 791701, "model.layers.21.mlp.gate_proj": 878917, "model.layers.21.mlp.up_proj": 786352, "model.layers.21.self_attn.k_proj": 724078, "model.layers.21.self_attn.o_proj": 307127, "model.layers.21.self_attn.q_proj": 658484, "model.layers.21.self_attn.v_proj": 324002, "model.layers.22.mlp.down_proj": 789108, "model.layers.22.mlp.gate_proj": 864456, "model.layers.22.mlp.up_proj": 785534, "model.layers.22.self_attn.k_proj": 668520, "model.layers.22.self_attn.o_proj": 322998, "model.layers.22.self_attn.q_proj": 610997, "model.layers.22.self_attn.v_proj": 331764, "model.layers.23.mlp.down_proj": 785969, "model.layers.23.mlp.gate_proj": 844693, "model.layers.23.mlp.up_proj": 784126, "model.layers.23.self_attn.k_proj": 596794, "model.layers.23.self_attn.o_proj": 321698, "model.layers.23.self_attn.q_proj": 557578, "model.layers.23.self_attn.v_proj": 332073, "model.layers.24.mlp.down_proj": 783216, "model.layers.24.mlp.gate_proj": 850741, "model.layers.24.mlp.up_proj": 781510, "model.layers.24.self_attn.k_proj": 772877, "model.layers.24.self_attn.o_proj": 314960, "model.layers.24.self_attn.q_proj": 713560, "model.layers.24.self_attn.v_proj": 322067, "model.layers.25.mlp.down_proj": 789855, "model.layers.25.mlp.gate_proj": 866039, "model.layers.25.mlp.up_proj": 781967, "model.layers.25.self_attn.k_proj": 614644, "model.layers.25.self_attn.o_proj": 311631, "model.layers.25.self_attn.q_proj": 590696, "model.layers.25.self_attn.v_proj": 320576, "model.layers.26.mlp.down_proj": 794329, "model.layers.26.mlp.gate_proj": 902478, "model.layers.26.mlp.up_proj": 784671, "model.layers.26.self_attn.k_proj": 669669, "model.layers.26.self_attn.o_proj": 363895, "model.layers.26.self_attn.q_proj": 625906, "model.layers.26.self_attn.v_proj": 352631, "model.layers.27.mlp.down_proj": 804636, "model.layers.27.mlp.gate_proj": 940867, "model.layers.27.mlp.up_proj": 788268, "model.layers.27.self_attn.k_proj": 492336, "model.layers.27.self_attn.o_proj": 324286, "model.layers.27.self_attn.q_proj": 475451, "model.layers.27.self_attn.v_proj": 325662, "model.layers.28.mlp.down_proj": 817492, "model.layers.28.mlp.gate_proj": 972600, "model.layers.28.mlp.up_proj": 800454, "model.layers.28.self_attn.k_proj": 553908, "model.layers.28.self_attn.o_proj": 336761, "model.layers.28.self_attn.q_proj": 533911, "model.layers.28.self_attn.v_proj": 350670, "model.layers.29.mlp.down_proj": 845082, "model.layers.29.mlp.gate_proj": 1019804, "model.layers.29.mlp.up_proj": 812861, "model.layers.29.self_attn.k_proj": 637146, "model.layers.29.self_attn.o_proj": 
320821, "model.layers.29.self_attn.q_proj": 602307, "model.layers.29.self_attn.v_proj": 333849, "model.layers.3.mlp.down_proj": 799046, "model.layers.3.mlp.gate_proj": 819762, "model.layers.3.mlp.up_proj": 807689, "model.layers.3.self_attn.k_proj": 1037087, "model.layers.3.self_attn.o_proj": 381601, "model.layers.3.self_attn.q_proj": 882621, "model.layers.3.self_attn.v_proj": 393971, "model.layers.30.mlp.down_proj": 915437, "model.layers.30.mlp.gate_proj": 1066678, "model.layers.30.mlp.up_proj": 833731, "model.layers.30.self_attn.k_proj": 513532, "model.layers.30.self_attn.o_proj": 321348, "model.layers.30.self_attn.q_proj": 497984, "model.layers.30.self_attn.v_proj": 325219, "model.layers.31.mlp.down_proj": 1046449, "model.layers.31.mlp.gate_proj": 1064742, "model.layers.31.mlp.up_proj": 913077, "model.layers.31.self_attn.k_proj": 587052, "model.layers.31.self_attn.o_proj": 362137, "model.layers.31.self_attn.q_proj": 515769, "model.layers.31.self_attn.v_proj": 359219, "model.layers.4.mlp.down_proj": 809715, "model.layers.4.mlp.gate_proj": 877702, "model.layers.4.mlp.up_proj": 816908, "model.layers.4.self_attn.k_proj": 903251, "model.layers.4.self_attn.o_proj": 319964, "model.layers.4.self_attn.q_proj": 761555, "model.layers.4.self_attn.v_proj": 338525, "model.layers.5.mlp.down_proj": 818173, "model.layers.5.mlp.gate_proj": 940627, "model.layers.5.mlp.up_proj": 822212, "model.layers.5.self_attn.k_proj": 814611, "model.layers.5.self_attn.o_proj": 348464, "model.layers.5.self_attn.q_proj": 684653, "model.layers.5.self_attn.v_proj": 377853, "model.layers.6.mlp.down_proj": 816090, "model.layers.6.mlp.gate_proj": 979907, "model.layers.6.mlp.up_proj": 825004, "model.layers.6.self_attn.k_proj": 894466, "model.layers.6.self_attn.o_proj": 370829, "model.layers.6.self_attn.q_proj": 806875, "model.layers.6.self_attn.v_proj": 408275, "model.layers.7.mlp.down_proj": 817051, "model.layers.7.mlp.gate_proj": 1011373, "model.layers.7.mlp.up_proj": 825160, "model.layers.7.self_attn.k_proj": 870797, "model.layers.7.self_attn.o_proj": 392151, "model.layers.7.self_attn.q_proj": 815554, "model.layers.7.self_attn.v_proj": 426945, "model.layers.8.mlp.down_proj": 824092, "model.layers.8.mlp.gate_proj": 1018165, "model.layers.8.mlp.up_proj": 827172, "model.layers.8.self_attn.k_proj": 820539, "model.layers.8.self_attn.o_proj": 345716, "model.layers.8.self_attn.q_proj": 757234, "model.layers.8.self_attn.v_proj": 378191, "model.layers.9.mlp.down_proj": 834891, "model.layers.9.mlp.gate_proj": 1037677, "model.layers.9.mlp.up_proj": 835887, "model.layers.9.self_attn.k_proj": 739574, "model.layers.9.self_attn.o_proj": 346792, "model.layers.9.self_attn.q_proj": 655396, "model.layers.9.self_attn.v_proj": 385099 } }, "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.39.3", "use_cache": true, "vocab_size": 32000 }