{ "_name_or_path": "checkpoints/lmsys/vicuna-7b-v1.5", "anyprec": { "arch_config": { "layers_name": "layers", "model_name": "model", "module_names": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj", "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj" ] }, "group_count": 1, "parent_precision": 4, "seed_precision": 2, "sparse_numvals": { "model.layers.0.mlp.down_proj": 1233978, "model.layers.0.mlp.gate_proj": 1258296, "model.layers.0.mlp.up_proj": 1224078, "model.layers.0.self_attn.k_proj": 1883193, "model.layers.0.self_attn.o_proj": 1335571, "model.layers.0.self_attn.q_proj": 2066059, "model.layers.0.self_attn.v_proj": 1149426, "model.layers.1.mlp.down_proj": 1170303, "model.layers.1.mlp.gate_proj": 1203169, "model.layers.1.mlp.up_proj": 1193914, "model.layers.1.self_attn.k_proj": 1774927, "model.layers.1.self_attn.o_proj": 1735353, "model.layers.1.self_attn.q_proj": 2045430, "model.layers.1.self_attn.v_proj": 1233905, "model.layers.10.mlp.down_proj": 1214601, "model.layers.10.mlp.gate_proj": 1496901, "model.layers.10.mlp.up_proj": 1216897, "model.layers.10.self_attn.k_proj": 1031993, "model.layers.10.self_attn.o_proj": 469774, "model.layers.10.self_attn.q_proj": 910405, "model.layers.10.self_attn.v_proj": 499623, "model.layers.11.mlp.down_proj": 1202594, "model.layers.11.mlp.gate_proj": 1447186, "model.layers.11.mlp.up_proj": 1208047, "model.layers.11.self_attn.k_proj": 1091923, "model.layers.11.self_attn.o_proj": 506382, "model.layers.11.self_attn.q_proj": 1031388, "model.layers.11.self_attn.v_proj": 538763, "model.layers.12.mlp.down_proj": 1203837, "model.layers.12.mlp.gate_proj": 1414253, "model.layers.12.mlp.up_proj": 1204037, "model.layers.12.self_attn.k_proj": 979989, "model.layers.12.self_attn.o_proj": 491939, "model.layers.12.self_attn.q_proj": 872235, "model.layers.12.self_attn.v_proj": 525411, "model.layers.13.mlp.down_proj": 1203607, "model.layers.13.mlp.gate_proj": 1423537, "model.layers.13.mlp.up_proj": 1209880, "model.layers.13.self_attn.k_proj": 917637, "model.layers.13.self_attn.o_proj": 486449, "model.layers.13.self_attn.q_proj": 854578, "model.layers.13.self_attn.v_proj": 518950, "model.layers.14.mlp.down_proj": 1197605, "model.layers.14.mlp.gate_proj": 1381285, "model.layers.14.mlp.up_proj": 1204898, "model.layers.14.self_attn.k_proj": 940564, "model.layers.14.self_attn.o_proj": 470137, "model.layers.14.self_attn.q_proj": 867007, "model.layers.14.self_attn.v_proj": 501744, "model.layers.15.mlp.down_proj": 1208918, "model.layers.15.mlp.gate_proj": 1408663, "model.layers.15.mlp.up_proj": 1214748, "model.layers.15.self_attn.k_proj": 893986, "model.layers.15.self_attn.o_proj": 474383, "model.layers.15.self_attn.q_proj": 805850, "model.layers.15.self_attn.v_proj": 496044, "model.layers.16.mlp.down_proj": 1193388, "model.layers.16.mlp.gate_proj": 1369230, "model.layers.16.mlp.up_proj": 1196630, "model.layers.16.self_attn.k_proj": 943400, "model.layers.16.self_attn.o_proj": 459789, "model.layers.16.self_attn.q_proj": 844853, "model.layers.16.self_attn.v_proj": 490430, "model.layers.17.mlp.down_proj": 1176012, "model.layers.17.mlp.gate_proj": 1343222, "model.layers.17.mlp.up_proj": 1180452, "model.layers.17.self_attn.k_proj": 842348, "model.layers.17.self_attn.o_proj": 455401, "model.layers.17.self_attn.q_proj": 776532, "model.layers.17.self_attn.v_proj": 472993, "model.layers.18.mlp.down_proj": 1171208, "model.layers.18.mlp.gate_proj": 1331858, "model.layers.18.mlp.up_proj": 1171681, "model.layers.18.self_attn.k_proj": 807039, 
"model.layers.18.self_attn.o_proj": 455862, "model.layers.18.self_attn.q_proj": 741688, "model.layers.18.self_attn.v_proj": 481202, "model.layers.19.mlp.down_proj": 1162136, "model.layers.19.mlp.gate_proj": 1303078, "model.layers.19.mlp.up_proj": 1163440, "model.layers.19.self_attn.k_proj": 859732, "model.layers.19.self_attn.o_proj": 462864, "model.layers.19.self_attn.q_proj": 785772, "model.layers.19.self_attn.v_proj": 486944, "model.layers.2.mlp.down_proj": 1152157, "model.layers.2.mlp.gate_proj": 1159260, "model.layers.2.mlp.up_proj": 1165168, "model.layers.2.self_attn.k_proj": 1663751, "model.layers.2.self_attn.o_proj": 485472, "model.layers.2.self_attn.q_proj": 1371023, "model.layers.2.self_attn.v_proj": 512415, "model.layers.20.mlp.down_proj": 1160157, "model.layers.20.mlp.gate_proj": 1257357, "model.layers.20.mlp.up_proj": 1155843, "model.layers.20.self_attn.k_proj": 906734, "model.layers.20.self_attn.o_proj": 443501, "model.layers.20.self_attn.q_proj": 833323, "model.layers.20.self_attn.v_proj": 457808, "model.layers.21.mlp.down_proj": 1152844, "model.layers.21.mlp.gate_proj": 1240343, "model.layers.21.mlp.up_proj": 1145607, "model.layers.21.self_attn.k_proj": 919054, "model.layers.21.self_attn.o_proj": 442822, "model.layers.21.self_attn.q_proj": 846683, "model.layers.21.self_attn.v_proj": 462912, "model.layers.22.mlp.down_proj": 1149367, "model.layers.22.mlp.gate_proj": 1226597, "model.layers.22.mlp.up_proj": 1145408, "model.layers.22.self_attn.k_proj": 862091, "model.layers.22.self_attn.o_proj": 462002, "model.layers.22.self_attn.q_proj": 795237, "model.layers.22.self_attn.v_proj": 472279, "model.layers.23.mlp.down_proj": 1148308, "model.layers.23.mlp.gate_proj": 1205318, "model.layers.23.mlp.up_proj": 1143303, "model.layers.23.self_attn.k_proj": 780914, "model.layers.23.self_attn.o_proj": 458216, "model.layers.23.self_attn.q_proj": 736854, "model.layers.23.self_attn.v_proj": 473637, "model.layers.24.mlp.down_proj": 1142395, "model.layers.24.mlp.gate_proj": 1214395, "model.layers.24.mlp.up_proj": 1137655, "model.layers.24.self_attn.k_proj": 977188, "model.layers.24.self_attn.o_proj": 451991, "model.layers.24.self_attn.q_proj": 908230, "model.layers.24.self_attn.v_proj": 462751, "model.layers.25.mlp.down_proj": 1147862, "model.layers.25.mlp.gate_proj": 1227621, "model.layers.25.mlp.up_proj": 1142373, "model.layers.25.self_attn.k_proj": 796583, "model.layers.25.self_attn.o_proj": 446459, "model.layers.25.self_attn.q_proj": 771205, "model.layers.25.self_attn.v_proj": 459858, "model.layers.26.mlp.down_proj": 1157619, "model.layers.26.mlp.gate_proj": 1270276, "model.layers.26.mlp.up_proj": 1143819, "model.layers.26.self_attn.k_proj": 861270, "model.layers.26.self_attn.o_proj": 506930, "model.layers.26.self_attn.q_proj": 812193, "model.layers.26.self_attn.v_proj": 496655, "model.layers.27.mlp.down_proj": 1167794, "model.layers.27.mlp.gate_proj": 1311278, "model.layers.27.mlp.up_proj": 1150479, "model.layers.27.self_attn.k_proj": 660898, "model.layers.27.self_attn.o_proj": 462408, "model.layers.27.self_attn.q_proj": 639690, "model.layers.27.self_attn.v_proj": 465375, "model.layers.28.mlp.down_proj": 1185382, "model.layers.28.mlp.gate_proj": 1339786, "model.layers.28.mlp.up_proj": 1160799, "model.layers.28.self_attn.k_proj": 729108, "model.layers.28.self_attn.o_proj": 477348, "model.layers.28.self_attn.q_proj": 705351, "model.layers.28.self_attn.v_proj": 493689, "model.layers.29.mlp.down_proj": 1213857, "model.layers.29.mlp.gate_proj": 1391366, "model.layers.29.mlp.up_proj": 1176903, 
"model.layers.29.self_attn.k_proj": 819925, "model.layers.29.self_attn.o_proj": 458954, "model.layers.29.self_attn.q_proj": 780980, "model.layers.29.self_attn.v_proj": 475585, "model.layers.3.mlp.down_proj": 1160791, "model.layers.3.mlp.gate_proj": 1181464, "model.layers.3.mlp.up_proj": 1173791, "model.layers.3.self_attn.k_proj": 1255296, "model.layers.3.self_attn.o_proj": 532234, "model.layers.3.self_attn.q_proj": 1087792, "model.layers.3.self_attn.v_proj": 546946, "model.layers.30.mlp.down_proj": 1296643, "model.layers.30.mlp.gate_proj": 1443559, "model.layers.30.mlp.up_proj": 1200401, "model.layers.30.self_attn.k_proj": 682857, "model.layers.30.self_attn.o_proj": 458368, "model.layers.30.self_attn.q_proj": 662921, "model.layers.30.self_attn.v_proj": 463573, "model.layers.31.mlp.down_proj": 1451063, "model.layers.31.mlp.gate_proj": 1443243, "model.layers.31.mlp.up_proj": 1291025, "model.layers.31.self_attn.k_proj": 761264, "model.layers.31.self_attn.o_proj": 504372, "model.layers.31.self_attn.q_proj": 682521, "model.layers.31.self_attn.v_proj": 503567, "model.layers.4.mlp.down_proj": 1174946, "model.layers.4.mlp.gate_proj": 1247025, "model.layers.4.mlp.up_proj": 1183052, "model.layers.4.self_attn.k_proj": 1113167, "model.layers.4.self_attn.o_proj": 457105, "model.layers.4.self_attn.q_proj": 956349, "model.layers.4.self_attn.v_proj": 480678, "model.layers.5.mlp.down_proj": 1185177, "model.layers.5.mlp.gate_proj": 1315315, "model.layers.5.mlp.up_proj": 1190124, "model.layers.5.self_attn.k_proj": 1013955, "model.layers.5.self_attn.o_proj": 492438, "model.layers.5.self_attn.q_proj": 871171, "model.layers.5.self_attn.v_proj": 529252, "model.layers.6.mlp.down_proj": 1185025, "model.layers.6.mlp.gate_proj": 1363849, "model.layers.6.mlp.up_proj": 1189310, "model.layers.6.self_attn.k_proj": 1096207, "model.layers.6.self_attn.o_proj": 514822, "model.layers.6.self_attn.q_proj": 1000413, "model.layers.6.self_attn.v_proj": 559422, "model.layers.7.mlp.down_proj": 1185723, "model.layers.7.mlp.gate_proj": 1396153, "model.layers.7.mlp.up_proj": 1192986, "model.layers.7.self_attn.k_proj": 1072532, "model.layers.7.self_attn.o_proj": 541576, "model.layers.7.self_attn.q_proj": 1010874, "model.layers.7.self_attn.v_proj": 579662, "model.layers.8.mlp.down_proj": 1195389, "model.layers.8.mlp.gate_proj": 1405540, "model.layers.8.mlp.up_proj": 1195047, "model.layers.8.self_attn.k_proj": 1017816, "model.layers.8.self_attn.o_proj": 486159, "model.layers.8.self_attn.q_proj": 944341, "model.layers.8.self_attn.v_proj": 524279, "model.layers.9.mlp.down_proj": 1204613, "model.layers.9.mlp.gate_proj": 1426994, "model.layers.9.mlp.up_proj": 1202174, "model.layers.9.self_attn.k_proj": 927717, "model.layers.9.self_attn.o_proj": 488203, "model.layers.9.self_attn.q_proj": 834266, "model.layers.9.self_attn.v_proj": 531763 } }, "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.39.3", "use_cache": true, "vocab_size": 32000 }