|
{ |
|
"_name_or_path": "checkpoints/lmsys/vicuna-7b-v1.5", |
|
"anyprec": { |
|
"arch_config": { |
|
"layers_name": "layers", |
|
"model_name": "model", |
|
"module_names": [ |
|
"self_attn.q_proj", |
|
"self_attn.k_proj", |
|
"self_attn.v_proj", |
|
"self_attn.o_proj", |
|
"mlp.gate_proj", |
|
"mlp.up_proj", |
|
"mlp.down_proj" |
|
] |
|
}, |
|
"group_count": 1, |
|
"parent_precision": 4, |
|
"seed_precision": 2, |
|
"sparse_numvals": { |
|
"model.layers.0.mlp.down_proj": 1233978, |
|
"model.layers.0.mlp.gate_proj": 1258296, |
|
"model.layers.0.mlp.up_proj": 1224078, |
|
"model.layers.0.self_attn.k_proj": 1883193, |
|
"model.layers.0.self_attn.o_proj": 1335571, |
|
"model.layers.0.self_attn.q_proj": 2066059, |
|
"model.layers.0.self_attn.v_proj": 1149426, |
|
"model.layers.1.mlp.down_proj": 1170303, |
|
"model.layers.1.mlp.gate_proj": 1203169, |
|
"model.layers.1.mlp.up_proj": 1193914, |
|
"model.layers.1.self_attn.k_proj": 1774927, |
|
"model.layers.1.self_attn.o_proj": 1735353, |
|
"model.layers.1.self_attn.q_proj": 2045430, |
|
"model.layers.1.self_attn.v_proj": 1233905, |
|
"model.layers.10.mlp.down_proj": 1214601, |
|
"model.layers.10.mlp.gate_proj": 1496901, |
|
"model.layers.10.mlp.up_proj": 1216897, |
|
"model.layers.10.self_attn.k_proj": 1031993, |
|
"model.layers.10.self_attn.o_proj": 469774, |
|
"model.layers.10.self_attn.q_proj": 910405, |
|
"model.layers.10.self_attn.v_proj": 499623, |
|
"model.layers.11.mlp.down_proj": 1202594, |
|
"model.layers.11.mlp.gate_proj": 1447186, |
|
"model.layers.11.mlp.up_proj": 1208047, |
|
"model.layers.11.self_attn.k_proj": 1091923, |
|
"model.layers.11.self_attn.o_proj": 506382, |
|
"model.layers.11.self_attn.q_proj": 1031388, |
|
"model.layers.11.self_attn.v_proj": 538763, |
|
"model.layers.12.mlp.down_proj": 1203837, |
|
"model.layers.12.mlp.gate_proj": 1414253, |
|
"model.layers.12.mlp.up_proj": 1204037, |
|
"model.layers.12.self_attn.k_proj": 979989, |
|
"model.layers.12.self_attn.o_proj": 491939, |
|
"model.layers.12.self_attn.q_proj": 872235, |
|
"model.layers.12.self_attn.v_proj": 525411, |
|
"model.layers.13.mlp.down_proj": 1203607, |
|
"model.layers.13.mlp.gate_proj": 1423537, |
|
"model.layers.13.mlp.up_proj": 1209880, |
|
"model.layers.13.self_attn.k_proj": 917637, |
|
"model.layers.13.self_attn.o_proj": 486449, |
|
"model.layers.13.self_attn.q_proj": 854578, |
|
"model.layers.13.self_attn.v_proj": 518950, |
|
"model.layers.14.mlp.down_proj": 1197605, |
|
"model.layers.14.mlp.gate_proj": 1381285, |
|
"model.layers.14.mlp.up_proj": 1204898, |
|
"model.layers.14.self_attn.k_proj": 940564, |
|
"model.layers.14.self_attn.o_proj": 470137, |
|
"model.layers.14.self_attn.q_proj": 867007, |
|
"model.layers.14.self_attn.v_proj": 501744, |
|
"model.layers.15.mlp.down_proj": 1208918, |
|
"model.layers.15.mlp.gate_proj": 1408663, |
|
"model.layers.15.mlp.up_proj": 1214748, |
|
"model.layers.15.self_attn.k_proj": 893986, |
|
"model.layers.15.self_attn.o_proj": 474383, |
|
"model.layers.15.self_attn.q_proj": 805850, |
|
"model.layers.15.self_attn.v_proj": 496044, |
|
"model.layers.16.mlp.down_proj": 1193388, |
|
"model.layers.16.mlp.gate_proj": 1369230, |
|
"model.layers.16.mlp.up_proj": 1196630, |
|
"model.layers.16.self_attn.k_proj": 943400, |
|
"model.layers.16.self_attn.o_proj": 459789, |
|
"model.layers.16.self_attn.q_proj": 844853, |
|
"model.layers.16.self_attn.v_proj": 490430, |
|
"model.layers.17.mlp.down_proj": 1176012, |
|
"model.layers.17.mlp.gate_proj": 1343222, |
|
"model.layers.17.mlp.up_proj": 1180452, |
|
"model.layers.17.self_attn.k_proj": 842348, |
|
"model.layers.17.self_attn.o_proj": 455401, |
|
"model.layers.17.self_attn.q_proj": 776532, |
|
"model.layers.17.self_attn.v_proj": 472993, |
|
"model.layers.18.mlp.down_proj": 1171208, |
|
"model.layers.18.mlp.gate_proj": 1331858, |
|
"model.layers.18.mlp.up_proj": 1171681, |
|
"model.layers.18.self_attn.k_proj": 807039, |
|
"model.layers.18.self_attn.o_proj": 455862, |
|
"model.layers.18.self_attn.q_proj": 741688, |
|
"model.layers.18.self_attn.v_proj": 481202, |
|
"model.layers.19.mlp.down_proj": 1162136, |
|
"model.layers.19.mlp.gate_proj": 1303078, |
|
"model.layers.19.mlp.up_proj": 1163440, |
|
"model.layers.19.self_attn.k_proj": 859732, |
|
"model.layers.19.self_attn.o_proj": 462864, |
|
"model.layers.19.self_attn.q_proj": 785772, |
|
"model.layers.19.self_attn.v_proj": 486944, |
|
"model.layers.2.mlp.down_proj": 1152157, |
|
"model.layers.2.mlp.gate_proj": 1159260, |
|
"model.layers.2.mlp.up_proj": 1165168, |
|
"model.layers.2.self_attn.k_proj": 1663751, |
|
"model.layers.2.self_attn.o_proj": 485472, |
|
"model.layers.2.self_attn.q_proj": 1371023, |
|
"model.layers.2.self_attn.v_proj": 512415, |
|
"model.layers.20.mlp.down_proj": 1160157, |
|
"model.layers.20.mlp.gate_proj": 1257357, |
|
"model.layers.20.mlp.up_proj": 1155843, |
|
"model.layers.20.self_attn.k_proj": 906734, |
|
"model.layers.20.self_attn.o_proj": 443501, |
|
"model.layers.20.self_attn.q_proj": 833323, |
|
"model.layers.20.self_attn.v_proj": 457808, |
|
"model.layers.21.mlp.down_proj": 1152844, |
|
"model.layers.21.mlp.gate_proj": 1240343, |
|
"model.layers.21.mlp.up_proj": 1145607, |
|
"model.layers.21.self_attn.k_proj": 919054, |
|
"model.layers.21.self_attn.o_proj": 442822, |
|
"model.layers.21.self_attn.q_proj": 846683, |
|
"model.layers.21.self_attn.v_proj": 462912, |
|
"model.layers.22.mlp.down_proj": 1149367, |
|
"model.layers.22.mlp.gate_proj": 1226597, |
|
"model.layers.22.mlp.up_proj": 1145408, |
|
"model.layers.22.self_attn.k_proj": 862091, |
|
"model.layers.22.self_attn.o_proj": 462002, |
|
"model.layers.22.self_attn.q_proj": 795237, |
|
"model.layers.22.self_attn.v_proj": 472279, |
|
"model.layers.23.mlp.down_proj": 1148308, |
|
"model.layers.23.mlp.gate_proj": 1205318, |
|
"model.layers.23.mlp.up_proj": 1143303, |
|
"model.layers.23.self_attn.k_proj": 780914, |
|
"model.layers.23.self_attn.o_proj": 458216, |
|
"model.layers.23.self_attn.q_proj": 736854, |
|
"model.layers.23.self_attn.v_proj": 473637, |
|
"model.layers.24.mlp.down_proj": 1142395, |
|
"model.layers.24.mlp.gate_proj": 1214395, |
|
"model.layers.24.mlp.up_proj": 1137655, |
|
"model.layers.24.self_attn.k_proj": 977188, |
|
"model.layers.24.self_attn.o_proj": 451991, |
|
"model.layers.24.self_attn.q_proj": 908230, |
|
"model.layers.24.self_attn.v_proj": 462751, |
|
"model.layers.25.mlp.down_proj": 1147862, |
|
"model.layers.25.mlp.gate_proj": 1227621, |
|
"model.layers.25.mlp.up_proj": 1142373, |
|
"model.layers.25.self_attn.k_proj": 796583, |
|
"model.layers.25.self_attn.o_proj": 446459, |
|
"model.layers.25.self_attn.q_proj": 771205, |
|
"model.layers.25.self_attn.v_proj": 459858, |
|
"model.layers.26.mlp.down_proj": 1157619, |
|
"model.layers.26.mlp.gate_proj": 1270276, |
|
"model.layers.26.mlp.up_proj": 1143819, |
|
"model.layers.26.self_attn.k_proj": 861270, |
|
"model.layers.26.self_attn.o_proj": 506930, |
|
"model.layers.26.self_attn.q_proj": 812193, |
|
"model.layers.26.self_attn.v_proj": 496655, |
|
"model.layers.27.mlp.down_proj": 1167794, |
|
"model.layers.27.mlp.gate_proj": 1311278, |
|
"model.layers.27.mlp.up_proj": 1150479, |
|
"model.layers.27.self_attn.k_proj": 660898, |
|
"model.layers.27.self_attn.o_proj": 462408, |
|
"model.layers.27.self_attn.q_proj": 639690, |
|
"model.layers.27.self_attn.v_proj": 465375, |
|
"model.layers.28.mlp.down_proj": 1185382, |
|
"model.layers.28.mlp.gate_proj": 1339786, |
|
"model.layers.28.mlp.up_proj": 1160799, |
|
"model.layers.28.self_attn.k_proj": 729108, |
|
"model.layers.28.self_attn.o_proj": 477348, |
|
"model.layers.28.self_attn.q_proj": 705351, |
|
"model.layers.28.self_attn.v_proj": 493689, |
|
"model.layers.29.mlp.down_proj": 1213857, |
|
"model.layers.29.mlp.gate_proj": 1391366, |
|
"model.layers.29.mlp.up_proj": 1176903, |
|
"model.layers.29.self_attn.k_proj": 819925, |
|
"model.layers.29.self_attn.o_proj": 458954, |
|
"model.layers.29.self_attn.q_proj": 780980, |
|
"model.layers.29.self_attn.v_proj": 475585, |
|
"model.layers.3.mlp.down_proj": 1160791, |
|
"model.layers.3.mlp.gate_proj": 1181464, |
|
"model.layers.3.mlp.up_proj": 1173791, |
|
"model.layers.3.self_attn.k_proj": 1255296, |
|
"model.layers.3.self_attn.o_proj": 532234, |
|
"model.layers.3.self_attn.q_proj": 1087792, |
|
"model.layers.3.self_attn.v_proj": 546946, |
|
"model.layers.30.mlp.down_proj": 1296643, |
|
"model.layers.30.mlp.gate_proj": 1443559, |
|
"model.layers.30.mlp.up_proj": 1200401, |
|
"model.layers.30.self_attn.k_proj": 682857, |
|
"model.layers.30.self_attn.o_proj": 458368, |
|
"model.layers.30.self_attn.q_proj": 662921, |
|
"model.layers.30.self_attn.v_proj": 463573, |
|
"model.layers.31.mlp.down_proj": 1451063, |
|
"model.layers.31.mlp.gate_proj": 1443243, |
|
"model.layers.31.mlp.up_proj": 1291025, |
|
"model.layers.31.self_attn.k_proj": 761264, |
|
"model.layers.31.self_attn.o_proj": 504372, |
|
"model.layers.31.self_attn.q_proj": 682521, |
|
"model.layers.31.self_attn.v_proj": 503567, |
|
"model.layers.4.mlp.down_proj": 1174946, |
|
"model.layers.4.mlp.gate_proj": 1247025, |
|
"model.layers.4.mlp.up_proj": 1183052, |
|
"model.layers.4.self_attn.k_proj": 1113167, |
|
"model.layers.4.self_attn.o_proj": 457105, |
|
"model.layers.4.self_attn.q_proj": 956349, |
|
"model.layers.4.self_attn.v_proj": 480678, |
|
"model.layers.5.mlp.down_proj": 1185177, |
|
"model.layers.5.mlp.gate_proj": 1315315, |
|
"model.layers.5.mlp.up_proj": 1190124, |
|
"model.layers.5.self_attn.k_proj": 1013955, |
|
"model.layers.5.self_attn.o_proj": 492438, |
|
"model.layers.5.self_attn.q_proj": 871171, |
|
"model.layers.5.self_attn.v_proj": 529252, |
|
"model.layers.6.mlp.down_proj": 1185025, |
|
"model.layers.6.mlp.gate_proj": 1363849, |
|
"model.layers.6.mlp.up_proj": 1189310, |
|
"model.layers.6.self_attn.k_proj": 1096207, |
|
"model.layers.6.self_attn.o_proj": 514822, |
|
"model.layers.6.self_attn.q_proj": 1000413, |
|
"model.layers.6.self_attn.v_proj": 559422, |
|
"model.layers.7.mlp.down_proj": 1185723, |
|
"model.layers.7.mlp.gate_proj": 1396153, |
|
"model.layers.7.mlp.up_proj": 1192986, |
|
"model.layers.7.self_attn.k_proj": 1072532, |
|
"model.layers.7.self_attn.o_proj": 541576, |
|
"model.layers.7.self_attn.q_proj": 1010874, |
|
"model.layers.7.self_attn.v_proj": 579662, |
|
"model.layers.8.mlp.down_proj": 1195389, |
|
"model.layers.8.mlp.gate_proj": 1405540, |
|
"model.layers.8.mlp.up_proj": 1195047, |
|
"model.layers.8.self_attn.k_proj": 1017816, |
|
"model.layers.8.self_attn.o_proj": 486159, |
|
"model.layers.8.self_attn.q_proj": 944341, |
|
"model.layers.8.self_attn.v_proj": 524279, |
|
"model.layers.9.mlp.down_proj": 1204613, |
|
"model.layers.9.mlp.gate_proj": 1426994, |
|
"model.layers.9.mlp.up_proj": 1202174, |
|
"model.layers.9.self_attn.k_proj": 927717, |
|
"model.layers.9.self_attn.o_proj": 488203, |
|
"model.layers.9.self_attn.q_proj": 834266, |
|
"model.layers.9.self_attn.v_proj": 531763 |
|
} |
|
}, |
|
"architectures": [ |
|
"LlamaForCausalLM" |
|
], |
|
"attention_bias": false, |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 1, |
|
"eos_token_id": 2, |
|
"hidden_act": "silu", |
|
"hidden_size": 4096, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 11008, |
|
"max_position_embeddings": 4096, |
|
"model_type": "llama", |
|
"num_attention_heads": 32, |
|
"num_hidden_layers": 32, |
|
"num_key_value_heads": 32, |
|
"pad_token_id": 0, |
|
"pretraining_tp": 1, |
|
"rms_norm_eps": 1e-05, |
|
"rope_scaling": null, |
|
"rope_theta": 10000.0, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "float16", |
|
"transformers_version": "4.39.3", |
|
"use_cache": true, |
|
"vocab_size": 32000 |
|
} |
|
|