lrl-modelcloud's picture
0decbb086ebbb7c49e39a6ae805fbea65d86fa081da9c4368dd7f808f0d1bd52
726f914 verified
raw
history blame
46.7 kB
[{"layer": 1, "module": "self_attn.k_proj", "avg_loss": "0.0340", "time": "2.3061"}, {"layer": 1, "module": "self_attn.v_proj", "avg_loss": "0.0248", "time": "2.1271"}, {"layer": 1, "module": "self_attn.q_proj", "avg_loss": "0.0717", "time": "2.2017"}, {"layer": 1, "module": "self_attn.o_proj", "avg_loss": "0.0000", "time": "2.1944"}, {"layer": 1, "module": "mlp.up_proj", "avg_loss": "0.0188", "time": "2.3013"}, {"layer": 1, "module": "mlp.gate_proj", "avg_loss": "0.0193", "time": "2.2927"}, {"layer": 1, "module": "mlp.down_proj", "avg_loss": "0.0001", "time": "11.0541"}, {"layer": 2, "module": "self_attn.k_proj", "avg_loss": "0.0298", "time": "2.1542"}, {"layer": 2, "module": "self_attn.v_proj", "avg_loss": "0.0083", "time": "2.1475"}, {"layer": 2, "module": "self_attn.q_proj", "avg_loss": "0.1213", "time": "2.2089"}, {"layer": 2, "module": "self_attn.o_proj", "avg_loss": "0.0001", "time": "2.2142"}, {"layer": 2, "module": "mlp.up_proj", "avg_loss": "0.0511", "time": "2.3350"}, {"layer": 2, "module": "mlp.gate_proj", "avg_loss": "0.0525", "time": "2.3345"}, {"layer": 2, "module": "mlp.down_proj", "avg_loss": "0.0001", "time": "10.8336"}, {"layer": 3, "module": "self_attn.k_proj", "avg_loss": "0.0103", "time": "2.1100"}, {"layer": 3, "module": "self_attn.v_proj", "avg_loss": "0.0012", "time": "2.1055"}, {"layer": 3, "module": "self_attn.q_proj", "avg_loss": "0.0545", "time": "2.1490"}, {"layer": 3, "module": "self_attn.o_proj", "avg_loss": "0.0001", "time": "2.1458"}, {"layer": 3, "module": "mlp.up_proj", "avg_loss": "0.1016", "time": "2.2752"}, {"layer": 3, "module": "mlp.gate_proj", "avg_loss": "0.1041", "time": "2.2613"}, {"layer": 3, "module": "mlp.down_proj", "avg_loss": "0.0002", "time": "10.9387"}, {"layer": 4, "module": "self_attn.k_proj", "avg_loss": "0.0307", "time": "2.1228"}, {"layer": 4, "module": "self_attn.v_proj", "avg_loss": "0.0026", "time": "2.1129"}, {"layer": 4, "module": "self_attn.q_proj", "avg_loss": "0.1134", "time": "2.1820"}, {"layer": 4, "module": "self_attn.o_proj", "avg_loss": "0.0001", "time": "2.1727"}, {"layer": 4, "module": "mlp.up_proj", "avg_loss": "0.1692", "time": "2.3433"}, {"layer": 4, "module": "mlp.gate_proj", "avg_loss": "0.1858", "time": "2.3306"}, {"layer": 4, "module": "mlp.down_proj", "avg_loss": "0.0018", "time": "10.9853"}, {"layer": 5, "module": "self_attn.k_proj", "avg_loss": "0.0347", "time": "2.1145"}, {"layer": 5, "module": "self_attn.v_proj", "avg_loss": "0.0045", "time": "2.1125"}, {"layer": 5, "module": "self_attn.q_proj", "avg_loss": "0.1056", "time": "2.1785"}, {"layer": 5, "module": "self_attn.o_proj", "avg_loss": "0.0001", "time": "2.1553"}, {"layer": 5, "module": "mlp.up_proj", "avg_loss": "0.1813", "time": "2.3161"}, {"layer": 5, "module": "mlp.gate_proj", "avg_loss": "0.1865", "time": "2.2846"}, {"layer": 5, "module": "mlp.down_proj", "avg_loss": "0.0005", "time": "11.8761"}, {"layer": 6, "module": "self_attn.k_proj", "avg_loss": "0.0527", "time": "2.6124"}, {"layer": 6, "module": "self_attn.v_proj", "avg_loss": "0.0097", "time": "2.2247"}, {"layer": 6, "module": "self_attn.q_proj", "avg_loss": "0.1675", "time": "2.3726"}, {"layer": 6, "module": "self_attn.o_proj", "avg_loss": "0.0001", "time": "3.2719"}, {"layer": 6, "module": "mlp.up_proj", "avg_loss": "0.2062", "time": "3.3886"}, {"layer": 6, "module": "mlp.gate_proj", "avg_loss": "0.2178", "time": "2.5251"}, {"layer": 6, "module": "mlp.down_proj", "avg_loss": "0.0007", "time": "13.5402"}, {"layer": 7, "module": "self_attn.k_proj", "avg_loss": "0.0992", "time": "2.2638"}, {"layer": 7, "module": "self_attn.v_proj", "avg_loss": "0.0140", "time": "2.1225"}, {"layer": 7, "module": "self_attn.q_proj", "avg_loss": "0.2872", "time": "2.1839"}, {"layer": 7, "module": "self_attn.o_proj", "avg_loss": "0.0002", "time": "2.1686"}, {"layer": 7, "module": "mlp.up_proj", "avg_loss": "0.2218", "time": "2.3308"}, {"layer": 7, "module": "mlp.gate_proj", "avg_loss": "0.2334", "time": "2.3384"}, {"layer": 7, "module": "mlp.down_proj", "avg_loss": "0.0009", "time": "11.0866"}, {"layer": 8, "module": "self_attn.k_proj", "avg_loss": "0.1086", "time": "2.2547"}, {"layer": 8, "module": "self_attn.v_proj", "avg_loss": "0.0144", "time": "2.1342"}, {"layer": 8, "module": "self_attn.q_proj", "avg_loss": "0.3255", "time": "2.1702"}, {"layer": 8, "module": "self_attn.o_proj", "avg_loss": "0.0001", "time": "2.1891"}, {"layer": 8, "module": "mlp.up_proj", "avg_loss": "0.2559", "time": "2.2620"}, {"layer": 8, "module": "mlp.gate_proj", "avg_loss": "0.2708", "time": "2.2363"}, {"layer": 8, "module": "mlp.down_proj", "avg_loss": "0.0012", "time": "10.8485"}, {"layer": 9, "module": "self_attn.k_proj", "avg_loss": "0.0867", "time": "2.1783"}, {"layer": 9, "module": "self_attn.v_proj", "avg_loss": "0.0152", "time": "2.2986"}, {"layer": 9, "module": "self_attn.q_proj", "avg_loss": "0.2477", "time": "2.6036"}, {"layer": 9, "module": "self_attn.o_proj", "avg_loss": "0.0003", "time": "2.1641"}, {"layer": 9, "module": "mlp.up_proj", "avg_loss": "0.3460", "time": "2.2503"}, {"layer": 9, "module": "mlp.gate_proj", "avg_loss": "0.3657", "time": "2.4037"}, {"layer": 9, "module": "mlp.down_proj", "avg_loss": "0.0015", "time": "12.1335"}, {"layer": 10, "module": "self_attn.k_proj", "avg_loss": "0.0693", "time": "2.4013"}, {"layer": 10, "module": "self_attn.v_proj", "avg_loss": "0.0113", "time": "2.3331"}, {"layer": 10, "module": "self_attn.q_proj", "avg_loss": "0.2066", "time": "2.4911"}, {"layer": 10, "module": "self_attn.o_proj", "avg_loss": "0.0004", "time": "2.4018"}, {"layer": 10, "module": "mlp.up_proj", "avg_loss": "0.5040", "time": "2.6325"}, {"layer": 10, "module": "mlp.gate_proj", "avg_loss": "0.5358", "time": "2.5068"}, {"layer": 10, "module": "mlp.down_proj", "avg_loss": "0.0020", "time": "12.2718"}, {"layer": 11, "module": "self_attn.k_proj", "avg_loss": "0.2136", "time": "2.2625"}, {"layer": 11, "module": "self_attn.v_proj", "avg_loss": "0.0277", "time": "2.2458"}, {"layer": 11, "module": "self_attn.q_proj", "avg_loss": "0.6006", "time": "2.3952"}, {"layer": 11, "module": "self_attn.o_proj", "avg_loss": "0.0006", "time": "2.3536"}, {"layer": 11, "module": "mlp.up_proj", "avg_loss": "0.4056", "time": "2.4984"}, {"layer": 11, "module": "mlp.gate_proj", "avg_loss": "0.4341", "time": "2.5977"}, {"layer": 11, "module": "mlp.down_proj", "avg_loss": "0.0032", "time": "11.2669"}, {"layer": 12, "module": "self_attn.k_proj", "avg_loss": "0.2191", "time": "2.2457"}, {"layer": 12, "module": "self_attn.v_proj", "avg_loss": "0.0301", "time": "2.2262"}, {"layer": 12, "module": "self_attn.q_proj", "avg_loss": "0.5334", "time": "2.4246"}, {"layer": 12, "module": "self_attn.o_proj", "avg_loss": "0.0007", "time": "2.3091"}, {"layer": 12, "module": "mlp.up_proj", "avg_loss": "0.5819", "time": "2.7207"}, {"layer": 12, "module": "mlp.gate_proj", "avg_loss": "0.6209", "time": "2.8096"}, {"layer": 12, "module": "mlp.down_proj", "avg_loss": "0.0041", "time": "11.9428"}, {"layer": 13, "module": "self_attn.k_proj", "avg_loss": "0.1503", "time": "2.4324"}, {"layer": 13, "module": "self_attn.v_proj", "avg_loss": "0.0254", "time": "2.5869"}, {"layer": 13, "module": "self_attn.q_proj", "avg_loss": "0.4283", "time": "2.3209"}, {"layer": 13, "module": "self_attn.o_proj", "avg_loss": "0.0006", "time": "2.3571"}, {"layer": 13, "module": "mlp.up_proj", "avg_loss": "0.5228", "time": "2.5104"}, {"layer": 13, "module": "mlp.gate_proj", "avg_loss": "0.5596", "time": "2.5255"}, {"layer": 13, "module": "mlp.down_proj", "avg_loss": "0.0045", "time": "11.9247"}, {"layer": 14, "module": "self_attn.k_proj", "avg_loss": "0.2188", "time": "2.2858"}, {"layer": 14, "module": "self_attn.v_proj", "avg_loss": "0.0287", "time": "2.2842"}, {"layer": 14, "module": "self_attn.q_proj", "avg_loss": "0.5806", "time": "2.2953"}, {"layer": 14, "module": "self_attn.o_proj", "avg_loss": "0.0023", "time": "2.4934"}, {"layer": 14, "module": "mlp.up_proj", "avg_loss": "0.4704", "time": "2.5714"}, {"layer": 14, "module": "mlp.gate_proj", "avg_loss": "0.4991", "time": "2.8217"}, {"layer": 14, "module": "mlp.down_proj", "avg_loss": "0.0055", "time": "11.6756"}, {"layer": 15, "module": "self_attn.k_proj", "avg_loss": "0.2683", "time": "2.4382"}, {"layer": 15, "module": "self_attn.v_proj", "avg_loss": "0.0340", "time": "2.2872"}, {"layer": 15, "module": "self_attn.q_proj", "avg_loss": "0.7832", "time": "2.4047"}, {"layer": 15, "module": "self_attn.o_proj", "avg_loss": "0.0026", "time": "2.4892"}, {"layer": 15, "module": "mlp.up_proj", "avg_loss": "0.6002", "time": "2.8302"}, {"layer": 15, "module": "mlp.gate_proj", "avg_loss": "0.6422", "time": "2.5439"}, {"layer": 15, "module": "mlp.down_proj", "avg_loss": "0.0076", "time": "12.1513"}, {"layer": 16, "module": "self_attn.k_proj", "avg_loss": "0.2242", "time": "2.3480"}, {"layer": 16, "module": "self_attn.v_proj", "avg_loss": "0.0310", "time": "2.2570"}, {"layer": 16, "module": "self_attn.q_proj", "avg_loss": "0.6324", "time": "2.3232"}, {"layer": 16, "module": "self_attn.o_proj", "avg_loss": "0.0028", "time": "2.4641"}, {"layer": 16, "module": "mlp.up_proj", "avg_loss": "0.7036", "time": "2.6242"}, {"layer": 16, "module": "mlp.gate_proj", "avg_loss": "0.7689", "time": "2.7480"}, {"layer": 16, "module": "mlp.down_proj", "avg_loss": "0.0100", "time": "11.8954"}, {"layer": 17, "module": "self_attn.k_proj", "avg_loss": "0.2410", "time": "2.3035"}, {"layer": 17, "module": "self_attn.v_proj", "avg_loss": "0.0355", "time": "2.3183"}, {"layer": 17, "module": "self_attn.q_proj", "avg_loss": "0.7361", "time": "2.5062"}, {"layer": 17, "module": "self_attn.o_proj", "avg_loss": "0.0038", "time": "2.3595"}, {"layer": 17, "module": "mlp.up_proj", "avg_loss": "0.8927", "time": "2.4757"}, {"layer": 17, "module": "mlp.gate_proj", "avg_loss": "0.9823", "time": "2.4849"}, {"layer": 17, "module": "mlp.down_proj", "avg_loss": "0.0136", "time": "11.9572"}, {"layer": 18, "module": "self_attn.k_proj", "avg_loss": "0.3868", "time": "2.2977"}, {"layer": 18, "module": "self_attn.v_proj", "avg_loss": "0.0401", "time": "2.2947"}, {"layer": 18, "module": "self_attn.q_proj", "avg_loss": "1.0522", "time": "2.3790"}, {"layer": 18, "module": "self_attn.o_proj", "avg_loss": "0.0100", "time": "2.6136"}, {"layer": 18, "module": "mlp.up_proj", "avg_loss": "1.0473", "time": "2.7921"}, {"layer": 18, "module": "mlp.gate_proj", "avg_loss": "1.1509", "time": "3.2724"}, {"layer": 18, "module": "mlp.down_proj", "avg_loss": "0.0187", "time": "12.1213"}, {"layer": 19, "module": "self_attn.k_proj", "avg_loss": "0.4875", "time": "2.2760"}, {"layer": 19, "module": "self_attn.v_proj", "avg_loss": "0.0530", "time": "2.2780"}, {"layer": 19, "module": "self_attn.q_proj", "avg_loss": "1.4200", "time": "2.3663"}, {"layer": 19, "module": "self_attn.o_proj", "avg_loss": "0.0100", "time": "2.3269"}, {"layer": 19, "module": "mlp.up_proj", "avg_loss": "1.3378", "time": "2.5476"}, {"layer": 19, "module": "mlp.gate_proj", "avg_loss": "1.5072", "time": "2.8983"}, {"layer": 19, "module": "mlp.down_proj", "avg_loss": "0.0246", "time": "11.9091"}, {"layer": 20, "module": "self_attn.k_proj", "avg_loss": "0.4460", "time": "2.2856"}, {"layer": 20, "module": "self_attn.v_proj", "avg_loss": "0.0617", "time": "2.2893"}, {"layer": 20, "module": "self_attn.q_proj", "avg_loss": "1.2334", "time": "2.4448"}, {"layer": 20, "module": "self_attn.o_proj", "avg_loss": "0.0093", "time": "2.3386"}, {"layer": 20, "module": "mlp.up_proj", "avg_loss": "1.4231", "time": "2.5915"}, {"layer": 20, "module": "mlp.gate_proj", "avg_loss": "1.6567", "time": "2.5700"}, {"layer": 20, "module": "mlp.down_proj", "avg_loss": "0.0295", "time": "11.6946"}, {"layer": 21, "module": "self_attn.k_proj", "avg_loss": "0.5894", "time": "2.4274"}, {"layer": 21, "module": "self_attn.v_proj", "avg_loss": "0.0659", "time": "2.2505"}, {"layer": 21, "module": "self_attn.q_proj", "avg_loss": "1.2721", "time": "2.3769"}, {"layer": 21, "module": "self_attn.o_proj", "avg_loss": "0.0072", "time": "2.3418"}, {"layer": 21, "module": "mlp.up_proj", "avg_loss": "1.6326", "time": "2.5951"}, {"layer": 21, "module": "mlp.gate_proj", "avg_loss": "1.8617", "time": "2.5092"}, {"layer": 21, "module": "mlp.down_proj", "avg_loss": "0.0304", "time": "11.9673"}, {"layer": 22, "module": "self_attn.k_proj", "avg_loss": "0.8882", "time": "2.4347"}, {"layer": 22, "module": "self_attn.v_proj", "avg_loss": "0.0784", "time": "2.4376"}, {"layer": 22, "module": "self_attn.q_proj", "avg_loss": "2.0454", "time": "2.5039"}, {"layer": 22, "module": "self_attn.o_proj", "avg_loss": "0.0112", "time": "2.9112"}, {"layer": 22, "module": "mlp.up_proj", "avg_loss": "1.8651", "time": "2.5677"}, {"layer": 22, "module": "mlp.gate_proj", "avg_loss": "2.1412", "time": "2.5779"}, {"layer": 22, "module": "mlp.down_proj", "avg_loss": "0.0333", "time": "12.0547"}, {"layer": 23, "module": "self_attn.k_proj", "avg_loss": "0.9634", "time": "2.3299"}, {"layer": 23, "module": "self_attn.v_proj", "avg_loss": "0.1213", "time": "2.4904"}, {"layer": 23, "module": "self_attn.q_proj", "avg_loss": "2.1048", "time": "2.5611"}, {"layer": 23, "module": "self_attn.o_proj", "avg_loss": "0.0072", "time": "2.4912"}, {"layer": 23, "module": "mlp.up_proj", "avg_loss": "2.0504", "time": "2.6063"}, {"layer": 23, "module": "mlp.gate_proj", "avg_loss": "2.4101", "time": "2.4969"}, {"layer": 23, "module": "mlp.down_proj", "avg_loss": "0.0340", "time": "11.5209"}, {"layer": 24, "module": "self_attn.k_proj", "avg_loss": "1.0875", "time": "2.5237"}, {"layer": 24, "module": "self_attn.v_proj", "avg_loss": "0.1122", "time": "2.3874"}, {"layer": 24, "module": "self_attn.q_proj", "avg_loss": "2.2684", "time": "2.3761"}, {"layer": 24, "module": "self_attn.o_proj", "avg_loss": "0.0088", "time": "2.9202"}, {"layer": 24, "module": "mlp.up_proj", "avg_loss": "2.1628", "time": "3.3543"}, {"layer": 24, "module": "mlp.gate_proj", "avg_loss": "2.5593", "time": "3.4625"}, {"layer": 24, "module": "mlp.down_proj", "avg_loss": "0.0363", "time": "11.6492"}, {"layer": 25, "module": "self_attn.k_proj", "avg_loss": "0.5985", "time": "3.0171"}, {"layer": 25, "module": "self_attn.v_proj", "avg_loss": "0.0601", "time": "3.2322"}, {"layer": 25, "module": "self_attn.q_proj", "avg_loss": "1.0316", "time": "2.6673"}, {"layer": 25, "module": "self_attn.o_proj", "avg_loss": "0.0072", "time": "2.5850"}, {"layer": 25, "module": "mlp.up_proj", "avg_loss": "2.2648", "time": "2.4304"}, {"layer": 25, "module": "mlp.gate_proj", "avg_loss": "2.6151", "time": "2.3587"}, {"layer": 25, "module": "mlp.down_proj", "avg_loss": "0.0363", "time": "14.0065"}, {"layer": 26, "module": "self_attn.k_proj", "avg_loss": "1.0977", "time": "2.2340"}, {"layer": 26, "module": "self_attn.v_proj", "avg_loss": "0.1190", "time": "2.2492"}, {"layer": 26, "module": "self_attn.q_proj", "avg_loss": "2.0767", "time": "2.2604"}, {"layer": 26, "module": "self_attn.o_proj", "avg_loss": "0.0073", "time": "2.4496"}, {"layer": 26, "module": "mlp.up_proj", "avg_loss": "2.4029", "time": "2.3587"}, {"layer": 26, "module": "mlp.gate_proj", "avg_loss": "2.7810", "time": "2.4058"}, {"layer": 26, "module": "mlp.down_proj", "avg_loss": "0.0373", "time": "11.8029"}, {"layer": 27, "module": "self_attn.k_proj", "avg_loss": "1.0647", "time": "2.3830"}, {"layer": 27, "module": "self_attn.v_proj", "avg_loss": "0.1189", "time": "2.3592"}, {"layer": 27, "module": "self_attn.q_proj", "avg_loss": "2.0757", "time": "2.4755"}, {"layer": 27, "module": "self_attn.o_proj", "avg_loss": "0.0079", "time": "2.9426"}, {"layer": 27, "module": "mlp.up_proj", "avg_loss": "2.4028", "time": "2.8783"}, {"layer": 27, "module": "mlp.gate_proj", "avg_loss": "2.7568", "time": "2.6592"}, {"layer": 27, "module": "mlp.down_proj", "avg_loss": "0.0391", "time": "11.2783"}, {"layer": 28, "module": "self_attn.k_proj", "avg_loss": "1.0676", "time": "2.4219"}, {"layer": 28, "module": "self_attn.v_proj", "avg_loss": "0.0722", "time": "2.4293"}, {"layer": 28, "module": "self_attn.q_proj", "avg_loss": "2.0441", "time": "2.5298"}, {"layer": 28, "module": "self_attn.o_proj", "avg_loss": "0.0166", "time": "2.3209"}, {"layer": 28, "module": "mlp.up_proj", "avg_loss": "2.2581", "time": "2.5151"}, {"layer": 28, "module": "mlp.gate_proj", "avg_loss": "2.5407", "time": "2.4829"}, {"layer": 28, "module": "mlp.down_proj", "avg_loss": "0.0433", "time": "12.4605"}, {"layer": 29, "module": "self_attn.k_proj", "avg_loss": "1.1916", "time": "2.2734"}, {"layer": 29, "module": "self_attn.v_proj", "avg_loss": "0.1003", "time": "2.2934"}, {"layer": 29, "module": "self_attn.q_proj", "avg_loss": "2.3925", "time": "2.4116"}, {"layer": 29, "module": "self_attn.o_proj", "avg_loss": "0.0134", "time": "2.4406"}, {"layer": 29, "module": "mlp.up_proj", "avg_loss": "2.3000", "time": "2.5702"}, {"layer": 29, "module": "mlp.gate_proj", "avg_loss": "2.5859", "time": "2.7159"}, {"layer": 29, "module": "mlp.down_proj", "avg_loss": "0.0473", "time": "11.7721"}, {"layer": 30, "module": "self_attn.k_proj", "avg_loss": "1.2489", "time": "2.6985"}, {"layer": 30, "module": "self_attn.v_proj", "avg_loss": "0.0948", "time": "2.6283"}, {"layer": 30, "module": "self_attn.q_proj", "avg_loss": "2.3910", "time": "2.5259"}, {"layer": 30, "module": "self_attn.o_proj", "avg_loss": "0.0156", "time": "3.3308"}, {"layer": 30, "module": "mlp.up_proj", "avg_loss": "2.3619", "time": "2.7388"}, {"layer": 30, "module": "mlp.gate_proj", "avg_loss": "2.6558", "time": "2.7080"}, {"layer": 30, "module": "mlp.down_proj", "avg_loss": "0.0491", "time": "12.5610"}, {"layer": 31, "module": "self_attn.k_proj", "avg_loss": "1.0105", "time": "2.3092"}, {"layer": 31, "module": "self_attn.v_proj", "avg_loss": "0.0804", "time": "2.4097"}, {"layer": 31, "module": "self_attn.q_proj", "avg_loss": "1.9316", "time": "2.5905"}, {"layer": 31, "module": "self_attn.o_proj", "avg_loss": "0.0179", "time": "2.6900"}, {"layer": 31, "module": "mlp.up_proj", "avg_loss": "2.2516", "time": "2.4284"}, {"layer": 31, "module": "mlp.gate_proj", "avg_loss": "2.5044", "time": "2.4881"}, {"layer": 31, "module": "mlp.down_proj", "avg_loss": "0.0469", "time": "11.6420"}, {"layer": 32, "module": "self_attn.k_proj", "avg_loss": "1.2391", "time": "2.2986"}, {"layer": 32, "module": "self_attn.v_proj", "avg_loss": "0.0790", "time": "2.3146"}, {"layer": 32, "module": "self_attn.q_proj", "avg_loss": "2.6753", "time": "2.3641"}, {"layer": 32, "module": "self_attn.o_proj", "avg_loss": "0.0225", "time": "2.4001"}, {"layer": 32, "module": "mlp.up_proj", "avg_loss": "2.2719", "time": "3.0904"}, {"layer": 32, "module": "mlp.gate_proj", "avg_loss": "2.4724", "time": "3.2915"}, {"layer": 32, "module": "mlp.down_proj", "avg_loss": "0.0471", "time": "11.7021"}, {"layer": 33, "module": "self_attn.k_proj", "avg_loss": "0.8308", "time": "2.1627"}, {"layer": 33, "module": "self_attn.v_proj", "avg_loss": "0.0897", "time": "2.2433"}, {"layer": 33, "module": "self_attn.q_proj", "avg_loss": "1.6442", "time": "2.2615"}, {"layer": 33, "module": "self_attn.o_proj", "avg_loss": "0.0103", "time": "2.2047"}, {"layer": 33, "module": "mlp.up_proj", "avg_loss": "2.2521", "time": "2.3213"}, {"layer": 33, "module": "mlp.gate_proj", "avg_loss": "2.4846", "time": "2.3936"}, {"layer": 33, "module": "mlp.down_proj", "avg_loss": "0.0497", "time": "11.6459"}, {"layer": 34, "module": "self_attn.k_proj", "avg_loss": "1.2606", "time": "2.1617"}, {"layer": 34, "module": "self_attn.v_proj", "avg_loss": "0.0860", "time": "2.1530"}, {"layer": 34, "module": "self_attn.q_proj", "avg_loss": "2.7065", "time": "2.2676"}, {"layer": 34, "module": "self_attn.o_proj", "avg_loss": "0.0220", "time": "2.4558"}, {"layer": 34, "module": "mlp.up_proj", "avg_loss": "4.9602", "time": "2.3981"}, {"layer": 34, "module": "mlp.gate_proj", "avg_loss": "5.3332", "time": "2.3555"}, {"layer": 34, "module": "mlp.down_proj", "avg_loss": "0.0487", "time": "11.2016"}, {"layer": 35, "module": "self_attn.k_proj", "avg_loss": "1.1053", "time": "2.4382"}, {"layer": 35, "module": "self_attn.v_proj", "avg_loss": "0.0861", "time": "2.3616"}, {"layer": 35, "module": "self_attn.q_proj", "avg_loss": "2.3570", "time": "2.3524"}, {"layer": 35, "module": "self_attn.o_proj", "avg_loss": "0.0204", "time": "2.2096"}, {"layer": 35, "module": "mlp.up_proj", "avg_loss": "2.1861", "time": "2.4172"}, {"layer": 35, "module": "mlp.gate_proj", "avg_loss": "2.3878", "time": "2.3805"}, {"layer": 35, "module": "mlp.down_proj", "avg_loss": "0.0504", "time": "12.2437"}, {"layer": 36, "module": "self_attn.k_proj", "avg_loss": "1.2348", "time": "2.1707"}, {"layer": 36, "module": "self_attn.v_proj", "avg_loss": "0.0866", "time": "2.1406"}, {"layer": 36, "module": "self_attn.q_proj", "avg_loss": "2.5172", "time": "2.3632"}, {"layer": 36, "module": "self_attn.o_proj", "avg_loss": "0.0213", "time": "2.2463"}, {"layer": 36, "module": "mlp.up_proj", "avg_loss": "2.2280", "time": "2.4010"}, {"layer": 36, "module": "mlp.gate_proj", "avg_loss": "2.4495", "time": "2.6013"}, {"layer": 36, "module": "mlp.down_proj", "avg_loss": "0.0482", "time": "11.5002"}, {"layer": 37, "module": "self_attn.k_proj", "avg_loss": "0.9752", "time": "2.3477"}, {"layer": 37, "module": "self_attn.v_proj", "avg_loss": "0.0786", "time": "2.3115"}, {"layer": 37, "module": "self_attn.q_proj", "avg_loss": "1.7807", "time": "2.2528"}, {"layer": 37, "module": "self_attn.o_proj", "avg_loss": "0.0113", "time": "2.2159"}, {"layer": 37, "module": "mlp.up_proj", "avg_loss": "2.2418", "time": "2.3613"}, {"layer": 37, "module": "mlp.gate_proj", "avg_loss": "2.5169", "time": "2.3766"}, {"layer": 37, "module": "mlp.down_proj", "avg_loss": "0.0510", "time": "11.6284"}, {"layer": 38, "module": "self_attn.k_proj", "avg_loss": "1.2602", "time": "2.2606"}, {"layer": 38, "module": "self_attn.v_proj", "avg_loss": "0.0854", "time": "2.6242"}, {"layer": 38, "module": "self_attn.q_proj", "avg_loss": "2.3376", "time": "2.2812"}, {"layer": 38, "module": "self_attn.o_proj", "avg_loss": "0.0145", "time": "2.3023"}, {"layer": 38, "module": "mlp.up_proj", "avg_loss": "2.3320", "time": "2.4330"}, {"layer": 38, "module": "mlp.gate_proj", "avg_loss": "2.6194", "time": "2.3898"}, {"layer": 38, "module": "mlp.down_proj", "avg_loss": "0.0508", "time": "11.3476"}, {"layer": 39, "module": "self_attn.k_proj", "avg_loss": "1.1717", "time": "2.2152"}, {"layer": 39, "module": "self_attn.v_proj", "avg_loss": "0.0966", "time": "2.1183"}, {"layer": 39, "module": "self_attn.q_proj", "avg_loss": "2.1522", "time": "2.1930"}, {"layer": 39, "module": "self_attn.o_proj", "avg_loss": "0.0127", "time": "2.2822"}, {"layer": 39, "module": "mlp.up_proj", "avg_loss": "2.4036", "time": "2.3615"}, {"layer": 39, "module": "mlp.gate_proj", "avg_loss": "2.7398", "time": "2.3945"}, {"layer": 39, "module": "mlp.down_proj", "avg_loss": "0.0501", "time": "12.8508"}, {"layer": 40, "module": "self_attn.k_proj", "avg_loss": "1.2272", "time": "2.2228"}, {"layer": 40, "module": "self_attn.v_proj", "avg_loss": "0.1037", "time": "2.1629"}, {"layer": 40, "module": "self_attn.q_proj", "avg_loss": "2.1165", "time": "2.3175"}, {"layer": 40, "module": "self_attn.o_proj", "avg_loss": "0.0137", "time": "2.2414"}, {"layer": 40, "module": "mlp.up_proj", "avg_loss": "2.4614", "time": "2.4506"}, {"layer": 40, "module": "mlp.gate_proj", "avg_loss": "2.8403", "time": "2.4072"}, {"layer": 40, "module": "mlp.down_proj", "avg_loss": "0.0517", "time": "11.4937"}, {"layer": 41, "module": "self_attn.k_proj", "avg_loss": "0.9674", "time": "2.2806"}, {"layer": 41, "module": "self_attn.v_proj", "avg_loss": "0.1242", "time": "2.2545"}, {"layer": 41, "module": "self_attn.q_proj", "avg_loss": "1.9310", "time": "2.3418"}, {"layer": 41, "module": "self_attn.o_proj", "avg_loss": "0.0066", "time": "2.6530"}, {"layer": 41, "module": "mlp.up_proj", "avg_loss": "2.5373", "time": "2.6394"}, {"layer": 41, "module": "mlp.gate_proj", "avg_loss": "2.9504", "time": "2.5879"}, {"layer": 41, "module": "mlp.down_proj", "avg_loss": "0.0552", "time": "11.8368"}, {"layer": 42, "module": "self_attn.k_proj", "avg_loss": "1.2513", "time": "2.3142"}, {"layer": 42, "module": "self_attn.v_proj", "avg_loss": "0.1039", "time": "2.2137"}, {"layer": 42, "module": "self_attn.q_proj", "avg_loss": "2.1090", "time": "2.3427"}, {"layer": 42, "module": "self_attn.o_proj", "avg_loss": "0.0093", "time": "2.2352"}, {"layer": 42, "module": "mlp.up_proj", "avg_loss": "2.6617", "time": "2.4736"}, {"layer": 42, "module": "mlp.gate_proj", "avg_loss": "3.0939", "time": "2.4096"}, {"layer": 42, "module": "mlp.down_proj", "avg_loss": "0.0534", "time": "11.1704"}, {"layer": 43, "module": "self_attn.k_proj", "avg_loss": "0.8837", "time": "2.5452"}, {"layer": 43, "module": "self_attn.v_proj", "avg_loss": "0.0860", "time": "2.5169"}, {"layer": 43, "module": "self_attn.q_proj", "avg_loss": "1.3037", "time": "2.4674"}, {"layer": 43, "module": "self_attn.o_proj", "avg_loss": "0.0065", "time": "2.3108"}, {"layer": 43, "module": "mlp.up_proj", "avg_loss": "2.7478", "time": "2.3894"}, {"layer": 43, "module": "mlp.gate_proj", "avg_loss": "3.2208", "time": "2.3338"}, {"layer": 43, "module": "mlp.down_proj", "avg_loss": "0.0510", "time": "11.5457"}, {"layer": 44, "module": "self_attn.k_proj", "avg_loss": "0.9321", "time": "2.1733"}, {"layer": 44, "module": "self_attn.v_proj", "avg_loss": "0.0935", "time": "2.2012"}, {"layer": 44, "module": "self_attn.q_proj", "avg_loss": "1.4437", "time": "2.3272"}, {"layer": 44, "module": "self_attn.o_proj", "avg_loss": "0.0073", "time": "2.4816"}, {"layer": 44, "module": "mlp.up_proj", "avg_loss": "2.8247", "time": "2.3247"}, {"layer": 44, "module": "mlp.gate_proj", "avg_loss": "3.3326", "time": "2.3850"}, {"layer": 44, "module": "mlp.down_proj", "avg_loss": "0.0542", "time": "11.2610"}, {"layer": 45, "module": "self_attn.k_proj", "avg_loss": "1.4224", "time": "2.3964"}, {"layer": 45, "module": "self_attn.v_proj", "avg_loss": "0.1535", "time": "2.2252"}, {"layer": 45, "module": "self_attn.q_proj", "avg_loss": "2.8055", "time": "2.4616"}, {"layer": 45, "module": "self_attn.o_proj", "avg_loss": "0.0072", "time": "2.2663"}, {"layer": 45, "module": "mlp.up_proj", "avg_loss": "2.8984", "time": "2.4386"}, {"layer": 45, "module": "mlp.gate_proj", "avg_loss": "3.4405", "time": "2.3517"}, {"layer": 45, "module": "mlp.down_proj", "avg_loss": "0.0596", "time": "11.2169"}, {"layer": 46, "module": "self_attn.k_proj", "avg_loss": "0.9101", "time": "2.2069"}, {"layer": 46, "module": "self_attn.v_proj", "avg_loss": "0.0905", "time": "2.1429"}, {"layer": 46, "module": "self_attn.q_proj", "avg_loss": "1.4365", "time": "2.2747"}, {"layer": 46, "module": "self_attn.o_proj", "avg_loss": "0.0078", "time": "2.2129"}, {"layer": 46, "module": "mlp.up_proj", "avg_loss": "3.0091", "time": "2.2778"}, {"layer": 46, "module": "mlp.gate_proj", "avg_loss": "3.5750", "time": "2.2699"}, {"layer": 46, "module": "mlp.down_proj", "avg_loss": "0.0607", "time": "10.8912"}, {"layer": 47, "module": "self_attn.k_proj", "avg_loss": "0.6509", "time": "2.1438"}, {"layer": 47, "module": "self_attn.v_proj", "avg_loss": "0.0407", "time": "2.1720"}, {"layer": 47, "module": "self_attn.q_proj", "avg_loss": "0.4181", "time": "2.2129"}, {"layer": 47, "module": "self_attn.o_proj", "avg_loss": "0.0030", "time": "2.2296"}, {"layer": 47, "module": "mlp.up_proj", "avg_loss": "3.0751", "time": "2.5685"}, {"layer": 47, "module": "mlp.gate_proj", "avg_loss": "3.6549", "time": "2.4761"}, {"layer": 47, "module": "mlp.down_proj", "avg_loss": "0.0567", "time": "11.0770"}, {"layer": 48, "module": "self_attn.k_proj", "avg_loss": "1.0540", "time": "2.1394"}, {"layer": 48, "module": "self_attn.v_proj", "avg_loss": "0.1139", "time": "2.1839"}, {"layer": 48, "module": "self_attn.q_proj", "avg_loss": "1.4910", "time": "2.2348"}, {"layer": 48, "module": "self_attn.o_proj", "avg_loss": "0.0071", "time": "2.2208"}, {"layer": 48, "module": "mlp.up_proj", "avg_loss": "3.1755", "time": "2.4037"}, {"layer": 48, "module": "mlp.gate_proj", "avg_loss": "3.7738", "time": "2.3254"}, {"layer": 48, "module": "mlp.down_proj", "avg_loss": "0.0617", "time": "16.9508"}, {"layer": 49, "module": "self_attn.k_proj", "avg_loss": "1.0907", "time": "2.1344"}, {"layer": 49, "module": "self_attn.v_proj", "avg_loss": "0.1234", "time": "2.3376"}, {"layer": 49, "module": "self_attn.q_proj", "avg_loss": "1.5673", "time": "2.2792"}, {"layer": 49, "module": "self_attn.o_proj", "avg_loss": "0.0049", "time": "2.2311"}, {"layer": 49, "module": "mlp.up_proj", "avg_loss": "3.2565", "time": "2.3003"}, {"layer": 49, "module": "mlp.gate_proj", "avg_loss": "3.8816", "time": "2.3870"}, {"layer": 49, "module": "mlp.down_proj", "avg_loss": "0.0688", "time": "11.1178"}, {"layer": 50, "module": "self_attn.k_proj", "avg_loss": "1.1160", "time": "2.1548"}, {"layer": 50, "module": "self_attn.v_proj", "avg_loss": "0.1163", "time": "2.2012"}, {"layer": 50, "module": "self_attn.q_proj", "avg_loss": "1.5585", "time": "2.2243"}, {"layer": 50, "module": "self_attn.o_proj", "avg_loss": "0.0064", "time": "2.2423"}, {"layer": 50, "module": "mlp.up_proj", "avg_loss": "3.3524", "time": "2.3503"}, {"layer": 50, "module": "mlp.gate_proj", "avg_loss": "3.9966", "time": "2.3442"}, {"layer": 50, "module": "mlp.down_proj", "avg_loss": "0.0683", "time": "11.1286"}, {"layer": 51, "module": "self_attn.k_proj", "avg_loss": "0.9289", "time": "2.1935"}, {"layer": 51, "module": "self_attn.v_proj", "avg_loss": "0.0942", "time": "2.1488"}, {"layer": 51, "module": "self_attn.q_proj", "avg_loss": "1.0070", "time": "2.2291"}, {"layer": 51, "module": "self_attn.o_proj", "avg_loss": "0.0056", "time": "2.3676"}, {"layer": 51, "module": "mlp.up_proj", "avg_loss": "3.4238", "time": "2.5192"}, {"layer": 51, "module": "mlp.gate_proj", "avg_loss": "4.0851", "time": "2.4819"}, {"layer": 51, "module": "mlp.down_proj", "avg_loss": "0.0635", "time": "11.1578"}, {"layer": 52, "module": "self_attn.k_proj", "avg_loss": "1.0201", "time": "2.1689"}, {"layer": 52, "module": "self_attn.v_proj", "avg_loss": "0.1151", "time": "2.1680"}, {"layer": 52, "module": "self_attn.q_proj", "avg_loss": "1.4009", "time": "2.2136"}, {"layer": 52, "module": "self_attn.o_proj", "avg_loss": "0.0076", "time": "2.2242"}, {"layer": 52, "module": "mlp.up_proj", "avg_loss": "3.5032", "time": "2.3570"}, {"layer": 52, "module": "mlp.gate_proj", "avg_loss": "4.1729", "time": "2.3617"}, {"layer": 52, "module": "mlp.down_proj", "avg_loss": "0.0691", "time": "11.0436"}, {"layer": 53, "module": "self_attn.k_proj", "avg_loss": "1.5932", "time": "2.1510"}, {"layer": 53, "module": "self_attn.v_proj", "avg_loss": "0.1606", "time": "2.1391"}, {"layer": 53, "module": "self_attn.q_proj", "avg_loss": "2.9191", "time": "2.1710"}, {"layer": 53, "module": "self_attn.o_proj", "avg_loss": "0.0163", "time": "2.1882"}, {"layer": 53, "module": "mlp.up_proj", "avg_loss": "3.5688", "time": "2.3320"}, {"layer": 53, "module": "mlp.gate_proj", "avg_loss": "4.2689", "time": "2.3301"}, {"layer": 53, "module": "mlp.down_proj", "avg_loss": "0.0772", "time": "11.3115"}, {"layer": 54, "module": "self_attn.k_proj", "avg_loss": "1.3127", "time": "2.8158"}, {"layer": 54, "module": "self_attn.v_proj", "avg_loss": "0.1332", "time": "2.1227"}, {"layer": 54, "module": "self_attn.q_proj", "avg_loss": "1.7549", "time": "2.2132"}, {"layer": 54, "module": "self_attn.o_proj", "avg_loss": "0.0064", "time": "2.2526"}, {"layer": 54, "module": "mlp.up_proj", "avg_loss": "3.6895", "time": "2.3295"}, {"layer": 54, "module": "mlp.gate_proj", "avg_loss": "4.4158", "time": "2.3146"}, {"layer": 54, "module": "mlp.down_proj", "avg_loss": "0.0754", "time": "11.4966"}, {"layer": 55, "module": "self_attn.k_proj", "avg_loss": "0.9449", "time": "2.1646"}, {"layer": 55, "module": "self_attn.v_proj", "avg_loss": "0.0838", "time": "2.4741"}, {"layer": 55, "module": "self_attn.q_proj", "avg_loss": "1.0269", "time": "2.2758"}, {"layer": 55, "module": "self_attn.o_proj", "avg_loss": "0.0096", "time": "2.5404"}, {"layer": 55, "module": "mlp.up_proj", "avg_loss": "3.7722", "time": "2.6583"}, {"layer": 55, "module": "mlp.gate_proj", "avg_loss": "4.4638", "time": "2.6545"}, {"layer": 55, "module": "mlp.down_proj", "avg_loss": "0.0723", "time": "12.2239"}, {"layer": 56, "module": "self_attn.k_proj", "avg_loss": "0.8518", "time": "2.5223"}, {"layer": 56, "module": "self_attn.v_proj", "avg_loss": "0.0859", "time": "2.4997"}, {"layer": 56, "module": "self_attn.q_proj", "avg_loss": "1.1305", "time": "2.5938"}, {"layer": 56, "module": "self_attn.o_proj", "avg_loss": "0.0086", "time": "2.5501"}, {"layer": 56, "module": "mlp.up_proj", "avg_loss": "3.7874", "time": "2.7679"}, {"layer": 56, "module": "mlp.gate_proj", "avg_loss": "4.4930", "time": "2.4068"}, {"layer": 56, "module": "mlp.down_proj", "avg_loss": "0.0764", "time": "11.9478"}, {"layer": 57, "module": "self_attn.k_proj", "avg_loss": "1.5925", "time": "2.1555"}, {"layer": 57, "module": "self_attn.v_proj", "avg_loss": "0.1966", "time": "2.4286"}, {"layer": 57, "module": "self_attn.q_proj", "avg_loss": "2.9786", "time": "2.4948"}, {"layer": 57, "module": "self_attn.o_proj", "avg_loss": "0.0138", "time": "2.2762"}, {"layer": 57, "module": "mlp.up_proj", "avg_loss": "3.8554", "time": "2.5377"}, {"layer": 57, "module": "mlp.gate_proj", "avg_loss": "4.6229", "time": "2.6450"}, {"layer": 57, "module": "mlp.down_proj", "avg_loss": "0.0837", "time": "13.4185"}, {"layer": 58, "module": "self_attn.k_proj", "avg_loss": "1.2939", "time": "2.4924"}, {"layer": 58, "module": "self_attn.v_proj", "avg_loss": "0.1466", "time": "2.3488"}, {"layer": 58, "module": "self_attn.q_proj", "avg_loss": "1.7831", "time": "2.4576"}, {"layer": 58, "module": "self_attn.o_proj", "avg_loss": "0.0118", "time": "2.2869"}, {"layer": 58, "module": "mlp.up_proj", "avg_loss": "3.9844", "time": "2.5134"}, {"layer": 58, "module": "mlp.gate_proj", "avg_loss": "4.7872", "time": "2.5032"}, {"layer": 58, "module": "mlp.down_proj", "avg_loss": "0.0843", "time": "12.9486"}, {"layer": 59, "module": "self_attn.k_proj", "avg_loss": "0.9669", "time": "2.8121"}, {"layer": 59, "module": "self_attn.v_proj", "avg_loss": "0.1203", "time": "2.3197"}, {"layer": 59, "module": "self_attn.q_proj", "avg_loss": "1.1647", "time": "2.3491"}, {"layer": 59, "module": "self_attn.o_proj", "avg_loss": "0.0082", "time": "3.3607"}, {"layer": 59, "module": "mlp.up_proj", "avg_loss": "4.0677", "time": "2.6960"}, {"layer": 59, "module": "mlp.gate_proj", "avg_loss": "4.8120", "time": "2.4537"}, {"layer": 59, "module": "mlp.down_proj", "avg_loss": "0.0803", "time": "11.9518"}, {"layer": 60, "module": "self_attn.k_proj", "avg_loss": "0.9486", "time": "2.5062"}, {"layer": 60, "module": "self_attn.v_proj", "avg_loss": "0.0979", "time": "2.4530"}, {"layer": 60, "module": "self_attn.q_proj", "avg_loss": "1.0229", "time": "2.4611"}, {"layer": 60, "module": "self_attn.o_proj", "avg_loss": "0.0055", "time": "2.3857"}, {"layer": 60, "module": "mlp.up_proj", "avg_loss": "4.1927", "time": "3.2814"}, {"layer": 60, "module": "mlp.gate_proj", "avg_loss": "5.0441", "time": "2.8121"}, {"layer": 60, "module": "mlp.down_proj", "avg_loss": "0.0888", "time": "10.9882"}, {"layer": 61, "module": "self_attn.k_proj", "avg_loss": "1.5447", "time": "2.0483"}, {"layer": 61, "module": "self_attn.v_proj", "avg_loss": "0.2319", "time": "2.2497"}, {"layer": 61, "module": "self_attn.q_proj", "avg_loss": "2.8067", "time": "2.4656"}, {"layer": 61, "module": "self_attn.o_proj", "avg_loss": "0.0114", "time": "2.1375"}, {"layer": 61, "module": "mlp.up_proj", "avg_loss": "4.2931", "time": "2.2500"}, {"layer": 61, "module": "mlp.gate_proj", "avg_loss": "5.2370", "time": "2.3491"}, {"layer": 61, "module": "mlp.down_proj", "avg_loss": "0.1010", "time": "10.8398"}, {"layer": 62, "module": "self_attn.k_proj", "avg_loss": "1.1489", "time": "2.0788"}, {"layer": 62, "module": "self_attn.v_proj", "avg_loss": "0.1405", "time": "2.1153"}, {"layer": 62, "module": "self_attn.q_proj", "avg_loss": "1.2857", "time": "2.1534"}, {"layer": 62, "module": "self_attn.o_proj", "avg_loss": "0.0091", "time": "2.0947"}, {"layer": 62, "module": "mlp.up_proj", "avg_loss": "4.3813", "time": "2.2918"}, {"layer": 62, "module": "mlp.gate_proj", "avg_loss": "5.3303", "time": "2.2712"}, {"layer": 62, "module": "mlp.down_proj", "avg_loss": "0.0987", "time": "11.0789"}, {"layer": 63, "module": "self_attn.k_proj", "avg_loss": "0.8684", "time": "2.1155"}, {"layer": 63, "module": "self_attn.v_proj", "avg_loss": "0.0581", "time": "2.1090"}, {"layer": 63, "module": "self_attn.q_proj", "avg_loss": "0.4725", "time": "2.1719"}, {"layer": 63, "module": "self_attn.o_proj", "avg_loss": "0.0043", "time": "2.1868"}, {"layer": 63, "module": "mlp.up_proj", "avg_loss": "4.4117", "time": "2.2723"}, {"layer": 63, "module": "mlp.gate_proj", "avg_loss": "5.2807", "time": "2.2665"}, {"layer": 63, "module": "mlp.down_proj", "avg_loss": "0.0907", "time": "10.9134"}, {"layer": 64, "module": "self_attn.k_proj", "avg_loss": "0.9271", "time": "2.1335"}, {"layer": 64, "module": "self_attn.v_proj", "avg_loss": "0.0973", "time": "2.1017"}, {"layer": 64, "module": "self_attn.q_proj", "avg_loss": "0.9333", "time": "2.1569"}, {"layer": 64, "module": "self_attn.o_proj", "avg_loss": "0.0080", "time": "2.2548"}, {"layer": 64, "module": "mlp.up_proj", "avg_loss": "4.5773", "time": "2.3103"}, {"layer": 64, "module": "mlp.gate_proj", "avg_loss": "5.5771", "time": "2.3893"}, {"layer": 64, "module": "mlp.down_proj", "avg_loss": "0.1005", "time": "10.6589"}, {"layer": 65, "module": "self_attn.k_proj", "avg_loss": "1.4859", "time": "2.1759"}, {"layer": 65, "module": "self_attn.v_proj", "avg_loss": "0.2075", "time": "2.0889"}, {"layer": 65, "module": "self_attn.q_proj", "avg_loss": "2.2830", "time": "2.1945"}, {"layer": 65, "module": "self_attn.o_proj", "avg_loss": "0.0182", "time": "2.1165"}, {"layer": 65, "module": "mlp.up_proj", "avg_loss": "4.7719", "time": "2.2606"}, {"layer": 65, "module": "mlp.gate_proj", "avg_loss": "5.9687", "time": "2.2431"}, {"layer": 65, "module": "mlp.down_proj", "avg_loss": "0.1181", "time": "10.6729"}, {"layer": 66, "module": "self_attn.k_proj", "avg_loss": "0.9040", "time": "2.1032"}, {"layer": 66, "module": "self_attn.v_proj", "avg_loss": "0.0830", "time": "2.0963"}, {"layer": 66, "module": "self_attn.q_proj", "avg_loss": "0.7910", "time": "2.1869"}, {"layer": 66, "module": "self_attn.o_proj", "avg_loss": "0.0057", "time": "2.2624"}, {"layer": 66, "module": "mlp.up_proj", "avg_loss": "4.8326", "time": "2.2887"}, {"layer": 66, "module": "mlp.gate_proj", "avg_loss": "6.0423", "time": "2.2494"}, {"layer": 66, "module": "mlp.down_proj", "avg_loss": "0.1149", "time": "10.7798"}, {"layer": 67, "module": "self_attn.k_proj", "avg_loss": "0.8976", "time": "2.1822"}, {"layer": 67, "module": "self_attn.v_proj", "avg_loss": "0.1104", "time": "2.1266"}, {"layer": 67, "module": "self_attn.q_proj", "avg_loss": "1.0224", "time": "2.2375"}, {"layer": 67, "module": "self_attn.o_proj", "avg_loss": "0.0098", "time": "2.1674"}, {"layer": 67, "module": "mlp.up_proj", "avg_loss": "4.8444", "time": "2.3403"}, {"layer": 67, "module": "mlp.gate_proj", "avg_loss": "5.8299", "time": "2.2446"}, {"layer": 67, "module": "mlp.down_proj", "avg_loss": "0.1058", "time": "10.9653"}, {"layer": 68, "module": "self_attn.k_proj", "avg_loss": "1.4440", "time": "2.1311"}, {"layer": 68, "module": "self_attn.v_proj", "avg_loss": "0.2341", "time": "2.0929"}, {"layer": 68, "module": "self_attn.q_proj", "avg_loss": "2.2420", "time": "2.2183"}, {"layer": 68, "module": "self_attn.o_proj", "avg_loss": "0.0188", "time": "2.1945"}, {"layer": 68, "module": "mlp.up_proj", "avg_loss": "5.0351", "time": "2.3526"}, {"layer": 68, "module": "mlp.gate_proj", "avg_loss": "6.0770", "time": "2.2704"}, {"layer": 68, "module": "mlp.down_proj", "avg_loss": "0.1168", "time": "10.9222"}, {"layer": 69, "module": "self_attn.k_proj", "avg_loss": "1.5391", "time": "2.1107"}, {"layer": 69, "module": "self_attn.v_proj", "avg_loss": "0.2907", "time": "2.0946"}, {"layer": 69, "module": "self_attn.q_proj", "avg_loss": "2.6824", "time": "2.1842"}, {"layer": 69, "module": "self_attn.o_proj", "avg_loss": "0.0223", "time": "2.1581"}, {"layer": 69, "module": "mlp.up_proj", "avg_loss": "5.3251", "time": "2.3772"}, {"layer": 69, "module": "mlp.gate_proj", "avg_loss": "6.6366", "time": "2.2277"}, {"layer": 69, "module": "mlp.down_proj", "avg_loss": "0.1433", "time": "10.9078"}, {"layer": 70, "module": "self_attn.k_proj", "avg_loss": "1.2412", "time": "2.1232"}, {"layer": 70, "module": "self_attn.v_proj", "avg_loss": "0.2299", "time": "2.1128"}, {"layer": 70, "module": "self_attn.q_proj", "avg_loss": "2.0021", "time": "2.1650"}, {"layer": 70, "module": "self_attn.o_proj", "avg_loss": "0.0192", "time": "2.1631"}, {"layer": 70, "module": "mlp.up_proj", "avg_loss": "5.4520", "time": "2.3461"}, {"layer": 70, "module": "mlp.gate_proj", "avg_loss": "6.7114", "time": "2.3102"}, {"layer": 70, "module": "mlp.down_proj", "avg_loss": "0.1439", "time": "11.6774"}, {"layer": 71, "module": "self_attn.k_proj", "avg_loss": "1.1749", "time": "2.1096"}, {"layer": 71, "module": "self_attn.v_proj", "avg_loss": "0.2096", "time": "2.1120"}, {"layer": 71, "module": "self_attn.q_proj", "avg_loss": "2.1842", "time": "2.1841"}, {"layer": 71, "module": "self_attn.o_proj", "avg_loss": "0.0294", "time": "2.1569"}, {"layer": 71, "module": "mlp.up_proj", "avg_loss": "5.5132", "time": "2.3457"}, {"layer": 71, "module": "mlp.gate_proj", "avg_loss": "6.5528", "time": "2.2910"}, {"layer": 71, "module": "mlp.down_proj", "avg_loss": "0.1433", "time": "10.9313"}, {"layer": 72, "module": "self_attn.k_proj", "avg_loss": "1.3755", "time": "2.1045"}, {"layer": 72, "module": "self_attn.v_proj", "avg_loss": "0.2820", "time": "2.1033"}, {"layer": 72, "module": "self_attn.q_proj", "avg_loss": "2.9723", "time": "2.1704"}, {"layer": 72, "module": "self_attn.o_proj", "avg_loss": "0.0499", "time": "2.1580"}, {"layer": 72, "module": "mlp.up_proj", "avg_loss": "5.8745", "time": "2.3227"}, {"layer": 72, "module": "mlp.gate_proj", "avg_loss": "6.9651", "time": "2.2985"}, {"layer": 72, "module": "mlp.down_proj", "avg_loss": "0.1698", "time": "10.8350"}, {"layer": 73, "module": "self_attn.k_proj", "avg_loss": "1.4094", "time": "2.0903"}, {"layer": 73, "module": "self_attn.v_proj", "avg_loss": "0.3139", "time": "2.0519"}, {"layer": 73, "module": "self_attn.q_proj", "avg_loss": "3.2349", "time": "2.1052"}, {"layer": 73, "module": "self_attn.o_proj", "avg_loss": "0.0377", "time": "2.1632"}, {"layer": 73, "module": "mlp.up_proj", "avg_loss": "6.2045", "time": "2.3015"}, {"layer": 73, "module": "mlp.gate_proj", "avg_loss": "7.6306", "time": "2.2878"}, {"layer": 73, "module": "mlp.down_proj", "avg_loss": "0.2064", "time": "10.8582"}, {"layer": 74, "module": "self_attn.k_proj", "avg_loss": "1.4072", "time": "2.0768"}, {"layer": 74, "module": "self_attn.v_proj", "avg_loss": "0.3345", "time": "2.0583"}, {"layer": 74, "module": "self_attn.q_proj", "avg_loss": "3.1687", "time": "2.0840"}, {"layer": 74, "module": "self_attn.o_proj", "avg_loss": "0.0671", "time": "2.1023"}, {"layer": 74, "module": "mlp.up_proj", "avg_loss": "6.3774", "time": "2.3439"}, {"layer": 74, "module": "mlp.gate_proj", "avg_loss": "7.7800", "time": "2.2427"}, {"layer": 74, "module": "mlp.down_proj", "avg_loss": "0.2281", "time": "10.8692"}, {"layer": 75, "module": "self_attn.k_proj", "avg_loss": "1.3489", "time": "2.0618"}, {"layer": 75, "module": "self_attn.v_proj", "avg_loss": "0.3295", "time": "2.0675"}, {"layer": 75, "module": "self_attn.q_proj", "avg_loss": "3.1315", "time": "2.1264"}, {"layer": 75, "module": "self_attn.o_proj", "avg_loss": "0.0803", "time": "2.1843"}, {"layer": 75, "module": "mlp.up_proj", "avg_loss": "6.7887", "time": "2.2997"}, {"layer": 75, "module": "mlp.gate_proj", "avg_loss": "8.2213", "time": "2.3144"}, {"layer": 75, "module": "mlp.down_proj", "avg_loss": "0.2640", "time": "10.8770"}, {"layer": 76, "module": "self_attn.k_proj", "avg_loss": "1.2652", "time": "2.0859"}, {"layer": 76, "module": "self_attn.v_proj", "avg_loss": "0.3689", "time": "2.1039"}, {"layer": 76, "module": "self_attn.q_proj", "avg_loss": "3.2988", "time": "2.1568"}, {"layer": 76, "module": "self_attn.o_proj", "avg_loss": "0.1543", "time": "2.1340"}, {"layer": 76, "module": "mlp.up_proj", "avg_loss": "7.1897", "time": "2.2988"}, {"layer": 76, "module": "mlp.gate_proj", "avg_loss": "8.9562", "time": "2.2709"}, {"layer": 76, "module": "mlp.down_proj", "avg_loss": "0.3607", "time": "10.8481"}, {"layer": 77, "module": "self_attn.k_proj", "avg_loss": "1.0979", "time": "2.1108"}, {"layer": 77, "module": "self_attn.v_proj", "avg_loss": "0.3566", "time": "2.1288"}, {"layer": 77, "module": "self_attn.q_proj", "avg_loss": "2.9150", "time": "2.1524"}, {"layer": 77, "module": "self_attn.o_proj", "avg_loss": "0.0780", "time": "2.1765"}, {"layer": 77, "module": "mlp.up_proj", "avg_loss": "8.3468", "time": "2.3633"}, {"layer": 77, "module": "mlp.gate_proj", "avg_loss": "12.7151", "time": "2.2980"}, {"layer": 77, "module": "mlp.down_proj", "avg_loss": "0.5127", "time": "10.8747"}, {"layer": 78, "module": "self_attn.k_proj", "avg_loss": "1.2737", "time": "2.1396"}, {"layer": 78, "module": "self_attn.v_proj", "avg_loss": "0.4278", "time": "2.1499"}, {"layer": 78, "module": "self_attn.q_proj", "avg_loss": "3.2045", "time": "2.1768"}, {"layer": 78, "module": "self_attn.o_proj", "avg_loss": "0.1335", "time": "2.1957"}, {"layer": 78, "module": "mlp.up_proj", "avg_loss": "8.0339", "time": "2.3193"}, {"layer": 78, "module": "mlp.gate_proj", "avg_loss": "10.1534", "time": "2.2767"}, {"layer": 78, "module": "mlp.down_proj", "avg_loss": "0.6413", "time": "10.7532"}, {"layer": 79, "module": "self_attn.k_proj", "avg_loss": "1.1283", "time": "2.3559"}, {"layer": 79, "module": "self_attn.v_proj", "avg_loss": "0.3104", "time": "2.0844"}, {"layer": 79, "module": "self_attn.q_proj", "avg_loss": "2.6459", "time": "2.1630"}, {"layer": 79, "module": "self_attn.o_proj", "avg_loss": "0.1654", "time": "2.1770"}, {"layer": 79, "module": "mlp.up_proj", "avg_loss": "7.7876", "time": "2.3229"}, {"layer": 79, "module": "mlp.gate_proj", "avg_loss": "9.6232", "time": "2.2428"}, {"layer": 79, "module": "mlp.down_proj", "avg_loss": "1.3524", "time": "10.8338"}, {"layer": 80, "module": "self_attn.k_proj", "avg_loss": "0.5214", "time": "2.1245"}, {"layer": 80, "module": "self_attn.v_proj", "avg_loss": "0.1630", "time": "2.0802"}, {"layer": 80, "module": "self_attn.q_proj", "avg_loss": "1.0062", "time": "2.2022"}, {"layer": 80, "module": "self_attn.o_proj", "avg_loss": "0.0864", "time": "2.1480"}, {"layer": 80, "module": "mlp.up_proj", "avg_loss": "4.9997", "time": "2.3091"}, {"layer": 80, "module": "mlp.gate_proj", "avg_loss": "5.8891", "time": "2.2619"}, {"layer": 80, "module": "mlp.down_proj", "avg_loss": "1.4404", "time": "10.9247"}]