diff --git "a/measurement.json" "b/measurement.json" --- "a/measurement.json" +++ "b/measurement.json" @@ -2,7 +2,7 @@ "measurement": { "model.layers.0.self_attn": [ { - "accuracy": 0.8782956600189209, + "accuracy": 0.8949892520904541, "total_bits": 89141248, "q_proj": { "group_size": { @@ -66,7 +66,7 @@ } }, { - "accuracy": 0.8954365253448486, + "accuracy": 0.9084022045135498, "total_bits": 91697152, "q_proj": { "group_size": { @@ -130,7 +130,7 @@ } }, { - "accuracy": 0.9110851883888245, + "accuracy": 0.9217575788497925, "total_bits": 95234560, "q_proj": { "group_size": { @@ -194,7 +194,7 @@ } }, { - "accuracy": 0.9389305114746094, + "accuracy": 0.9459508657455444, "total_bits": 111748096, "q_proj": { "group_size": { @@ -258,7 +258,7 @@ } }, { - "accuracy": 0.9404471516609192, + "accuracy": 0.947314441204071, "total_bits": 132388864, "q_proj": { "group_size": { @@ -322,7 +322,7 @@ } }, { - "accuracy": 0.9450368881225586, + "accuracy": 0.9525381326675415, "total_bits": 132455936, "q_proj": { "group_size": { @@ -386,7 +386,7 @@ } }, { - "accuracy": 0.9496564269065857, + "accuracy": 0.9558830261230469, "total_bits": 169089024, "q_proj": { "group_size": { @@ -438,7 +438,7 @@ } }, { - "accuracy": 0.9554595947265625, + "accuracy": 0.9619208574295044, "total_bits": 169221632, "q_proj": { "group_size": { @@ -490,7 +490,7 @@ } }, { - "accuracy": 0.9594838619232178, + "accuracy": 0.9619631767272949, "total_bits": 170671104, "q_proj": { "group_size": { @@ -542,7 +542,7 @@ } }, { - "accuracy": 0.9606420397758484, + "accuracy": 0.9629597663879395, "total_bits": 173039616, "q_proj": { "group_size": { @@ -594,7 +594,7 @@ } }, { - "accuracy": 0.9656413793563843, + "accuracy": 0.9697002172470093, "total_bits": 174398976, "q_proj": { "group_size": { @@ -658,7 +658,7 @@ } }, { - "accuracy": 0.9694782495498657, + "accuracy": 0.9721242785453796, "total_bits": 175225856, "q_proj": { "group_size": { @@ -722,7 +722,7 @@ } }, { - "accuracy": 0.9676574468612671, + "accuracy": 0.9712086319923401, "total_bits": 178728960, "q_proj": { "group_size": { @@ -783,7 +783,7 @@ } }, { - "accuracy": 0.971149206161499, + "accuracy": 0.9735980033874512, "total_bits": 181067776, "q_proj": { "group_size": { @@ -844,7 +844,7 @@ } }, { - "accuracy": 0.9798630475997925, + "accuracy": 0.9818163514137268, "total_bits": 219944960, "q_proj": { "group_size": { @@ -905,7 +905,7 @@ } }, { - "accuracy": 0.9819849133491516, + "accuracy": 0.9828636050224304, "total_bits": 223010816, "q_proj": { "group_size": { @@ -966,7 +966,7 @@ } }, { - "accuracy": 0.9812813997268677, + "accuracy": 0.9831178784370422, "total_bits": 252975104, "q_proj": { "group_size": { @@ -1018,7 +1018,7 @@ } }, { - "accuracy": 0.9852613210678101, + "accuracy": 0.9854915738105774, "total_bits": 265314304, "q_proj": { "group_size": { @@ -1070,7 +1070,7 @@ } }, { - "accuracy": 0.9887813329696655, + "accuracy": 0.9882479906082153, "total_bits": 336861184, "q_proj": { "group_size": { @@ -1124,7 +1124,7 @@ ], "model.layers.0.block_sparse_moe": [ { - "accuracy": 0.9010370969772339, + "accuracy": 0.9121735692024231, "total_bits": 3157926400, "w1": { "group_size": { @@ -1176,7 +1176,7 @@ } }, { - "accuracy": 0.9023282527923584, + "accuracy": 0.9166474342346191, "total_bits": 3268026880, "w1": { "group_size": { @@ -1228,7 +1228,7 @@ } }, { - "accuracy": 0.9117254614830017, + "accuracy": 0.9229668974876404, "total_bits": 3652411392, "w1": { "group_size": { @@ -1277,7 +1277,7 @@ } }, { - "accuracy": 0.9140271544456482, + "accuracy": 0.9246082901954651, "total_bits": 4098056192, "w1": { "group_size": { @@ -1326,7 +1326,7 @@ } }, { - "accuracy": 0.9651581048965454, + "accuracy": 0.9710184931755066, "total_bits": 4621411072, "w1": { "group_size": { @@ -1378,7 +1378,7 @@ } }, { - "accuracy": 0.9668064117431641, + "accuracy": 0.973444938659668, "total_bits": 4737212416, "w1": { "group_size": { @@ -1430,7 +1430,7 @@ } }, { - "accuracy": 0.9710707068443298, + "accuracy": 0.9766731262207031, "total_bits": 5093868288, "w1": { "group_size": { @@ -1479,7 +1479,7 @@ } }, { - "accuracy": 0.9821673035621643, + "accuracy": 0.9856899380683899, "total_bits": 5824164608, "w1": { "group_size": { @@ -1522,7 +1522,7 @@ } }, { - "accuracy": 0.9828959703445435, + "accuracy": 0.9860372543334961, "total_bits": 5910044672, "w1": { "group_size": { @@ -1565,7 +1565,7 @@ } }, { - "accuracy": 0.9829830527305603, + "accuracy": 0.9863629341125488, "total_bits": 6006579968, "w1": { "group_size": { @@ -1617,7 +1617,7 @@ } }, { - "accuracy": 0.9841697216033936, + "accuracy": 0.986440896987915, "total_bits": 6122381312, "w1": { "group_size": { @@ -1669,7 +1669,7 @@ } }, { - "accuracy": 0.9909576177597046, + "accuracy": 0.9924905300140381, "total_bits": 7391748864, "w1": { "group_size": { @@ -1721,7 +1721,7 @@ } }, { - "accuracy": 0.9907756447792053, + "accuracy": 0.9924672842025757, "total_bits": 7507550208, "w1": { "group_size": { @@ -1773,7 +1773,7 @@ } }, { - "accuracy": 0.9946053624153137, + "accuracy": 0.9953057169914246, "total_bits": 8550425344, "w1": { "group_size": { @@ -1816,7 +1816,7 @@ } }, { - "accuracy": 0.9948067665100098, + "accuracy": 0.9954277873039246, "total_bits": 8877312000, "w1": { "group_size": { @@ -1865,7 +1865,7 @@ } }, { - "accuracy": 0.9954165816307068, + "accuracy": 0.9958561062812805, "total_bits": 9674229760, "w1": { "group_size": { @@ -1911,7 +1911,7 @@ } }, { - "accuracy": 0.996612548828125, + "accuracy": 0.9966608881950378, "total_bits": 11318396928, "w1": { "group_size": { @@ -1953,7 +1953,7 @@ ], "model.layers.1.self_attn": [ { - "accuracy": 0.9068219661712646, + "accuracy": 0.9268010854721069, "total_bits": 89141248, "q_proj": { "group_size": { @@ -2017,7 +2017,7 @@ } }, { - "accuracy": 0.9158341884613037, + "accuracy": 0.9337272644042969, "total_bits": 91697152, "q_proj": { "group_size": { @@ -2081,7 +2081,7 @@ } }, { - "accuracy": 0.9260196685791016, + "accuracy": 0.9401007294654846, "total_bits": 95234560, "q_proj": { "group_size": { @@ -2145,7 +2145,7 @@ } }, { - "accuracy": 0.951158344745636, + "accuracy": 0.9576236605644226, "total_bits": 111748096, "q_proj": { "group_size": { @@ -2209,7 +2209,7 @@ } }, { - "accuracy": 0.9518294334411621, + "accuracy": 0.9587830901145935, "total_bits": 132388864, "q_proj": { "group_size": { @@ -2273,7 +2273,7 @@ } }, { - "accuracy": 0.9549686312675476, + "accuracy": 0.963056206703186, "total_bits": 132455936, "q_proj": { "group_size": { @@ -2337,7 +2337,7 @@ } }, { - "accuracy": 0.9645552635192871, + "accuracy": 0.9667342901229858, "total_bits": 169089024, "q_proj": { "group_size": { @@ -2389,7 +2389,7 @@ } }, { - "accuracy": 0.9669117331504822, + "accuracy": 0.9710803031921387, "total_bits": 169221632, "q_proj": { "group_size": { @@ -2441,7 +2441,7 @@ } }, { - "accuracy": 0.973613977432251, + "accuracy": 0.974844217300415, "total_bits": 170671104, "q_proj": { "group_size": { @@ -2493,7 +2493,7 @@ } }, { - "accuracy": 0.9746871590614319, + "accuracy": 0.9754443764686584, "total_bits": 173039616, "q_proj": { "group_size": { @@ -2545,7 +2545,7 @@ } }, { - "accuracy": 0.978078305721283, + "accuracy": 0.9786902666091919, "total_bits": 174398976, "q_proj": { "group_size": { @@ -2609,7 +2609,7 @@ } }, { - "accuracy": 0.979792594909668, + "accuracy": 0.9828436374664307, "total_bits": 175225856, "q_proj": { "group_size": { @@ -2673,7 +2673,7 @@ } }, { - "accuracy": 0.9797818660736084, + "accuracy": 0.9796522259712219, "total_bits": 178728960, "q_proj": { "group_size": { @@ -2734,7 +2734,7 @@ } }, { - "accuracy": 0.9815186858177185, + "accuracy": 0.9841315746307373, "total_bits": 181067776, "q_proj": { "group_size": { @@ -2795,7 +2795,7 @@ } }, { - "accuracy": 0.987970232963562, + "accuracy": 0.9882593154907227, "total_bits": 219944960, "q_proj": { "group_size": { @@ -2856,7 +2856,7 @@ } }, { - "accuracy": 0.9881510138511658, + "accuracy": 0.9905059337615967, "total_bits": 223010816, "q_proj": { "group_size": { @@ -2917,7 +2917,7 @@ } }, { - "accuracy": 0.9900265336036682, + "accuracy": 0.9895435571670532, "total_bits": 252975104, "q_proj": { "group_size": { @@ -2969,7 +2969,7 @@ } }, { - "accuracy": 0.9916079640388489, + "accuracy": 0.9933460354804993, "total_bits": 265314304, "q_proj": { "group_size": { @@ -3021,7 +3021,7 @@ } }, { - "accuracy": 0.9941823482513428, + "accuracy": 0.9945288300514221, "total_bits": 336861184, "q_proj": { "group_size": { @@ -3075,7 +3075,7 @@ ], "model.layers.1.block_sparse_moe": [ { - "accuracy": 0.9455899000167847, + "accuracy": 0.9530760049819946, "total_bits": 3157926400, "w1": { "group_size": { @@ -3127,7 +3127,7 @@ } }, { - "accuracy": 0.9476727843284607, + "accuracy": 0.9596723318099976, "total_bits": 3268026880, "w1": { "group_size": { @@ -3179,7 +3179,7 @@ } }, { - "accuracy": 0.9521229863166809, + "accuracy": 0.9608715176582336, "total_bits": 3652411392, "w1": { "group_size": { @@ -3228,7 +3228,7 @@ } }, { - "accuracy": 0.9522091150283813, + "accuracy": 0.9610072374343872, "total_bits": 4098056192, "w1": { "group_size": { @@ -3277,7 +3277,7 @@ } }, { - "accuracy": 0.9905972480773926, + "accuracy": 0.9908962249755859, "total_bits": 4621411072, "w1": { "group_size": { @@ -3329,7 +3329,7 @@ } }, { - "accuracy": 0.992286741733551, + "accuracy": 0.9920239448547363, "total_bits": 4737212416, "w1": { "group_size": { @@ -3381,7 +3381,7 @@ } }, { - "accuracy": 0.9924756288528442, + "accuracy": 0.9922430515289307, "total_bits": 5093868288, "w1": { "group_size": { @@ -3430,7 +3430,7 @@ } }, { - "accuracy": 0.9965330362319946, + "accuracy": 0.9966612458229065, "total_bits": 5824164608, "w1": { "group_size": { @@ -3473,7 +3473,7 @@ } }, { - "accuracy": 0.9961187243461609, + "accuracy": 0.995827853679657, "total_bits": 5910044672, "w1": { "group_size": { @@ -3516,7 +3516,7 @@ } }, { - "accuracy": 0.9971073269844055, + "accuracy": 0.9971485733985901, "total_bits": 6006579968, "w1": { "group_size": { @@ -3568,7 +3568,7 @@ } }, { - "accuracy": 0.9962476491928101, + "accuracy": 0.9962809681892395, "total_bits": 6122381312, "w1": { "group_size": { @@ -3620,7 +3620,7 @@ } }, { - "accuracy": 0.9982835650444031, + "accuracy": 0.9981284737586975, "total_bits": 7391748864, "w1": { "group_size": { @@ -3672,7 +3672,7 @@ } }, { - "accuracy": 0.997922420501709, + "accuracy": 0.997789204120636, "total_bits": 7507550208, "w1": { "group_size": { @@ -3724,7 +3724,7 @@ } }, { - "accuracy": 0.9987847208976746, + "accuracy": 0.998630940914154, "total_bits": 8550425344, "w1": { "group_size": { @@ -3767,7 +3767,7 @@ } }, { - "accuracy": 0.9987327456474304, + "accuracy": 0.9987000226974487, "total_bits": 8877312000, "w1": { "group_size": { @@ -3816,7 +3816,7 @@ } }, { - "accuracy": 0.9987553358078003, + "accuracy": 0.9987300634384155, "total_bits": 9674229760, "w1": { "group_size": { @@ -3862,7 +3862,7 @@ } }, { - "accuracy": 0.9988213181495667, + "accuracy": 0.9989659190177917, "total_bits": 11318396928, "w1": { "group_size": { @@ -3904,7 +3904,7 @@ ], "model.layers.2.self_attn": [ { - "accuracy": 0.9921015501022339, + "accuracy": 0.9907883405685425, "total_bits": 89141248, "q_proj": { "group_size": { @@ -3968,7 +3968,7 @@ } }, { - "accuracy": 0.9924874901771545, + "accuracy": 0.9915486574172974, "total_bits": 91697152, "q_proj": { "group_size": { @@ -4032,7 +4032,7 @@ } }, { - "accuracy": 0.9928646087646484, + "accuracy": 0.9922668933868408, "total_bits": 95234560, "q_proj": { "group_size": { @@ -4096,7 +4096,7 @@ } }, { - "accuracy": 0.9936481714248657, + "accuracy": 0.9926626086235046, "total_bits": 111748096, "q_proj": { "group_size": { @@ -4160,7 +4160,7 @@ } }, { - "accuracy": 0.9965026378631592, + "accuracy": 0.9961651563644409, "total_bits": 132388864, "q_proj": { "group_size": { @@ -4224,7 +4224,7 @@ } }, { - "accuracy": 0.9966097474098206, + "accuracy": 0.996486485004425, "total_bits": 132455936, "q_proj": { "group_size": { @@ -4288,7 +4288,7 @@ } }, { - "accuracy": 0.9970696568489075, + "accuracy": 0.9967833161354065, "total_bits": 169089024, "q_proj": { "group_size": { @@ -4340,7 +4340,7 @@ } }, { - "accuracy": 0.9970926642417908, + "accuracy": 0.9971050024032593, "total_bits": 169221632, "q_proj": { "group_size": { @@ -4392,7 +4392,7 @@ } }, { - "accuracy": 0.9972810745239258, + "accuracy": 0.9971670508384705, "total_bits": 170671104, "q_proj": { "group_size": { @@ -4444,7 +4444,7 @@ } }, { - "accuracy": 0.9973836541175842, + "accuracy": 0.9972885251045227, "total_bits": 173039616, "q_proj": { "group_size": { @@ -4496,7 +4496,7 @@ } }, { - "accuracy": 0.9980794787406921, + "accuracy": 0.998052179813385, "total_bits": 174398976, "q_proj": { "group_size": { @@ -4560,7 +4560,7 @@ } }, { - "accuracy": 0.9982506036758423, + "accuracy": 0.9979001879692078, "total_bits": 175225856, "q_proj": { "group_size": { @@ -4624,7 +4624,7 @@ } }, { - "accuracy": 0.9983083009719849, + "accuracy": 0.9981464147567749, "total_bits": 178728960, "q_proj": { "group_size": { @@ -4685,7 +4685,7 @@ } }, { - "accuracy": 0.9983274936676025, + "accuracy": 0.9981463551521301, "total_bits": 181067776, "q_proj": { "group_size": { @@ -4746,7 +4746,7 @@ } }, { - "accuracy": 0.9988442063331604, + "accuracy": 0.9986666440963745, "total_bits": 219944960, "q_proj": { "group_size": { @@ -4807,7 +4807,7 @@ } }, { - "accuracy": 0.9988612532615662, + "accuracy": 0.998752772808075, "total_bits": 223010816, "q_proj": { "group_size": { @@ -4868,7 +4868,7 @@ } }, { - "accuracy": 0.998910129070282, + "accuracy": 0.9987011551856995, "total_bits": 252975104, "q_proj": { "group_size": { @@ -4920,7 +4920,7 @@ } }, { - "accuracy": 0.9990594983100891, + "accuracy": 0.9988893866539001, "total_bits": 265314304, "q_proj": { "group_size": { @@ -4972,7 +4972,7 @@ } }, { - "accuracy": 0.9991552829742432, + "accuracy": 0.9989694356918335, "total_bits": 336861184, "q_proj": { "group_size": { @@ -5026,7 +5026,7 @@ ], "model.layers.2.block_sparse_moe": [ { - "accuracy": 0.9896231889724731, + "accuracy": 0.990573525428772, "total_bits": 3157926400, "w1": { "group_size": { @@ -5078,7 +5078,7 @@ } }, { - "accuracy": 0.9900255799293518, + "accuracy": 0.9909493327140808, "total_bits": 3268026880, "w1": { "group_size": { @@ -5130,7 +5130,7 @@ } }, { - "accuracy": 0.9911137223243713, + "accuracy": 0.9919142723083496, "total_bits": 3652411392, "w1": { "group_size": { @@ -5179,7 +5179,7 @@ } }, { - "accuracy": 0.9913663268089294, + "accuracy": 0.9921355843544006, "total_bits": 4098056192, "w1": { "group_size": { @@ -5228,7 +5228,7 @@ } }, { - "accuracy": 0.995244026184082, + "accuracy": 0.9956555962562561, "total_bits": 4621411072, "w1": { "group_size": { @@ -5280,7 +5280,7 @@ } }, { - "accuracy": 0.9956021904945374, + "accuracy": 0.9959881901741028, "total_bits": 4737212416, "w1": { "group_size": { @@ -5332,7 +5332,7 @@ } }, { - "accuracy": 0.9959226846694946, + "accuracy": 0.996266782283783, "total_bits": 5093868288, "w1": { "group_size": { @@ -5381,7 +5381,7 @@ } }, { - "accuracy": 0.9976073503494263, + "accuracy": 0.9978137016296387, "total_bits": 5824164608, "w1": { "group_size": { @@ -5424,7 +5424,7 @@ } }, { - "accuracy": 0.997805655002594, + "accuracy": 0.9979930520057678, "total_bits": 5910044672, "w1": { "group_size": { @@ -5467,7 +5467,7 @@ } }, { - "accuracy": 0.9976128935813904, + "accuracy": 0.9978152513504028, "total_bits": 6006579968, "w1": { "group_size": { @@ -5519,7 +5519,7 @@ } }, { - "accuracy": 0.9979081749916077, + "accuracy": 0.9980902671813965, "total_bits": 6122381312, "w1": { "group_size": { @@ -5571,7 +5571,7 @@ } }, { - "accuracy": 0.998801589012146, + "accuracy": 0.9989048838615417, "total_bits": 7391748864, "w1": { "group_size": { @@ -5623,7 +5623,7 @@ } }, { - "accuracy": 0.9988908171653748, + "accuracy": 0.998988926410675, "total_bits": 7507550208, "w1": { "group_size": { @@ -5675,7 +5675,7 @@ } }, { - "accuracy": 0.9993197917938232, + "accuracy": 0.9993789792060852, "total_bits": 8550425344, "w1": { "group_size": { @@ -5718,7 +5718,7 @@ } }, { - "accuracy": 0.999345064163208, + "accuracy": 0.9994022846221924, "total_bits": 8877312000, "w1": { "group_size": { @@ -5767,7 +5767,7 @@ } }, { - "accuracy": 0.9993914365768433, + "accuracy": 0.9994425773620605, "total_bits": 9674229760, "w1": { "group_size": { @@ -5813,7 +5813,7 @@ } }, { - "accuracy": 0.9996505975723267, + "accuracy": 0.9996842741966248, "total_bits": 11318396928, "w1": { "group_size": { @@ -5855,7 +5855,7 @@ ], "model.layers.3.self_attn": [ { - "accuracy": 0.9850022196769714, + "accuracy": 0.9829213619232178, "total_bits": 89141248, "q_proj": { "group_size": { @@ -5919,7 +5919,7 @@ } }, { - "accuracy": 0.9853205680847168, + "accuracy": 0.983039140701294, "total_bits": 91697152, "q_proj": { "group_size": { @@ -5983,7 +5983,7 @@ } }, { - "accuracy": 0.9863635897636414, + "accuracy": 0.983917772769928, "total_bits": 95234560, "q_proj": { "group_size": { @@ -6047,7 +6047,7 @@ } }, { - "accuracy": 0.9875063896179199, + "accuracy": 0.9848746657371521, "total_bits": 111748096, "q_proj": { "group_size": { @@ -6111,7 +6111,7 @@ } }, { - "accuracy": 0.9936282634735107, + "accuracy": 0.9937005639076233, "total_bits": 132388864, "q_proj": { "group_size": { @@ -6175,7 +6175,7 @@ } }, { - "accuracy": 0.9939200282096863, + "accuracy": 0.9938347935676575, "total_bits": 132455936, "q_proj": { "group_size": { @@ -6239,7 +6239,7 @@ } }, { - "accuracy": 0.9946187138557434, + "accuracy": 0.9945495128631592, "total_bits": 169089024, "q_proj": { "group_size": { @@ -6291,7 +6291,7 @@ } }, { - "accuracy": 0.9950005412101746, + "accuracy": 0.9947211742401123, "total_bits": 169221632, "q_proj": { "group_size": { @@ -6343,7 +6343,7 @@ } }, { - "accuracy": 0.9956021308898926, + "accuracy": 0.9950836300849915, "total_bits": 170671104, "q_proj": { "group_size": { @@ -6395,7 +6395,7 @@ } }, { - "accuracy": 0.9957845211029053, + "accuracy": 0.9953095316886902, "total_bits": 173039616, "q_proj": { "group_size": { @@ -6447,7 +6447,7 @@ } }, { - "accuracy": 0.9966782927513123, + "accuracy": 0.9966582655906677, "total_bits": 174398976, "q_proj": { "group_size": { @@ -6511,7 +6511,7 @@ } }, { - "accuracy": 0.9970648884773254, + "accuracy": 0.9969700574874878, "total_bits": 175225856, "q_proj": { "group_size": { @@ -6575,7 +6575,7 @@ } }, { - "accuracy": 0.9969496726989746, + "accuracy": 0.9968969821929932, "total_bits": 178728960, "q_proj": { "group_size": { @@ -6636,7 +6636,7 @@ } }, { - "accuracy": 0.9973318576812744, + "accuracy": 0.997218132019043, "total_bits": 181067776, "q_proj": { "group_size": { @@ -6697,7 +6697,7 @@ } }, { - "accuracy": 0.9982284903526306, + "accuracy": 0.9973342418670654, "total_bits": 219944960, "q_proj": { "group_size": { @@ -6758,7 +6758,7 @@ } }, { - "accuracy": 0.9984152317047119, + "accuracy": 0.9976738691329956, "total_bits": 223010816, "q_proj": { "group_size": { @@ -6819,7 +6819,7 @@ } }, { - "accuracy": 0.9983686804771423, + "accuracy": 0.9974257349967957, "total_bits": 252975104, "q_proj": { "group_size": { @@ -6871,7 +6871,7 @@ } }, { - "accuracy": 0.998618483543396, + "accuracy": 0.9975801706314087, "total_bits": 265314304, "q_proj": { "group_size": { @@ -6923,7 +6923,7 @@ } }, { - "accuracy": 0.9986606240272522, + "accuracy": 0.9977553486824036, "total_bits": 336861184, "q_proj": { "group_size": { @@ -6977,7 +6977,7 @@ ], "model.layers.3.block_sparse_moe": [ { - "accuracy": 0.9871391654014587, + "accuracy": 0.9890608191490173, "total_bits": 3157926400, "w1": { "group_size": { @@ -7029,7 +7029,7 @@ } }, { - "accuracy": 0.9876048564910889, + "accuracy": 0.989456057548523, "total_bits": 3268026880, "w1": { "group_size": { @@ -7081,7 +7081,7 @@ } }, { - "accuracy": 0.9889822602272034, + "accuracy": 0.9906179904937744, "total_bits": 3652411392, "w1": { "group_size": { @@ -7130,7 +7130,7 @@ } }, { - "accuracy": 0.9893051385879517, + "accuracy": 0.9908885955810547, "total_bits": 4098056192, "w1": { "group_size": { @@ -7179,7 +7179,7 @@ } }, { - "accuracy": 0.9939988851547241, + "accuracy": 0.9948912858963013, "total_bits": 4621411072, "w1": { "group_size": { @@ -7231,7 +7231,7 @@ } }, { - "accuracy": 0.9944663643836975, + "accuracy": 0.9952893257141113, "total_bits": 4737212416, "w1": { "group_size": { @@ -7283,7 +7283,7 @@ } }, { - "accuracy": 0.9948609471321106, + "accuracy": 0.9956207275390625, "total_bits": 5093868288, "w1": { "group_size": { @@ -7332,7 +7332,7 @@ } }, { - "accuracy": 0.9969760179519653, + "accuracy": 0.9974277019500732, "total_bits": 5824164608, "w1": { "group_size": { @@ -7375,7 +7375,7 @@ } }, { - "accuracy": 0.9972323179244995, + "accuracy": 0.9976444244384766, "total_bits": 5910044672, "w1": { "group_size": { @@ -7418,7 +7418,7 @@ } }, { - "accuracy": 0.9969810843467712, + "accuracy": 0.9974286556243896, "total_bits": 6006579968, "w1": { "group_size": { @@ -7470,7 +7470,7 @@ } }, { - "accuracy": 0.9973636269569397, + "accuracy": 0.9977566599845886, "total_bits": 6122381312, "w1": { "group_size": { @@ -7522,7 +7522,7 @@ } }, { - "accuracy": 0.998485267162323, + "accuracy": 0.998708963394165, "total_bits": 7391748864, "w1": { "group_size": { @@ -7574,7 +7574,7 @@ } }, { - "accuracy": 0.9986110329627991, + "accuracy": 0.9988176226615906, "total_bits": 7507550208, "w1": { "group_size": { @@ -7626,7 +7626,7 @@ } }, { - "accuracy": 0.9991430044174194, + "accuracy": 0.9992673993110657, "total_bits": 8550425344, "w1": { "group_size": { @@ -7669,7 +7669,7 @@ } }, { - "accuracy": 0.9991754293441772, + "accuracy": 0.9992954730987549, "total_bits": 8877312000, "w1": { "group_size": { @@ -7718,7 +7718,7 @@ } }, { - "accuracy": 0.9992324113845825, + "accuracy": 0.9993429780006409, "total_bits": 9674229760, "w1": { "group_size": { @@ -7764,7 +7764,7 @@ } }, { - "accuracy": 0.9995652437210083, + "accuracy": 0.9996253252029419, "total_bits": 11318396928, "w1": { "group_size": { @@ -7806,7 +7806,7 @@ ], "model.layers.4.self_attn": [ { - "accuracy": 0.985275387763977, + "accuracy": 0.9876067638397217, "total_bits": 89141248, "q_proj": { "group_size": { @@ -7870,7 +7870,7 @@ } }, { - "accuracy": 0.9857286214828491, + "accuracy": 0.9875608086585999, "total_bits": 91697152, "q_proj": { "group_size": { @@ -7934,7 +7934,7 @@ } }, { - "accuracy": 0.9867607355117798, + "accuracy": 0.9891496896743774, "total_bits": 95234560, "q_proj": { "group_size": { @@ -7998,7 +7998,7 @@ } }, { - "accuracy": 0.9882041811943054, + "accuracy": 0.9901385307312012, "total_bits": 111748096, "q_proj": { "group_size": { @@ -8062,7 +8062,7 @@ } }, { - "accuracy": 0.9906193017959595, + "accuracy": 0.9927072525024414, "total_bits": 132388864, "q_proj": { "group_size": { @@ -8126,7 +8126,7 @@ } }, { - "accuracy": 0.9905970692634583, + "accuracy": 0.9920412302017212, "total_bits": 132455936, "q_proj": { "group_size": { @@ -8190,7 +8190,7 @@ } }, { - "accuracy": 0.9917722940444946, + "accuracy": 0.9935618042945862, "total_bits": 169089024, "q_proj": { "group_size": { @@ -8242,7 +8242,7 @@ } }, { - "accuracy": 0.9918293952941895, + "accuracy": 0.9932540655136108, "total_bits": 169221632, "q_proj": { "group_size": { @@ -8294,7 +8294,7 @@ } }, { - "accuracy": 0.9938073754310608, + "accuracy": 0.9939872026443481, "total_bits": 170671104, "q_proj": { "group_size": { @@ -8346,7 +8346,7 @@ } }, { - "accuracy": 0.993617832660675, + "accuracy": 0.994656503200531, "total_bits": 173039616, "q_proj": { "group_size": { @@ -8398,7 +8398,7 @@ } }, { - "accuracy": 0.9956769347190857, + "accuracy": 0.9965835809707642, "total_bits": 174398976, "q_proj": { "group_size": { @@ -8462,7 +8462,7 @@ } }, { - "accuracy": 0.995989203453064, + "accuracy": 0.996671199798584, "total_bits": 175225856, "q_proj": { "group_size": { @@ -8526,7 +8526,7 @@ } }, { - "accuracy": 0.9958775043487549, + "accuracy": 0.996809720993042, "total_bits": 178728960, "q_proj": { "group_size": { @@ -8587,7 +8587,7 @@ } }, { - "accuracy": 0.9962911009788513, + "accuracy": 0.9968702793121338, "total_bits": 181067776, "q_proj": { "group_size": { @@ -8648,7 +8648,7 @@ } }, { - "accuracy": 0.9968873858451843, + "accuracy": 0.997231125831604, "total_bits": 219944960, "q_proj": { "group_size": { @@ -8709,7 +8709,7 @@ } }, { - "accuracy": 0.996955156326294, + "accuracy": 0.9976534247398376, "total_bits": 223010816, "q_proj": { "group_size": { @@ -8770,7 +8770,7 @@ } }, { - "accuracy": 0.9970011115074158, + "accuracy": 0.9973250031471252, "total_bits": 252975104, "q_proj": { "group_size": { @@ -8822,7 +8822,7 @@ } }, { - "accuracy": 0.9971234202384949, + "accuracy": 0.9977023601531982, "total_bits": 265314304, "q_proj": { "group_size": { @@ -8874,7 +8874,7 @@ } }, { - "accuracy": 0.9973219037055969, + "accuracy": 0.997814953327179, "total_bits": 336861184, "q_proj": { "group_size": { @@ -8928,7 +8928,7 @@ ], "model.layers.4.block_sparse_moe": [ { - "accuracy": 0.9825997352600098, + "accuracy": 0.9859257340431213, "total_bits": 3157926400, "w1": { "group_size": { @@ -8980,7 +8980,7 @@ } }, { - "accuracy": 0.9832949638366699, + "accuracy": 0.9864931702613831, "total_bits": 3268026880, "w1": { "group_size": { @@ -9032,7 +9032,7 @@ } }, { - "accuracy": 0.9851880669593811, + "accuracy": 0.9879869222640991, "total_bits": 3652411392, "w1": { "group_size": { @@ -9081,7 +9081,7 @@ } }, { - "accuracy": 0.9856194257736206, + "accuracy": 0.9883233308792114, "total_bits": 4098056192, "w1": { "group_size": { @@ -9130,7 +9130,7 @@ } }, { - "accuracy": 0.991915762424469, + "accuracy": 0.9934606552124023, "total_bits": 4621411072, "w1": { "group_size": { @@ -9182,7 +9182,7 @@ } }, { - "accuracy": 0.9925510883331299, + "accuracy": 0.9939739108085632, "total_bits": 4737212416, "w1": { "group_size": { @@ -9234,7 +9234,7 @@ } }, { - "accuracy": 0.9930846691131592, + "accuracy": 0.9943850040435791, "total_bits": 5093868288, "w1": { "group_size": { @@ -9283,7 +9283,7 @@ } }, { - "accuracy": 0.9958922266960144, + "accuracy": 0.9966764450073242, "total_bits": 5824164608, "w1": { "group_size": { @@ -9326,7 +9326,7 @@ } }, { - "accuracy": 0.9962383508682251, + "accuracy": 0.9969539642333984, "total_bits": 5910044672, "w1": { "group_size": { @@ -9369,7 +9369,7 @@ } }, { - "accuracy": 0.9959345459938049, + "accuracy": 0.9967129826545715, "total_bits": 6006579968, "w1": { "group_size": { @@ -9421,7 +9421,7 @@ } }, { - "accuracy": 0.9964479804039001, + "accuracy": 0.9971298575401306, "total_bits": 6122381312, "w1": { "group_size": { @@ -9473,7 +9473,7 @@ } }, { - "accuracy": 0.9979608058929443, + "accuracy": 0.9983523488044739, "total_bits": 7391748864, "w1": { "group_size": { @@ -9525,7 +9525,7 @@ } }, { - "accuracy": 0.9981251955032349, + "accuracy": 0.9984872341156006, "total_bits": 7507550208, "w1": { "group_size": { @@ -9577,7 +9577,7 @@ } }, { - "accuracy": 0.9988373517990112, + "accuracy": 0.9990604519844055, "total_bits": 8550425344, "w1": { "group_size": { @@ -9620,7 +9620,7 @@ } }, { - "accuracy": 0.9988889694213867, + "accuracy": 0.9991037845611572, "total_bits": 8877312000, "w1": { "group_size": { @@ -9669,7 +9669,7 @@ } }, { - "accuracy": 0.9989659786224365, + "accuracy": 0.999162495136261, "total_bits": 9674229760, "w1": { "group_size": { @@ -9715,7 +9715,7 @@ } }, { - "accuracy": 0.9994115233421326, + "accuracy": 0.9995267391204834, "total_bits": 11318396928, "w1": { "group_size": { @@ -9757,7 +9757,7 @@ ], "model.layers.5.self_attn": [ { - "accuracy": 0.9824051260948181, + "accuracy": 0.9863646626472473, "total_bits": 89141248, "q_proj": { "group_size": { @@ -9821,7 +9821,7 @@ } }, { - "accuracy": 0.9831342697143555, + "accuracy": 0.9868806600570679, "total_bits": 91697152, "q_proj": { "group_size": { @@ -9885,7 +9885,7 @@ } }, { - "accuracy": 0.9843595623970032, + "accuracy": 0.9879058003425598, "total_bits": 95234560, "q_proj": { "group_size": { @@ -9949,7 +9949,7 @@ } }, { - "accuracy": 0.9865956902503967, + "accuracy": 0.9891868829727173, "total_bits": 111748096, "q_proj": { "group_size": { @@ -10013,7 +10013,7 @@ } }, { - "accuracy": 0.9909732937812805, + "accuracy": 0.9934185743331909, "total_bits": 132388864, "q_proj": { "group_size": { @@ -10077,7 +10077,7 @@ } }, { - "accuracy": 0.9912468194961548, + "accuracy": 0.9936191439628601, "total_bits": 132455936, "q_proj": { "group_size": { @@ -10141,7 +10141,7 @@ } }, { - "accuracy": 0.9927248954772949, + "accuracy": 0.994598388671875, "total_bits": 169089024, "q_proj": { "group_size": { @@ -10193,7 +10193,7 @@ } }, { - "accuracy": 0.9932001233100891, + "accuracy": 0.9948163628578186, "total_bits": 169221632, "q_proj": { "group_size": { @@ -10245,7 +10245,7 @@ } }, { - "accuracy": 0.9942808151245117, + "accuracy": 0.995417594909668, "total_bits": 170671104, "q_proj": { "group_size": { @@ -10297,7 +10297,7 @@ } }, { - "accuracy": 0.9946455955505371, + "accuracy": 0.996032178401947, "total_bits": 173039616, "q_proj": { "group_size": { @@ -10349,7 +10349,7 @@ } }, { - "accuracy": 0.9957332611083984, + "accuracy": 0.996302604675293, "total_bits": 174398976, "q_proj": { "group_size": { @@ -10413,7 +10413,7 @@ } }, { - "accuracy": 0.9960319399833679, + "accuracy": 0.9967555999755859, "total_bits": 175225856, "q_proj": { "group_size": { @@ -10477,7 +10477,7 @@ } }, { - "accuracy": 0.9962198734283447, + "accuracy": 0.9965559840202332, "total_bits": 178728960, "q_proj": { "group_size": { @@ -10538,7 +10538,7 @@ } }, { - "accuracy": 0.9965771436691284, + "accuracy": 0.9971303939819336, "total_bits": 181067776, "q_proj": { "group_size": { @@ -10599,7 +10599,7 @@ } }, { - "accuracy": 0.99733966588974, + "accuracy": 0.9973838329315186, "total_bits": 219944960, "q_proj": { "group_size": { @@ -10660,7 +10660,7 @@ } }, { - "accuracy": 0.9977163672447205, + "accuracy": 0.997671365737915, "total_bits": 223010816, "q_proj": { "group_size": { @@ -10721,7 +10721,7 @@ } }, { - "accuracy": 0.9975889325141907, + "accuracy": 0.9974393248558044, "total_bits": 252975104, "q_proj": { "group_size": { @@ -10773,7 +10773,7 @@ } }, { - "accuracy": 0.9980098605155945, + "accuracy": 0.9977273941040039, "total_bits": 265314304, "q_proj": { "group_size": { @@ -10825,7 +10825,7 @@ } }, { - "accuracy": 0.9978362917900085, + "accuracy": 0.9981924295425415, "total_bits": 336861184, "q_proj": { "group_size": { @@ -10879,7 +10879,7 @@ ], "model.layers.5.block_sparse_moe": [ { - "accuracy": 0.9800121188163757, + "accuracy": 0.9845542907714844, "total_bits": 3157926400, "w1": { "group_size": { @@ -10931,7 +10931,7 @@ } }, { - "accuracy": 0.9807761311531067, + "accuracy": 0.9851581454277039, "total_bits": 3268026880, "w1": { "group_size": { @@ -10983,7 +10983,7 @@ } }, { - "accuracy": 0.9831357002258301, + "accuracy": 0.9869306087493896, "total_bits": 3652411392, "w1": { "group_size": { @@ -11032,7 +11032,7 @@ } }, { - "accuracy": 0.9836696982383728, + "accuracy": 0.9873265027999878, "total_bits": 4098056192, "w1": { "group_size": { @@ -11081,7 +11081,7 @@ } }, { - "accuracy": 0.9906938076019287, + "accuracy": 0.9928094148635864, "total_bits": 4621411072, "w1": { "group_size": { @@ -11133,7 +11133,7 @@ } }, { - "accuracy": 0.9914302825927734, + "accuracy": 0.993369460105896, "total_bits": 4737212416, "w1": { "group_size": { @@ -11185,7 +11185,7 @@ } }, { - "accuracy": 0.9921103119850159, + "accuracy": 0.9938710927963257, "total_bits": 5093868288, "w1": { "group_size": { @@ -11234,7 +11234,7 @@ } }, { - "accuracy": 0.9952771663665771, + "accuracy": 0.9963454008102417, "total_bits": 5824164608, "w1": { "group_size": { @@ -11277,7 +11277,7 @@ } }, { - "accuracy": 0.9956773519515991, + "accuracy": 0.9966510534286499, "total_bits": 5910044672, "w1": { "group_size": { @@ -11320,7 +11320,7 @@ } }, { - "accuracy": 0.9953118562698364, + "accuracy": 0.9963803291320801, "total_bits": 6006579968, "w1": { "group_size": { @@ -11372,7 +11372,7 @@ } }, { - "accuracy": 0.9959110021591187, + "accuracy": 0.9968376755714417, "total_bits": 6122381312, "w1": { "group_size": { @@ -11424,7 +11424,7 @@ } }, { - "accuracy": 0.9976473450660706, + "accuracy": 0.998184859752655, "total_bits": 7391748864, "w1": { "group_size": { @@ -11476,7 +11476,7 @@ } }, { - "accuracy": 0.9978458881378174, + "accuracy": 0.9983357787132263, "total_bits": 7507550208, "w1": { "group_size": { @@ -11528,7 +11528,7 @@ } }, { - "accuracy": 0.9986577033996582, + "accuracy": 0.9989680051803589, "total_bits": 8550425344, "w1": { "group_size": { @@ -11571,7 +11571,7 @@ } }, { - "accuracy": 0.9987152814865112, + "accuracy": 0.9990135431289673, "total_bits": 8877312000, "w1": { "group_size": { @@ -11620,7 +11620,7 @@ } }, { - "accuracy": 0.9988142251968384, + "accuracy": 0.9990869760513306, "total_bits": 9674229760, "w1": { "group_size": { @@ -11666,7 +11666,7 @@ } }, { - "accuracy": 0.9993132948875427, + "accuracy": 0.9994814991950989, "total_bits": 11318396928, "w1": { "group_size": { @@ -11708,7 +11708,7 @@ ], "model.layers.6.self_attn": [ { - "accuracy": 0.9827138185501099, + "accuracy": 0.9875385761260986, "total_bits": 89141248, "q_proj": { "group_size": { @@ -11772,7 +11772,7 @@ } }, { - "accuracy": 0.9835598468780518, + "accuracy": 0.9881197214126587, "total_bits": 91697152, "q_proj": { "group_size": { @@ -11836,7 +11836,7 @@ } }, { - "accuracy": 0.9848535060882568, + "accuracy": 0.9890980124473572, "total_bits": 95234560, "q_proj": { "group_size": { @@ -11900,7 +11900,7 @@ } }, { - "accuracy": 0.9876061677932739, + "accuracy": 0.9907206296920776, "total_bits": 111748096, "q_proj": { "group_size": { @@ -11964,7 +11964,7 @@ } }, { - "accuracy": 0.9895716905593872, + "accuracy": 0.9927762746810913, "total_bits": 132388864, "q_proj": { "group_size": { @@ -12028,7 +12028,7 @@ } }, { - "accuracy": 0.9901067018508911, + "accuracy": 0.9927448034286499, "total_bits": 132455936, "q_proj": { "group_size": { @@ -12092,7 +12092,7 @@ } }, { - "accuracy": 0.9917066097259521, + "accuracy": 0.993926465511322, "total_bits": 169089024, "q_proj": { "group_size": { @@ -12144,7 +12144,7 @@ } }, { - "accuracy": 0.992316484451294, + "accuracy": 0.9939043521881104, "total_bits": 169221632, "q_proj": { "group_size": { @@ -12196,7 +12196,7 @@ } }, { - "accuracy": 0.9932737350463867, + "accuracy": 0.9949092864990234, "total_bits": 170671104, "q_proj": { "group_size": { @@ -12248,7 +12248,7 @@ } }, { - "accuracy": 0.9935182929039001, + "accuracy": 0.9951657652854919, "total_bits": 173039616, "q_proj": { "group_size": { @@ -12300,7 +12300,7 @@ } }, { - "accuracy": 0.994907021522522, + "accuracy": 0.9962393641471863, "total_bits": 174398976, "q_proj": { "group_size": { @@ -12364,7 +12364,7 @@ } }, { - "accuracy": 0.995290219783783, + "accuracy": 0.996502697467804, "total_bits": 175225856, "q_proj": { "group_size": { @@ -12428,7 +12428,7 @@ } }, { - "accuracy": 0.9954949617385864, + "accuracy": 0.9965805411338806, "total_bits": 178728960, "q_proj": { "group_size": { @@ -12489,7 +12489,7 @@ } }, { - "accuracy": 0.9957065582275391, + "accuracy": 0.9967541098594666, "total_bits": 181067776, "q_proj": { "group_size": { @@ -12550,7 +12550,7 @@ } }, { - "accuracy": 0.9969029426574707, + "accuracy": 0.9975075721740723, "total_bits": 219944960, "q_proj": { "group_size": { @@ -12611,7 +12611,7 @@ } }, { - "accuracy": 0.9972619414329529, + "accuracy": 0.9975199103355408, "total_bits": 223010816, "q_proj": { "group_size": { @@ -12672,7 +12672,7 @@ } }, { - "accuracy": 0.99713534116745, + "accuracy": 0.9976407885551453, "total_bits": 252975104, "q_proj": { "group_size": { @@ -12724,7 +12724,7 @@ } }, { - "accuracy": 0.997477650642395, + "accuracy": 0.9977498650550842, "total_bits": 265314304, "q_proj": { "group_size": { @@ -12776,7 +12776,7 @@ } }, { - "accuracy": 0.99775630235672, + "accuracy": 0.9980379343032837, "total_bits": 336861184, "q_proj": { "group_size": { @@ -12830,7 +12830,7 @@ ], "model.layers.6.block_sparse_moe": [ { - "accuracy": 0.9781857132911682, + "accuracy": 0.9840726256370544, "total_bits": 3157926400, "w1": { "group_size": { @@ -12882,7 +12882,7 @@ } }, { - "accuracy": 0.9790080189704895, + "accuracy": 0.9846981763839722, "total_bits": 3268026880, "w1": { "group_size": { @@ -12934,7 +12934,7 @@ } }, { - "accuracy": 0.9817783236503601, + "accuracy": 0.9866693615913391, "total_bits": 3652411392, "w1": { "group_size": { @@ -12983,7 +12983,7 @@ } }, { - "accuracy": 0.9824177622795105, + "accuracy": 0.9871244430541992, "total_bits": 4098056192, "w1": { "group_size": { @@ -13032,7 +13032,7 @@ } }, { - "accuracy": 0.989850640296936, + "accuracy": 0.9925897717475891, "total_bits": 4621411072, "w1": { "group_size": { @@ -13084,7 +13084,7 @@ } }, { - "accuracy": 0.9906392097473145, + "accuracy": 0.9931681156158447, "total_bits": 4737212416, "w1": { "group_size": { @@ -13136,7 +13136,7 @@ } }, { - "accuracy": 0.9914728999137878, + "accuracy": 0.9937629699707031, "total_bits": 5093868288, "w1": { "group_size": { @@ -13185,7 +13185,7 @@ } }, { - "accuracy": 0.9948198795318604, + "accuracy": 0.9962121248245239, "total_bits": 5824164608, "w1": { "group_size": { @@ -13228,7 +13228,7 @@ } }, { - "accuracy": 0.9952569007873535, + "accuracy": 0.996528685092926, "total_bits": 5910044672, "w1": { "group_size": { @@ -13271,7 +13271,7 @@ } }, { - "accuracy": 0.9948891401290894, + "accuracy": 0.9962703585624695, "total_bits": 6006579968, "w1": { "group_size": { @@ -13323,7 +13323,7 @@ } }, { - "accuracy": 0.9955287575721741, + "accuracy": 0.9967359900474548, "total_bits": 6122381312, "w1": { "group_size": { @@ -13375,7 +13375,7 @@ } }, { - "accuracy": 0.9974331259727478, + "accuracy": 0.9981252551078796, "total_bits": 7391748864, "w1": { "group_size": { @@ -13427,7 +13427,7 @@ } }, { - "accuracy": 0.997631847858429, + "accuracy": 0.9982702732086182, "total_bits": 7507550208, "w1": { "group_size": { @@ -13479,7 +13479,7 @@ } }, { - "accuracy": 0.9985304474830627, + "accuracy": 0.99892258644104, "total_bits": 8550425344, "w1": { "group_size": { @@ -13522,7 +13522,7 @@ } }, { - "accuracy": 0.9986029267311096, + "accuracy": 0.9989784359931946, "total_bits": 8877312000, "w1": { "group_size": { @@ -13571,7 +13571,7 @@ } }, { - "accuracy": 0.9987285733222961, + "accuracy": 0.9990677833557129, "total_bits": 9674229760, "w1": { "group_size": { @@ -13617,7 +13617,7 @@ } }, { - "accuracy": 0.9992542862892151, + "accuracy": 0.9994519352912903, "total_bits": 11318396928, "w1": { "group_size": { @@ -13659,7 +13659,7 @@ ], "model.layers.7.self_attn": [ { - "accuracy": 0.9756568074226379, + "accuracy": 0.9824022054672241, "total_bits": 89141248, "q_proj": { "group_size": { @@ -13723,7 +13723,7 @@ } }, { - "accuracy": 0.9775936007499695, + "accuracy": 0.9828293919563293, "total_bits": 91697152, "q_proj": { "group_size": { @@ -13787,7 +13787,7 @@ } }, { - "accuracy": 0.9788615703582764, + "accuracy": 0.9837542176246643, "total_bits": 95234560, "q_proj": { "group_size": { @@ -13851,7 +13851,7 @@ } }, { - "accuracy": 0.9814447164535522, + "accuracy": 0.9851705431938171, "total_bits": 111748096, "q_proj": { "group_size": { @@ -13915,7 +13915,7 @@ } }, { - "accuracy": 0.9889373779296875, + "accuracy": 0.9911399483680725, "total_bits": 132388864, "q_proj": { "group_size": { @@ -13979,7 +13979,7 @@ } }, { - "accuracy": 0.9891265034675598, + "accuracy": 0.9913239479064941, "total_bits": 132455936, "q_proj": { "group_size": { @@ -14043,7 +14043,7 @@ } }, { - "accuracy": 0.9913443922996521, + "accuracy": 0.9922885298728943, "total_bits": 169089024, "q_proj": { "group_size": { @@ -14095,7 +14095,7 @@ } }, { - "accuracy": 0.9914593696594238, + "accuracy": 0.9925828576087952, "total_bits": 169221632, "q_proj": { "group_size": { @@ -14147,7 +14147,7 @@ } }, { - "accuracy": 0.9921685457229614, + "accuracy": 0.9935244917869568, "total_bits": 170671104, "q_proj": { "group_size": { @@ -14199,7 +14199,7 @@ } }, { - "accuracy": 0.9922220706939697, + "accuracy": 0.9932875633239746, "total_bits": 173039616, "q_proj": { "group_size": { @@ -14251,7 +14251,7 @@ } }, { - "accuracy": 0.9944911599159241, + "accuracy": 0.9952410459518433, "total_bits": 174398976, "q_proj": { "group_size": { @@ -14315,7 +14315,7 @@ } }, { - "accuracy": 0.9948334693908691, + "accuracy": 0.9952657222747803, "total_bits": 175225856, "q_proj": { "group_size": { @@ -14379,7 +14379,7 @@ } }, { - "accuracy": 0.9950476288795471, + "accuracy": 0.9955123662948608, "total_bits": 178728960, "q_proj": { "group_size": { @@ -14440,7 +14440,7 @@ } }, { - "accuracy": 0.9954417943954468, + "accuracy": 0.9955897331237793, "total_bits": 181067776, "q_proj": { "group_size": { @@ -14501,7 +14501,7 @@ } }, { - "accuracy": 0.9965739846229553, + "accuracy": 0.9966443181037903, "total_bits": 219944960, "q_proj": { "group_size": { @@ -14562,7 +14562,7 @@ } }, { - "accuracy": 0.9965811967849731, + "accuracy": 0.9967734217643738, "total_bits": 223010816, "q_proj": { "group_size": { @@ -14623,7 +14623,7 @@ } }, { - "accuracy": 0.9968956708908081, + "accuracy": 0.9967808127403259, "total_bits": 252975104, "q_proj": { "group_size": { @@ -14675,7 +14675,7 @@ } }, { - "accuracy": 0.9968949556350708, + "accuracy": 0.9969400763511658, "total_bits": 265314304, "q_proj": { "group_size": { @@ -14727,7 +14727,7 @@ } }, { - "accuracy": 0.997576892375946, + "accuracy": 0.9972565770149231, "total_bits": 336861184, "q_proj": { "group_size": { @@ -14781,7 +14781,7 @@ ], "model.layers.7.block_sparse_moe": [ { - "accuracy": 0.9761197566986084, + "accuracy": 0.9833416938781738, "total_bits": 3157926400, "w1": { "group_size": { @@ -14833,7 +14833,7 @@ } }, { - "accuracy": 0.9770138263702393, + "accuracy": 0.9839823246002197, "total_bits": 3268026880, "w1": { "group_size": { @@ -14885,7 +14885,7 @@ } }, { - "accuracy": 0.979978621006012, + "accuracy": 0.9860503673553467, "total_bits": 3652411392, "w1": { "group_size": { @@ -14934,7 +14934,7 @@ } }, { - "accuracy": 0.980651319026947, + "accuracy": 0.9865221977233887, "total_bits": 4098056192, "w1": { "group_size": { @@ -14983,7 +14983,7 @@ } }, { - "accuracy": 0.9888680577278137, + "accuracy": 0.9922327399253845, "total_bits": 4621411072, "w1": { "group_size": { @@ -15035,7 +15035,7 @@ } }, { - "accuracy": 0.9897587299346924, + "accuracy": 0.9928511381149292, "total_bits": 4737212416, "w1": { "group_size": { @@ -15087,7 +15087,7 @@ } }, { - "accuracy": 0.9906314015388489, + "accuracy": 0.9934636950492859, "total_bits": 5093868288, "w1": { "group_size": { @@ -15136,7 +15136,7 @@ } }, { - "accuracy": 0.99434894323349, + "accuracy": 0.9960464835166931, "total_bits": 5824164608, "w1": { "group_size": { @@ -15179,7 +15179,7 @@ } }, { - "accuracy": 0.9948300123214722, + "accuracy": 0.9963796138763428, "total_bits": 5910044672, "w1": { "group_size": { @@ -15222,7 +15222,7 @@ } }, { - "accuracy": 0.9943795800209045, + "accuracy": 0.9960765838623047, "total_bits": 6006579968, "w1": { "group_size": { @@ -15274,7 +15274,7 @@ } }, { - "accuracy": 0.9951010346412659, + "accuracy": 0.9965778589248657, "total_bits": 6122381312, "w1": { "group_size": { @@ -15326,7 +15326,7 @@ } }, { - "accuracy": 0.997167706489563, + "accuracy": 0.9980212450027466, "total_bits": 7391748864, "w1": { "group_size": { @@ -15378,7 +15378,7 @@ } }, { - "accuracy": 0.9973886013031006, + "accuracy": 0.9981759190559387, "total_bits": 7507550208, "w1": { "group_size": { @@ -15430,7 +15430,7 @@ } }, { - "accuracy": 0.9983628392219543, + "accuracy": 0.9988516569137573, "total_bits": 8550425344, "w1": { "group_size": { @@ -15473,7 +15473,7 @@ } }, { - "accuracy": 0.9984325766563416, + "accuracy": 0.998904287815094, "total_bits": 8877312000, "w1": { "group_size": { @@ -15522,7 +15522,7 @@ } }, { - "accuracy": 0.9985613226890564, + "accuracy": 0.9989941716194153, "total_bits": 9674229760, "w1": { "group_size": { @@ -15568,7 +15568,7 @@ } }, { - "accuracy": 0.9991229176521301, + "accuracy": 0.999382495880127, "total_bits": 11318396928, "w1": { "group_size": { @@ -15610,7 +15610,7 @@ ], "model.layers.8.self_attn": [ { - "accuracy": 0.9745641350746155, + "accuracy": 0.9800093770027161, "total_bits": 89141248, "q_proj": { "group_size": { @@ -15674,7 +15674,7 @@ } }, { - "accuracy": 0.9764924049377441, + "accuracy": 0.9808433651924133, "total_bits": 91697152, "q_proj": { "group_size": { @@ -15738,7 +15738,7 @@ } }, { - "accuracy": 0.9776329398155212, + "accuracy": 0.981756329536438, "total_bits": 95234560, "q_proj": { "group_size": { @@ -15802,7 +15802,7 @@ } }, { - "accuracy": 0.9814552664756775, + "accuracy": 0.9837787747383118, "total_bits": 111748096, "q_proj": { "group_size": { @@ -15866,7 +15866,7 @@ } }, { - "accuracy": 0.9869210720062256, + "accuracy": 0.9896422624588013, "total_bits": 132388864, "q_proj": { "group_size": { @@ -15930,7 +15930,7 @@ } }, { - "accuracy": 0.9872050881385803, + "accuracy": 0.9895234107971191, "total_bits": 132455936, "q_proj": { "group_size": { @@ -15994,7 +15994,7 @@ } }, { - "accuracy": 0.9896059036254883, + "accuracy": 0.9909639954566956, "total_bits": 169089024, "q_proj": { "group_size": { @@ -16046,7 +16046,7 @@ } }, { - "accuracy": 0.98982834815979, + "accuracy": 0.9907659292221069, "total_bits": 169221632, "q_proj": { "group_size": { @@ -16098,7 +16098,7 @@ } }, { - "accuracy": 0.9915042519569397, + "accuracy": 0.9927958846092224, "total_bits": 170671104, "q_proj": { "group_size": { @@ -16150,7 +16150,7 @@ } }, { - "accuracy": 0.9923007488250732, + "accuracy": 0.9931433200836182, "total_bits": 173039616, "q_proj": { "group_size": { @@ -16202,7 +16202,7 @@ } }, { - "accuracy": 0.9929978251457214, + "accuracy": 0.9942891001701355, "total_bits": 174398976, "q_proj": { "group_size": { @@ -16266,7 +16266,7 @@ } }, { - "accuracy": 0.9929068684577942, + "accuracy": 0.9946056008338928, "total_bits": 175225856, "q_proj": { "group_size": { @@ -16330,7 +16330,7 @@ } }, { - "accuracy": 0.9931942224502563, + "accuracy": 0.9944790601730347, "total_bits": 178728960, "q_proj": { "group_size": { @@ -16391,7 +16391,7 @@ } }, { - "accuracy": 0.9934830069541931, + "accuracy": 0.9947715997695923, "total_bits": 181067776, "q_proj": { "group_size": { @@ -16452,7 +16452,7 @@ } }, { - "accuracy": 0.9955446720123291, + "accuracy": 0.9958426356315613, "total_bits": 219944960, "q_proj": { "group_size": { @@ -16513,7 +16513,7 @@ } }, { - "accuracy": 0.995071291923523, + "accuracy": 0.9958821535110474, "total_bits": 223010816, "q_proj": { "group_size": { @@ -16574,7 +16574,7 @@ } }, { - "accuracy": 0.9958850145339966, + "accuracy": 0.9960048198699951, "total_bits": 252975104, "q_proj": { "group_size": { @@ -16626,7 +16626,7 @@ } }, { - "accuracy": 0.9954195022583008, + "accuracy": 0.9959654211997986, "total_bits": 265314304, "q_proj": { "group_size": { @@ -16678,7 +16678,7 @@ } }, { - "accuracy": 0.996391236782074, + "accuracy": 0.9965999126434326, "total_bits": 336861184, "q_proj": { "group_size": { @@ -16732,7 +16732,7 @@ ], "model.layers.8.block_sparse_moe": [ { - "accuracy": 0.9741842150688171, + "accuracy": 0.9821083545684814, "total_bits": 3157926400, "w1": { "group_size": { @@ -16784,7 +16784,7 @@ } }, { - "accuracy": 0.9751280546188354, + "accuracy": 0.9827668070793152, "total_bits": 3268026880, "w1": { "group_size": { @@ -16836,7 +16836,7 @@ } }, { - "accuracy": 0.9783428907394409, + "accuracy": 0.98499596118927, "total_bits": 3652411392, "w1": { "group_size": { @@ -16885,7 +16885,7 @@ } }, { - "accuracy": 0.9790996313095093, + "accuracy": 0.9855325818061829, "total_bits": 4098056192, "w1": { "group_size": { @@ -16934,7 +16934,7 @@ } }, { - "accuracy": 0.9879100918769836, + "accuracy": 0.9916037321090698, "total_bits": 4621411072, "w1": { "group_size": { @@ -16986,7 +16986,7 @@ } }, { - "accuracy": 0.9888765215873718, + "accuracy": 0.9922758340835571, "total_bits": 4737212416, "w1": { "group_size": { @@ -17038,7 +17038,7 @@ } }, { - "accuracy": 0.9898669719696045, + "accuracy": 0.9929769039154053, "total_bits": 5093868288, "w1": { "group_size": { @@ -17087,7 +17087,7 @@ } }, { - "accuracy": 0.9938477873802185, + "accuracy": 0.9957173466682434, "total_bits": 5824164608, "w1": { "group_size": { @@ -17130,7 +17130,7 @@ } }, { - "accuracy": 0.9943729043006897, + "accuracy": 0.9960774183273315, "total_bits": 5910044672, "w1": { "group_size": { @@ -17173,7 +17173,7 @@ } }, { - "accuracy": 0.9938919544219971, + "accuracy": 0.9957579374313354, "total_bits": 6006579968, "w1": { "group_size": { @@ -17225,7 +17225,7 @@ } }, { - "accuracy": 0.9946658611297607, + "accuracy": 0.9962946176528931, "total_bits": 6122381312, "w1": { "group_size": { @@ -17277,7 +17277,7 @@ } }, { - "accuracy": 0.9969112873077393, + "accuracy": 0.997848629951477, "total_bits": 7391748864, "w1": { "group_size": { @@ -17329,7 +17329,7 @@ } }, { - "accuracy": 0.9971376657485962, + "accuracy": 0.9980047941207886, "total_bits": 7507550208, "w1": { "group_size": { @@ -17381,7 +17381,7 @@ } }, { - "accuracy": 0.9982024431228638, + "accuracy": 0.9987345933914185, "total_bits": 8550425344, "w1": { "group_size": { @@ -17424,7 +17424,7 @@ } }, { - "accuracy": 0.9982845187187195, + "accuracy": 0.9987975358963013, "total_bits": 8877312000, "w1": { "group_size": { @@ -17473,7 +17473,7 @@ } }, { - "accuracy": 0.998430609703064, + "accuracy": 0.9989006519317627, "total_bits": 9674229760, "w1": { "group_size": { @@ -17519,7 +17519,7 @@ } }, { - "accuracy": 0.9990242123603821, + "accuracy": 0.9993011951446533, "total_bits": 11318396928, "w1": { "group_size": { @@ -17561,7 +17561,7 @@ ], "model.layers.9.self_attn": [ { - "accuracy": 0.9748140573501587, + "accuracy": 0.9819218516349792, "total_bits": 89141248, "q_proj": { "group_size": { @@ -17625,7 +17625,7 @@ } }, { - "accuracy": 0.9754710793495178, + "accuracy": 0.982273519039154, "total_bits": 91697152, "q_proj": { "group_size": { @@ -17689,7 +17689,7 @@ } }, { - "accuracy": 0.9770217537879944, + "accuracy": 0.9833663105964661, "total_bits": 95234560, "q_proj": { "group_size": { @@ -17753,7 +17753,7 @@ } }, { - "accuracy": 0.9806140661239624, + "accuracy": 0.9854419231414795, "total_bits": 111748096, "q_proj": { "group_size": { @@ -17817,7 +17817,7 @@ } }, { - "accuracy": 0.9875363111495972, + "accuracy": 0.9915512800216675, "total_bits": 132388864, "q_proj": { "group_size": { @@ -17881,7 +17881,7 @@ } }, { - "accuracy": 0.9877961874008179, + "accuracy": 0.9917553067207336, "total_bits": 132455936, "q_proj": { "group_size": { @@ -17945,7 +17945,7 @@ } }, { - "accuracy": 0.990748405456543, + "accuracy": 0.9931929111480713, "total_bits": 169089024, "q_proj": { "group_size": { @@ -17997,7 +17997,7 @@ } }, { - "accuracy": 0.9910212755203247, + "accuracy": 0.9934123754501343, "total_bits": 169221632, "q_proj": { "group_size": { @@ -18049,7 +18049,7 @@ } }, { - "accuracy": 0.9920246601104736, + "accuracy": 0.9952439069747925, "total_bits": 170671104, "q_proj": { "group_size": { @@ -18101,7 +18101,7 @@ } }, { - "accuracy": 0.9910418391227722, + "accuracy": 0.9940065741539001, "total_bits": 173039616, "q_proj": { "group_size": { @@ -18153,7 +18153,7 @@ } }, { - "accuracy": 0.9939870834350586, + "accuracy": 0.996045708656311, "total_bits": 174398976, "q_proj": { "group_size": { @@ -18217,7 +18217,7 @@ } }, { - "accuracy": 0.9942471385002136, + "accuracy": 0.9961579442024231, "total_bits": 175225856, "q_proj": { "group_size": { @@ -18281,7 +18281,7 @@ } }, { - "accuracy": 0.9946821928024292, + "accuracy": 0.9964323043823242, "total_bits": 178728960, "q_proj": { "group_size": { @@ -18342,7 +18342,7 @@ } }, { - "accuracy": 0.9949930310249329, + "accuracy": 0.9965447187423706, "total_bits": 181067776, "q_proj": { "group_size": { @@ -18403,7 +18403,7 @@ } }, { - "accuracy": 0.9970627427101135, + "accuracy": 0.9977529644966125, "total_bits": 219944960, "q_proj": { "group_size": { @@ -18464,7 +18464,7 @@ } }, { - "accuracy": 0.9968739748001099, + "accuracy": 0.9975996613502502, "total_bits": 223010816, "q_proj": { "group_size": { @@ -18525,7 +18525,7 @@ } }, { - "accuracy": 0.9976456165313721, + "accuracy": 0.9980825781822205, "total_bits": 252975104, "q_proj": { "group_size": { @@ -18577,7 +18577,7 @@ } }, { - "accuracy": 0.9974823594093323, + "accuracy": 0.9979479908943176, "total_bits": 265314304, "q_proj": { "group_size": { @@ -18629,7 +18629,7 @@ } }, { - "accuracy": 0.9985976219177246, + "accuracy": 0.9982230067253113, "total_bits": 336861184, "q_proj": { "group_size": { @@ -18683,7 +18683,7 @@ ], "model.layers.9.block_sparse_moe": [ { - "accuracy": 0.9730449318885803, + "accuracy": 0.9812453985214233, "total_bits": 3157926400, "w1": { "group_size": { @@ -18735,7 +18735,7 @@ } }, { - "accuracy": 0.9740323424339294, + "accuracy": 0.9819312691688538, "total_bits": 3268026880, "w1": { "group_size": { @@ -18787,7 +18787,7 @@ } }, { - "accuracy": 0.977423369884491, + "accuracy": 0.9842955470085144, "total_bits": 3652411392, "w1": { "group_size": { @@ -18836,7 +18836,7 @@ } }, { - "accuracy": 0.978216826915741, + "accuracy": 0.9848613739013672, "total_bits": 4098056192, "w1": { "group_size": { @@ -18885,7 +18885,7 @@ } }, { - "accuracy": 0.9873700737953186, + "accuracy": 0.9911915063858032, "total_bits": 4621411072, "w1": { "group_size": { @@ -18937,7 +18937,7 @@ } }, { - "accuracy": 0.9883928894996643, + "accuracy": 0.9918870329856873, "total_bits": 4737212416, "w1": { "group_size": { @@ -18989,7 +18989,7 @@ } }, { - "accuracy": 0.9894325137138367, + "accuracy": 0.9926279783248901, "total_bits": 5093868288, "w1": { "group_size": { @@ -19038,7 +19038,7 @@ } }, { - "accuracy": 0.9935753345489502, + "accuracy": 0.9954830408096313, "total_bits": 5824164608, "w1": { "group_size": { @@ -19081,7 +19081,7 @@ } }, { - "accuracy": 0.9941227436065674, + "accuracy": 0.9958162903785706, "total_bits": 5910044672, "w1": { "group_size": { @@ -19124,7 +19124,7 @@ } }, { - "accuracy": 0.9936132431030273, + "accuracy": 0.9955218434333801, "total_bits": 6006579968, "w1": { "group_size": { @@ -19176,7 +19176,7 @@ } }, { - "accuracy": 0.994431734085083, + "accuracy": 0.996074378490448, "total_bits": 6122381312, "w1": { "group_size": { @@ -19228,7 +19228,7 @@ } }, { - "accuracy": 0.996772825717926, + "accuracy": 0.99773770570755, "total_bits": 7391748864, "w1": { "group_size": { @@ -19280,7 +19280,7 @@ } }, { - "accuracy": 0.9970038533210754, + "accuracy": 0.9978908896446228, "total_bits": 7507550208, "w1": { "group_size": { @@ -19332,7 +19332,7 @@ } }, { - "accuracy": 0.9981170296669006, + "accuracy": 0.9986595511436462, "total_bits": 8550425344, "w1": { "group_size": { @@ -19375,7 +19375,7 @@ } }, { - "accuracy": 0.9982072710990906, + "accuracy": 0.9987344145774841, "total_bits": 8877312000, "w1": { "group_size": { @@ -19424,7 +19424,7 @@ } }, { - "accuracy": 0.998359739780426, + "accuracy": 0.9988433718681335, "total_bits": 9674229760, "w1": { "group_size": { @@ -19470,7 +19470,7 @@ } }, { - "accuracy": 0.9989792108535767, + "accuracy": 0.9992566108703613, "total_bits": 11318396928, "w1": { "group_size": { @@ -19512,7 +19512,7 @@ ], "model.layers.10.self_attn": [ { - "accuracy": 0.9718138575553894, + "accuracy": 0.9802050590515137, "total_bits": 89141248, "q_proj": { "group_size": { @@ -19576,7 +19576,7 @@ } }, { - "accuracy": 0.9729256629943848, + "accuracy": 0.9807765483856201, "total_bits": 91697152, "q_proj": { "group_size": { @@ -19640,7 +19640,7 @@ } }, { - "accuracy": 0.9746987819671631, + "accuracy": 0.981882631778717, "total_bits": 95234560, "q_proj": { "group_size": { @@ -19704,7 +19704,7 @@ } }, { - "accuracy": 0.9787259697914124, + "accuracy": 0.9843881726264954, "total_bits": 111748096, "q_proj": { "group_size": { @@ -19768,7 +19768,7 @@ } }, { - "accuracy": 0.9869256615638733, + "accuracy": 0.9902209639549255, "total_bits": 132388864, "q_proj": { "group_size": { @@ -19832,7 +19832,7 @@ } }, { - "accuracy": 0.9871384501457214, + "accuracy": 0.9904692769050598, "total_bits": 132455936, "q_proj": { "group_size": { @@ -19896,7 +19896,7 @@ } }, { - "accuracy": 0.9905244708061218, + "accuracy": 0.99228835105896, "total_bits": 169089024, "q_proj": { "group_size": { @@ -19948,7 +19948,7 @@ } }, { - "accuracy": 0.990810751914978, + "accuracy": 0.9925268888473511, "total_bits": 169221632, "q_proj": { "group_size": { @@ -20000,7 +20000,7 @@ } }, { - "accuracy": 0.9920419454574585, + "accuracy": 0.9938488006591797, "total_bits": 170671104, "q_proj": { "group_size": { @@ -20052,7 +20052,7 @@ } }, { - "accuracy": 0.9926743507385254, + "accuracy": 0.9942900538444519, "total_bits": 173039616, "q_proj": { "group_size": { @@ -20104,7 +20104,7 @@ } }, { - "accuracy": 0.9934741854667664, + "accuracy": 0.9947695136070251, "total_bits": 174398976, "q_proj": { "group_size": { @@ -20168,7 +20168,7 @@ } }, { - "accuracy": 0.9938188195228577, + "accuracy": 0.9950206875801086, "total_bits": 175225856, "q_proj": { "group_size": { @@ -20232,7 +20232,7 @@ } }, { - "accuracy": 0.9941202402114868, + "accuracy": 0.9951378107070923, "total_bits": 178728960, "q_proj": { "group_size": { @@ -20293,7 +20293,7 @@ } }, { - "accuracy": 0.994479238986969, + "accuracy": 0.9954570531845093, "total_bits": 181067776, "q_proj": { "group_size": { @@ -20354,7 +20354,7 @@ } }, { - "accuracy": 0.9964454770088196, + "accuracy": 0.9969031810760498, "total_bits": 219944960, "q_proj": { "group_size": { @@ -20415,7 +20415,7 @@ } }, { - "accuracy": 0.9966941475868225, + "accuracy": 0.9970819354057312, "total_bits": 223010816, "q_proj": { "group_size": { @@ -20476,7 +20476,7 @@ } }, { - "accuracy": 0.997053325176239, + "accuracy": 0.9971845746040344, "total_bits": 252975104, "q_proj": { "group_size": { @@ -20528,7 +20528,7 @@ } }, { - "accuracy": 0.9971901774406433, + "accuracy": 0.9973251819610596, "total_bits": 265314304, "q_proj": { "group_size": { @@ -20580,7 +20580,7 @@ } }, { - "accuracy": 0.997945249080658, + "accuracy": 0.9980986714363098, "total_bits": 336861184, "q_proj": { "group_size": { @@ -20634,7 +20634,7 @@ ], "model.layers.10.block_sparse_moe": [ { - "accuracy": 0.9720174670219421, + "accuracy": 0.9807812571525574, "total_bits": 3157926400, "w1": { "group_size": { @@ -20686,7 +20686,7 @@ } }, { - "accuracy": 0.9731023907661438, + "accuracy": 0.9815208911895752, "total_bits": 3268026880, "w1": { "group_size": { @@ -20738,7 +20738,7 @@ } }, { - "accuracy": 0.9767524003982544, + "accuracy": 0.9840447306632996, "total_bits": 3652411392, "w1": { "group_size": { @@ -20787,7 +20787,7 @@ } }, { - "accuracy": 0.9775943160057068, + "accuracy": 0.9846265912055969, "total_bits": 4098056192, "w1": { "group_size": { @@ -20836,7 +20836,7 @@ } }, { - "accuracy": 0.9869523048400879, + "accuracy": 0.9910433888435364, "total_bits": 4621411072, "w1": { "group_size": { @@ -20888,7 +20888,7 @@ } }, { - "accuracy": 0.9880008697509766, + "accuracy": 0.991754412651062, "total_bits": 4737212416, "w1": { "group_size": { @@ -20940,7 +20940,7 @@ } }, { - "accuracy": 0.9890947341918945, + "accuracy": 0.9925128817558289, "total_bits": 5093868288, "w1": { "group_size": { @@ -20989,7 +20989,7 @@ } }, { - "accuracy": 0.9933146834373474, + "accuracy": 0.9954016804695129, "total_bits": 5824164608, "w1": { "group_size": { @@ -21032,7 +21032,7 @@ } }, { - "accuracy": 0.993896484375, + "accuracy": 0.9957965612411499, "total_bits": 5910044672, "w1": { "group_size": { @@ -21075,7 +21075,7 @@ } }, { - "accuracy": 0.9934161901473999, + "accuracy": 0.9954792261123657, "total_bits": 6006579968, "w1": { "group_size": { @@ -21127,7 +21127,7 @@ } }, { - "accuracy": 0.9942546486854553, + "accuracy": 0.9960489273071289, "total_bits": 6122381312, "w1": { "group_size": { @@ -21179,7 +21179,7 @@ } }, { - "accuracy": 0.9966791868209839, + "accuracy": 0.9977161884307861, "total_bits": 7391748864, "w1": { "group_size": { @@ -21231,7 +21231,7 @@ } }, { - "accuracy": 0.9969216585159302, + "accuracy": 0.997883141040802, "total_bits": 7507550208, "w1": { "group_size": { @@ -21283,7 +21283,7 @@ } }, { - "accuracy": 0.9980678558349609, + "accuracy": 0.9986656308174133, "total_bits": 8550425344, "w1": { "group_size": { @@ -21326,7 +21326,7 @@ } }, { - "accuracy": 0.9981635808944702, + "accuracy": 0.998735785484314, "total_bits": 8877312000, "w1": { "group_size": { @@ -21375,7 +21375,7 @@ } }, { - "accuracy": 0.9983260631561279, + "accuracy": 0.9988484978675842, "total_bits": 9674229760, "w1": { "group_size": { @@ -21421,7 +21421,7 @@ } }, { - "accuracy": 0.9989689588546753, + "accuracy": 0.9992857575416565, "total_bits": 11318396928, "w1": { "group_size": { @@ -21463,7 +21463,7 @@ ], "model.layers.11.self_attn": [ { - "accuracy": 0.9696348905563354, + "accuracy": 0.9783013463020325, "total_bits": 89141248, "q_proj": { "group_size": { @@ -21527,7 +21527,7 @@ } }, { - "accuracy": 0.9705777168273926, + "accuracy": 0.9788656830787659, "total_bits": 91697152, "q_proj": { "group_size": { @@ -21591,7 +21591,7 @@ } }, { - "accuracy": 0.9718760848045349, + "accuracy": 0.9794105887413025, "total_bits": 95234560, "q_proj": { "group_size": { @@ -21655,7 +21655,7 @@ } }, { - "accuracy": 0.9764204621315002, + "accuracy": 0.9819142818450928, "total_bits": 111748096, "q_proj": { "group_size": { @@ -21719,7 +21719,7 @@ } }, { - "accuracy": 0.9840719699859619, + "accuracy": 0.9885290265083313, "total_bits": 132388864, "q_proj": { "group_size": { @@ -21783,7 +21783,7 @@ } }, { - "accuracy": 0.9848573803901672, + "accuracy": 0.9893053770065308, "total_bits": 132455936, "q_proj": { "group_size": { @@ -21847,7 +21847,7 @@ } }, { - "accuracy": 0.988134503364563, + "accuracy": 0.99045330286026, "total_bits": 169089024, "q_proj": { "group_size": { @@ -21899,7 +21899,7 @@ } }, { - "accuracy": 0.9891449213027954, + "accuracy": 0.9914538860321045, "total_bits": 169221632, "q_proj": { "group_size": { @@ -21951,7 +21951,7 @@ } }, { - "accuracy": 0.9887186884880066, + "accuracy": 0.9899231195449829, "total_bits": 170671104, "q_proj": { "group_size": { @@ -22003,7 +22003,7 @@ } }, { - "accuracy": 0.9901347756385803, + "accuracy": 0.9906660318374634, "total_bits": 173039616, "q_proj": { "group_size": { @@ -22055,7 +22055,7 @@ } }, { - "accuracy": 0.9921250939369202, + "accuracy": 0.9941952228546143, "total_bits": 174398976, "q_proj": { "group_size": { @@ -22119,7 +22119,7 @@ } }, { - "accuracy": 0.9926695227622986, + "accuracy": 0.9944469928741455, "total_bits": 175225856, "q_proj": { "group_size": { @@ -22183,7 +22183,7 @@ } }, { - "accuracy": 0.9930039048194885, + "accuracy": 0.9946174025535583, "total_bits": 178728960, "q_proj": { "group_size": { @@ -22244,7 +22244,7 @@ } }, { - "accuracy": 0.9934250116348267, + "accuracy": 0.9948597550392151, "total_bits": 181067776, "q_proj": { "group_size": { @@ -22305,7 +22305,7 @@ } }, { - "accuracy": 0.9954590201377869, + "accuracy": 0.9959769248962402, "total_bits": 219944960, "q_proj": { "group_size": { @@ -22366,7 +22366,7 @@ } }, { - "accuracy": 0.9958265423774719, + "accuracy": 0.9960152506828308, "total_bits": 223010816, "q_proj": { "group_size": { @@ -22427,7 +22427,7 @@ } }, { - "accuracy": 0.9960470199584961, + "accuracy": 0.9962192177772522, "total_bits": 252975104, "q_proj": { "group_size": { @@ -22479,7 +22479,7 @@ } }, { - "accuracy": 0.9963229894638062, + "accuracy": 0.9962271451950073, "total_bits": 265314304, "q_proj": { "group_size": { @@ -22531,7 +22531,7 @@ } }, { - "accuracy": 0.9966380596160889, + "accuracy": 0.9964985847473145, "total_bits": 336861184, "q_proj": { "group_size": { @@ -22585,7 +22585,7 @@ ], "model.layers.11.block_sparse_moe": [ { - "accuracy": 0.9706035256385803, + "accuracy": 0.9801572561264038, "total_bits": 3157926400, "w1": { "group_size": { @@ -22637,7 +22637,7 @@ } }, { - "accuracy": 0.9717403650283813, + "accuracy": 0.9809514284133911, "total_bits": 3268026880, "w1": { "group_size": { @@ -22689,7 +22689,7 @@ } }, { - "accuracy": 0.9756008386611938, + "accuracy": 0.9835654497146606, "total_bits": 3652411392, "w1": { "group_size": { @@ -22738,7 +22738,7 @@ } }, { - "accuracy": 0.9765130877494812, + "accuracy": 0.9841902256011963, "total_bits": 4098056192, "w1": { "group_size": { @@ -22787,7 +22787,7 @@ } }, { - "accuracy": 0.986198365688324, + "accuracy": 0.9906992316246033, "total_bits": 4621411072, "w1": { "group_size": { @@ -22839,7 +22839,7 @@ } }, { - "accuracy": 0.9873296022415161, + "accuracy": 0.991447925567627, "total_bits": 4737212416, "w1": { "group_size": { @@ -22891,7 +22891,7 @@ } }, { - "accuracy": 0.9885213971138, + "accuracy": 0.9922640323638916, "total_bits": 5093868288, "w1": { "group_size": { @@ -22940,7 +22940,7 @@ } }, { - "accuracy": 0.9928700923919678, + "accuracy": 0.9951768517494202, "total_bits": 5824164608, "w1": { "group_size": { @@ -22983,7 +22983,7 @@ } }, { - "accuracy": 0.9935072660446167, + "accuracy": 0.9955940246582031, "total_bits": 5910044672, "w1": { "group_size": { @@ -23026,7 +23026,7 @@ } }, { - "accuracy": 0.9930249452590942, + "accuracy": 0.9952973127365112, "total_bits": 6006579968, "w1": { "group_size": { @@ -23078,7 +23078,7 @@ } }, { - "accuracy": 0.9939168095588684, + "accuracy": 0.9958847165107727, "total_bits": 6122381312, "w1": { "group_size": { @@ -23130,7 +23130,7 @@ } }, { - "accuracy": 0.9964755177497864, + "accuracy": 0.9976138472557068, "total_bits": 7391748864, "w1": { "group_size": { @@ -23182,7 +23182,7 @@ } }, { - "accuracy": 0.99672931432724, + "accuracy": 0.9977735280990601, "total_bits": 7507550208, "w1": { "group_size": { @@ -23234,7 +23234,7 @@ } }, { - "accuracy": 0.9979307651519775, + "accuracy": 0.9985828399658203, "total_bits": 8550425344, "w1": { "group_size": { @@ -23277,7 +23277,7 @@ } }, { - "accuracy": 0.9980494379997253, + "accuracy": 0.9986670017242432, "total_bits": 8877312000, "w1": { "group_size": { @@ -23326,7 +23326,7 @@ } }, { - "accuracy": 0.9982286095619202, + "accuracy": 0.9987879395484924, "total_bits": 9674229760, "w1": { "group_size": { @@ -23372,7 +23372,7 @@ } }, { - "accuracy": 0.9988915920257568, + "accuracy": 0.999220073223114, "total_bits": 11318396928, "w1": { "group_size": { @@ -23414,7 +23414,7 @@ ], "model.layers.12.self_attn": [ { - "accuracy": 0.9659296870231628, + "accuracy": 0.9760562777519226, "total_bits": 89141248, "q_proj": { "group_size": { @@ -23478,7 +23478,7 @@ } }, { - "accuracy": 0.9676147103309631, + "accuracy": 0.9772819876670837, "total_bits": 91697152, "q_proj": { "group_size": { @@ -23542,7 +23542,7 @@ } }, { - "accuracy": 0.9699097275733948, + "accuracy": 0.9788185954093933, "total_bits": 95234560, "q_proj": { "group_size": { @@ -23606,7 +23606,7 @@ } }, { - "accuracy": 0.974396288394928, + "accuracy": 0.9812253713607788, "total_bits": 111748096, "q_proj": { "group_size": { @@ -23670,7 +23670,7 @@ } }, { - "accuracy": 0.9827721118927002, + "accuracy": 0.9869716167449951, "total_bits": 132388864, "q_proj": { "group_size": { @@ -23734,7 +23734,7 @@ } }, { - "accuracy": 0.9829978942871094, + "accuracy": 0.9871600866317749, "total_bits": 132455936, "q_proj": { "group_size": { @@ -23798,7 +23798,7 @@ } }, { - "accuracy": 0.9866425395011902, + "accuracy": 0.9889944791793823, "total_bits": 169089024, "q_proj": { "group_size": { @@ -23850,7 +23850,7 @@ } }, { - "accuracy": 0.9869810342788696, + "accuracy": 0.9892253279685974, "total_bits": 169221632, "q_proj": { "group_size": { @@ -23902,7 +23902,7 @@ } }, { - "accuracy": 0.9882655143737793, + "accuracy": 0.9901496171951294, "total_bits": 170671104, "q_proj": { "group_size": { @@ -23954,7 +23954,7 @@ } }, { - "accuracy": 0.9887760281562805, + "accuracy": 0.9904927015304565, "total_bits": 173039616, "q_proj": { "group_size": { @@ -24006,7 +24006,7 @@ } }, { - "accuracy": 0.9905843734741211, + "accuracy": 0.9922304749488831, "total_bits": 174398976, "q_proj": { "group_size": { @@ -24070,7 +24070,7 @@ } }, { - "accuracy": 0.9911945462226868, + "accuracy": 0.9925956130027771, "total_bits": 175225856, "q_proj": { "group_size": { @@ -24134,7 +24134,7 @@ } }, { - "accuracy": 0.9913910031318665, + "accuracy": 0.9926854968070984, "total_bits": 178728960, "q_proj": { "group_size": { @@ -24195,7 +24195,7 @@ } }, { - "accuracy": 0.9921290874481201, + "accuracy": 0.9933433532714844, "total_bits": 181067776, "q_proj": { "group_size": { @@ -24256,7 +24256,7 @@ } }, { - "accuracy": 0.9936894774436951, + "accuracy": 0.9943298101425171, "total_bits": 219944960, "q_proj": { "group_size": { @@ -24317,7 +24317,7 @@ } }, { - "accuracy": 0.9942216873168945, + "accuracy": 0.9948176741600037, "total_bits": 223010816, "q_proj": { "group_size": { @@ -24378,7 +24378,7 @@ } }, { - "accuracy": 0.9941593408584595, + "accuracy": 0.9945536255836487, "total_bits": 252975104, "q_proj": { "group_size": { @@ -24430,7 +24430,7 @@ } }, { - "accuracy": 0.9947644472122192, + "accuracy": 0.9950072169303894, "total_bits": 265314304, "q_proj": { "group_size": { @@ -24482,7 +24482,7 @@ } }, { - "accuracy": 0.9944745898246765, + "accuracy": 0.9947435855865479, "total_bits": 336861184, "q_proj": { "group_size": { @@ -24536,7 +24536,7 @@ ], "model.layers.12.block_sparse_moe": [ { - "accuracy": 0.9684573411941528, + "accuracy": 0.9786211252212524, "total_bits": 3157926400, "w1": { "group_size": { @@ -24588,7 +24588,7 @@ } }, { - "accuracy": 0.9696992635726929, + "accuracy": 0.9794734120368958, "total_bits": 3268026880, "w1": { "group_size": { @@ -24640,7 +24640,7 @@ } }, { - "accuracy": 0.9736368656158447, + "accuracy": 0.9821404218673706, "total_bits": 3652411392, "w1": { "group_size": { @@ -24689,7 +24689,7 @@ } }, { - "accuracy": 0.9745553135871887, + "accuracy": 0.9827689528465271, "total_bits": 4098056192, "w1": { "group_size": { @@ -24738,7 +24738,7 @@ } }, { - "accuracy": 0.9851550459861755, + "accuracy": 0.9899434447288513, "total_bits": 4621411072, "w1": { "group_size": { @@ -24790,7 +24790,7 @@ } }, { - "accuracy": 0.9863818883895874, + "accuracy": 0.9907681941986084, "total_bits": 4737212416, "w1": { "group_size": { @@ -24842,7 +24842,7 @@ } }, { - "accuracy": 0.9875721335411072, + "accuracy": 0.9915797114372253, "total_bits": 5093868288, "w1": { "group_size": { @@ -24891,7 +24891,7 @@ } }, { - "accuracy": 0.9923911690711975, + "accuracy": 0.9948229789733887, "total_bits": 5824164608, "w1": { "group_size": { @@ -24934,7 +24934,7 @@ } }, { - "accuracy": 0.9930605292320251, + "accuracy": 0.9952641129493713, "total_bits": 5910044672, "w1": { "group_size": { @@ -24977,7 +24977,7 @@ } }, { - "accuracy": 0.9924753308296204, + "accuracy": 0.99489426612854, "total_bits": 6006579968, "w1": { "group_size": { @@ -25029,7 +25029,7 @@ } }, { - "accuracy": 0.9934515953063965, + "accuracy": 0.9955466985702515, "total_bits": 6122381312, "w1": { "group_size": { @@ -25081,7 +25081,7 @@ } }, { - "accuracy": 0.9961940050125122, + "accuracy": 0.9973954558372498, "total_bits": 7391748864, "w1": { "group_size": { @@ -25133,7 +25133,7 @@ } }, { - "accuracy": 0.9964504241943359, + "accuracy": 0.9975629448890686, "total_bits": 7507550208, "w1": { "group_size": { @@ -25185,7 +25185,7 @@ } }, { - "accuracy": 0.9977585077285767, + "accuracy": 0.9984332323074341, "total_bits": 8550425344, "w1": { "group_size": { @@ -25228,7 +25228,7 @@ } }, { - "accuracy": 0.9978651404380798, + "accuracy": 0.9985220432281494, "total_bits": 8877312000, "w1": { "group_size": { @@ -25277,7 +25277,7 @@ } }, { - "accuracy": 0.9980379343032837, + "accuracy": 0.9986375570297241, "total_bits": 9674229760, "w1": { "group_size": { @@ -25323,7 +25323,7 @@ } }, { - "accuracy": 0.9987522959709167, + "accuracy": 0.9991068840026855, "total_bits": 11318396928, "w1": { "group_size": { @@ -25365,7 +25365,7 @@ ], "model.layers.13.self_attn": [ { - "accuracy": 0.9672948718070984, + "accuracy": 0.9771461486816406, "total_bits": 89141248, "q_proj": { "group_size": { @@ -25429,7 +25429,7 @@ } }, { - "accuracy": 0.9689596891403198, + "accuracy": 0.9781214594841003, "total_bits": 91697152, "q_proj": { "group_size": { @@ -25493,7 +25493,7 @@ } }, { - "accuracy": 0.9714821577072144, + "accuracy": 0.9798063635826111, "total_bits": 95234560, "q_proj": { "group_size": { @@ -25557,7 +25557,7 @@ } }, { - "accuracy": 0.9775870442390442, + "accuracy": 0.983347475528717, "total_bits": 111748096, "q_proj": { "group_size": { @@ -25621,7 +25621,7 @@ } }, { - "accuracy": 0.9839168787002563, + "accuracy": 0.9886296391487122, "total_bits": 132388864, "q_proj": { "group_size": { @@ -25685,7 +25685,7 @@ } }, { - "accuracy": 0.9843135476112366, + "accuracy": 0.9888168573379517, "total_bits": 132455936, "q_proj": { "group_size": { @@ -25749,7 +25749,7 @@ } }, { - "accuracy": 0.9893288612365723, + "accuracy": 0.9917493462562561, "total_bits": 169089024, "q_proj": { "group_size": { @@ -25801,7 +25801,7 @@ } }, { - "accuracy": 0.9898593425750732, + "accuracy": 0.9919884204864502, "total_bits": 169221632, "q_proj": { "group_size": { @@ -25853,7 +25853,7 @@ } }, { - "accuracy": 0.9911143183708191, + "accuracy": 0.9934077262878418, "total_bits": 170671104, "q_proj": { "group_size": { @@ -25905,7 +25905,7 @@ } }, { - "accuracy": 0.9915969371795654, + "accuracy": 0.9937195777893066, "total_bits": 173039616, "q_proj": { "group_size": { @@ -25957,7 +25957,7 @@ } }, { - "accuracy": 0.9920886754989624, + "accuracy": 0.9943729639053345, "total_bits": 174398976, "q_proj": { "group_size": { @@ -26021,7 +26021,7 @@ } }, { - "accuracy": 0.9925785064697266, + "accuracy": 0.9947659373283386, "total_bits": 175225856, "q_proj": { "group_size": { @@ -26085,7 +26085,7 @@ } }, { - "accuracy": 0.9930744767189026, + "accuracy": 0.9950062036514282, "total_bits": 178728960, "q_proj": { "group_size": { @@ -26146,7 +26146,7 @@ } }, { - "accuracy": 0.993636965751648, + "accuracy": 0.9954781532287598, "total_bits": 181067776, "q_proj": { "group_size": { @@ -26207,7 +26207,7 @@ } }, { - "accuracy": 0.9959567785263062, + "accuracy": 0.9968833923339844, "total_bits": 219944960, "q_proj": { "group_size": { @@ -26268,7 +26268,7 @@ } }, { - "accuracy": 0.9961870908737183, + "accuracy": 0.9970784783363342, "total_bits": 223010816, "q_proj": { "group_size": { @@ -26329,7 +26329,7 @@ } }, { - "accuracy": 0.9968671798706055, + "accuracy": 0.9973524212837219, "total_bits": 252975104, "q_proj": { "group_size": { @@ -26381,7 +26381,7 @@ } }, { - "accuracy": 0.9970226883888245, + "accuracy": 0.997530996799469, "total_bits": 265314304, "q_proj": { "group_size": { @@ -26433,7 +26433,7 @@ } }, { - "accuracy": 0.9980165958404541, + "accuracy": 0.9981143474578857, "total_bits": 336861184, "q_proj": { "group_size": { @@ -26487,7 +26487,7 @@ ], "model.layers.13.block_sparse_moe": [ { - "accuracy": 0.9650376439094543, + "accuracy": 0.9763058423995972, "total_bits": 3157926400, "w1": { "group_size": { @@ -26539,7 +26539,7 @@ } }, { - "accuracy": 0.9663773775100708, + "accuracy": 0.9772336483001709, "total_bits": 3268026880, "w1": { "group_size": { @@ -26591,7 +26591,7 @@ } }, { - "accuracy": 0.9708361625671387, + "accuracy": 0.9802612066268921, "total_bits": 3652411392, "w1": { "group_size": { @@ -26640,7 +26640,7 @@ } }, { - "accuracy": 0.9719136357307434, + "accuracy": 0.9810017347335815, "total_bits": 4098056192, "w1": { "group_size": { @@ -26689,7 +26689,7 @@ } }, { - "accuracy": 0.9834644198417664, + "accuracy": 0.9888048768043518, "total_bits": 4621411072, "w1": { "group_size": { @@ -26741,7 +26741,7 @@ } }, { - "accuracy": 0.9848337173461914, + "accuracy": 0.9897273778915405, "total_bits": 4737212416, "w1": { "group_size": { @@ -26793,7 +26793,7 @@ } }, { - "accuracy": 0.9862042665481567, + "accuracy": 0.9906653761863708, "total_bits": 5093868288, "w1": { "group_size": { @@ -26842,7 +26842,7 @@ } }, { - "accuracy": 0.9914608001708984, + "accuracy": 0.9941992163658142, "total_bits": 5824164608, "w1": { "group_size": { @@ -26885,7 +26885,7 @@ } }, { - "accuracy": 0.9922345876693726, + "accuracy": 0.9947184920310974, "total_bits": 5910044672, "w1": { "group_size": { @@ -26928,7 +26928,7 @@ } }, { - "accuracy": 0.9916381239891052, + "accuracy": 0.9943406581878662, "total_bits": 6006579968, "w1": { "group_size": { @@ -26980,7 +26980,7 @@ } }, { - "accuracy": 0.9927247166633606, + "accuracy": 0.9950692653656006, "total_bits": 6122381312, "w1": { "group_size": { @@ -27032,7 +27032,7 @@ } }, { - "accuracy": 0.9957790374755859, + "accuracy": 0.9971408843994141, "total_bits": 7391748864, "w1": { "group_size": { @@ -27084,7 +27084,7 @@ } }, { - "accuracy": 0.9961061477661133, + "accuracy": 0.9973626732826233, "total_bits": 7507550208, "w1": { "group_size": { @@ -27136,7 +27136,7 @@ } }, { - "accuracy": 0.9975301623344421, + "accuracy": 0.9983223080635071, "total_bits": 8550425344, "w1": { "group_size": { @@ -27179,7 +27179,7 @@ } }, { - "accuracy": 0.9976721405982971, + "accuracy": 0.9984261989593506, "total_bits": 8877312000, "w1": { "group_size": { @@ -27228,7 +27228,7 @@ } }, { - "accuracy": 0.9978768825531006, + "accuracy": 0.998566746711731, "total_bits": 9674229760, "w1": { "group_size": { @@ -27274,7 +27274,7 @@ } }, { - "accuracy": 0.9986920356750488, + "accuracy": 0.9991169571876526, "total_bits": 11318396928, "w1": { "group_size": { @@ -27316,7 +27316,7 @@ ], "model.layers.14.self_attn": [ { - "accuracy": 0.960807204246521, + "accuracy": 0.9725906252861023, "total_bits": 89141248, "q_proj": { "group_size": { @@ -27380,7 +27380,7 @@ } }, { - "accuracy": 0.9625911712646484, + "accuracy": 0.973698616027832, "total_bits": 91697152, "q_proj": { "group_size": { @@ -27444,7 +27444,7 @@ } }, { - "accuracy": 0.9657725691795349, + "accuracy": 0.9756946563720703, "total_bits": 95234560, "q_proj": { "group_size": { @@ -27508,7 +27508,7 @@ } }, { - "accuracy": 0.972555935382843, + "accuracy": 0.9798251390457153, "total_bits": 111748096, "q_proj": { "group_size": { @@ -27572,7 +27572,7 @@ } }, { - "accuracy": 0.980419397354126, + "accuracy": 0.9863221049308777, "total_bits": 132388864, "q_proj": { "group_size": { @@ -27636,7 +27636,7 @@ } }, { - "accuracy": 0.9809625744819641, + "accuracy": 0.9866092801094055, "total_bits": 132455936, "q_proj": { "group_size": { @@ -27700,7 +27700,7 @@ } }, { - "accuracy": 0.9864667654037476, + "accuracy": 0.9900709390640259, "total_bits": 169089024, "q_proj": { "group_size": { @@ -27752,7 +27752,7 @@ } }, { - "accuracy": 0.9871666431427002, + "accuracy": 0.9904919862747192, "total_bits": 169221632, "q_proj": { "group_size": { @@ -27804,7 +27804,7 @@ } }, { - "accuracy": 0.9888257384300232, + "accuracy": 0.9910094141960144, "total_bits": 170671104, "q_proj": { "group_size": { @@ -27856,7 +27856,7 @@ } }, { - "accuracy": 0.9894418120384216, + "accuracy": 0.9914842247962952, "total_bits": 173039616, "q_proj": { "group_size": { @@ -27908,7 +27908,7 @@ } }, { - "accuracy": 0.9900749325752258, + "accuracy": 0.9930576086044312, "total_bits": 174398976, "q_proj": { "group_size": { @@ -27972,7 +27972,7 @@ } }, { - "accuracy": 0.9907664060592651, + "accuracy": 0.9936020374298096, "total_bits": 175225856, "q_proj": { "group_size": { @@ -28036,7 +28036,7 @@ } }, { - "accuracy": 0.9912841320037842, + "accuracy": 0.9938521385192871, "total_bits": 178728960, "q_proj": { "group_size": { @@ -28097,7 +28097,7 @@ } }, { - "accuracy": 0.9920443296432495, + "accuracy": 0.9944509267807007, "total_bits": 181067776, "q_proj": { "group_size": { @@ -28158,7 +28158,7 @@ } }, { - "accuracy": 0.9945937991142273, + "accuracy": 0.9955963492393494, "total_bits": 219944960, "q_proj": { "group_size": { @@ -28219,7 +28219,7 @@ } }, { - "accuracy": 0.9951828718185425, + "accuracy": 0.9960827827453613, "total_bits": 223010816, "q_proj": { "group_size": { @@ -28280,7 +28280,7 @@ } }, { - "accuracy": 0.9955295324325562, + "accuracy": 0.9960792064666748, "total_bits": 252975104, "q_proj": { "group_size": { @@ -28332,7 +28332,7 @@ } }, { - "accuracy": 0.9960466027259827, + "accuracy": 0.996527910232544, "total_bits": 265314304, "q_proj": { "group_size": { @@ -28384,7 +28384,7 @@ } }, { - "accuracy": 0.9965485334396362, + "accuracy": 0.9965934753417969, "total_bits": 336861184, "q_proj": { "group_size": { @@ -28438,7 +28438,7 @@ ], "model.layers.14.block_sparse_moe": [ { - "accuracy": 0.9638780355453491, + "accuracy": 0.9745654463768005, "total_bits": 3157926400, "w1": { "group_size": { @@ -28490,7 +28490,7 @@ } }, { - "accuracy": 0.9652614593505859, + "accuracy": 0.9755628108978271, "total_bits": 3268026880, "w1": { "group_size": { @@ -28542,7 +28542,7 @@ } }, { - "accuracy": 0.9698053002357483, + "accuracy": 0.9787696599960327, "total_bits": 3652411392, "w1": { "group_size": { @@ -28591,7 +28591,7 @@ } }, { - "accuracy": 0.9709160923957825, + "accuracy": 0.9795659780502319, "total_bits": 4098056192, "w1": { "group_size": { @@ -28640,7 +28640,7 @@ } }, { - "accuracy": 0.9829066395759583, + "accuracy": 0.987964391708374, "total_bits": 4621411072, "w1": { "group_size": { @@ -28692,7 +28692,7 @@ } }, { - "accuracy": 0.984338641166687, + "accuracy": 0.9889666438102722, "total_bits": 4737212416, "w1": { "group_size": { @@ -28744,7 +28744,7 @@ } }, { - "accuracy": 0.9857434630393982, + "accuracy": 0.9899689555168152, "total_bits": 5093868288, "w1": { "group_size": { @@ -28793,7 +28793,7 @@ } }, { - "accuracy": 0.9910886287689209, + "accuracy": 0.9936877489089966, "total_bits": 5824164608, "w1": { "group_size": { @@ -28836,7 +28836,7 @@ } }, { - "accuracy": 0.9919037818908691, + "accuracy": 0.9942588210105896, "total_bits": 5910044672, "w1": { "group_size": { @@ -28879,7 +28879,7 @@ } }, { - "accuracy": 0.9913383722305298, + "accuracy": 0.9938949346542358, "total_bits": 6006579968, "w1": { "group_size": { @@ -28931,7 +28931,7 @@ } }, { - "accuracy": 0.9924570322036743, + "accuracy": 0.994671106338501, "total_bits": 6122381312, "w1": { "group_size": { @@ -28983,7 +28983,7 @@ } }, { - "accuracy": 0.9956002831459045, + "accuracy": 0.9968831539154053, "total_bits": 7391748864, "w1": { "group_size": { @@ -29035,7 +29035,7 @@ } }, { - "accuracy": 0.9959131479263306, + "accuracy": 0.9970993399620056, "total_bits": 7507550208, "w1": { "group_size": { @@ -29087,7 +29087,7 @@ } }, { - "accuracy": 0.9973711371421814, + "accuracy": 0.9981057047843933, "total_bits": 8550425344, "w1": { "group_size": { @@ -29130,7 +29130,7 @@ } }, { - "accuracy": 0.9975496530532837, + "accuracy": 0.9982528686523438, "total_bits": 8877312000, "w1": { "group_size": { @@ -29179,7 +29179,7 @@ } }, { - "accuracy": 0.9977579116821289, + "accuracy": 0.9983998537063599, "total_bits": 9674229760, "w1": { "group_size": { @@ -29225,7 +29225,7 @@ } }, { - "accuracy": 0.9985640048980713, + "accuracy": 0.9989457726478577, "total_bits": 11318396928, "w1": { "group_size": { @@ -29267,7 +29267,7 @@ ], "model.layers.15.self_attn": [ { - "accuracy": 0.958053469657898, + "accuracy": 0.9685735702514648, "total_bits": 89141248, "q_proj": { "group_size": { @@ -29331,7 +29331,7 @@ } }, { - "accuracy": 0.9592480659484863, + "accuracy": 0.9697738289833069, "total_bits": 91697152, "q_proj": { "group_size": { @@ -29395,7 +29395,7 @@ } }, { - "accuracy": 0.9624778032302856, + "accuracy": 0.9709609746932983, "total_bits": 95234560, "q_proj": { "group_size": { @@ -29459,7 +29459,7 @@ } }, { - "accuracy": 0.9688585996627808, + "accuracy": 0.9743157029151917, "total_bits": 111748096, "q_proj": { "group_size": { @@ -29523,7 +29523,7 @@ } }, { - "accuracy": 0.9791529774665833, + "accuracy": 0.9845354557037354, "total_bits": 132388864, "q_proj": { "group_size": { @@ -29587,7 +29587,7 @@ } }, { - "accuracy": 0.9795318841934204, + "accuracy": 0.9850164651870728, "total_bits": 132455936, "q_proj": { "group_size": { @@ -29651,7 +29651,7 @@ } }, { - "accuracy": 0.9848681092262268, + "accuracy": 0.9879919290542603, "total_bits": 169089024, "q_proj": { "group_size": { @@ -29703,7 +29703,7 @@ } }, { - "accuracy": 0.9853696227073669, + "accuracy": 0.9885489344596863, "total_bits": 169221632, "q_proj": { "group_size": { @@ -29755,7 +29755,7 @@ } }, { - "accuracy": 0.9869399666786194, + "accuracy": 0.9899277091026306, "total_bits": 170671104, "q_proj": { "group_size": { @@ -29807,7 +29807,7 @@ } }, { - "accuracy": 0.9876401424407959, + "accuracy": 0.9904948472976685, "total_bits": 173039616, "q_proj": { "group_size": { @@ -29859,7 +29859,7 @@ } }, { - "accuracy": 0.9892436265945435, + "accuracy": 0.9916188716888428, "total_bits": 174398976, "q_proj": { "group_size": { @@ -29923,7 +29923,7 @@ } }, { - "accuracy": 0.9900521636009216, + "accuracy": 0.9920883178710938, "total_bits": 175225856, "q_proj": { "group_size": { @@ -29987,7 +29987,7 @@ } }, { - "accuracy": 0.9903350472450256, + "accuracy": 0.9923012256622314, "total_bits": 178728960, "q_proj": { "group_size": { @@ -30048,7 +30048,7 @@ } }, { - "accuracy": 0.9913142919540405, + "accuracy": 0.9929690957069397, "total_bits": 181067776, "q_proj": { "group_size": { @@ -30109,7 +30109,7 @@ } }, { - "accuracy": 0.9942039847373962, + "accuracy": 0.9956848621368408, "total_bits": 219944960, "q_proj": { "group_size": { @@ -30170,7 +30170,7 @@ } }, { - "accuracy": 0.9949126839637756, + "accuracy": 0.996168851852417, "total_bits": 223010816, "q_proj": { "group_size": { @@ -30231,7 +30231,7 @@ } }, { - "accuracy": 0.9950622320175171, + "accuracy": 0.9961684346199036, "total_bits": 252975104, "q_proj": { "group_size": { @@ -30283,7 +30283,7 @@ } }, { - "accuracy": 0.9958535432815552, + "accuracy": 0.996844470500946, "total_bits": 265314304, "q_proj": { "group_size": { @@ -30335,7 +30335,7 @@ } }, { - "accuracy": 0.9977034330368042, + "accuracy": 0.9982017874717712, "total_bits": 336861184, "q_proj": { "group_size": { @@ -30389,7 +30389,7 @@ ], "model.layers.15.block_sparse_moe": [ { - "accuracy": 0.9615840911865234, + "accuracy": 0.9724915623664856, "total_bits": 3157926400, "w1": { "group_size": { @@ -30441,7 +30441,7 @@ } }, { - "accuracy": 0.9630401134490967, + "accuracy": 0.9735555648803711, "total_bits": 3268026880, "w1": { "group_size": { @@ -30493,7 +30493,7 @@ } }, { - "accuracy": 0.9677709937095642, + "accuracy": 0.976987898349762, "total_bits": 3652411392, "w1": { "group_size": { @@ -30542,7 +30542,7 @@ } }, { - "accuracy": 0.9689173698425293, + "accuracy": 0.9778191447257996, "total_bits": 4098056192, "w1": { "group_size": { @@ -30591,7 +30591,7 @@ } }, { - "accuracy": 0.981805682182312, + "accuracy": 0.9870067238807678, "total_bits": 4621411072, "w1": { "group_size": { @@ -30643,7 +30643,7 @@ } }, { - "accuracy": 0.9832935333251953, + "accuracy": 0.988048791885376, "total_bits": 4737212416, "w1": { "group_size": { @@ -30695,7 +30695,7 @@ } }, { - "accuracy": 0.984751284122467, + "accuracy": 0.9891059994697571, "total_bits": 5093868288, "w1": { "group_size": { @@ -30744,7 +30744,7 @@ } }, { - "accuracy": 0.9905992150306702, + "accuracy": 0.9932613968849182, "total_bits": 5824164608, "w1": { "group_size": { @@ -30787,7 +30787,7 @@ } }, { - "accuracy": 0.9914324283599854, + "accuracy": 0.99385005235672, "total_bits": 5910044672, "w1": { "group_size": { @@ -30830,7 +30830,7 @@ } }, { - "accuracy": 0.9908004403114319, + "accuracy": 0.993430495262146, "total_bits": 6006579968, "w1": { "group_size": { @@ -30882,7 +30882,7 @@ } }, { - "accuracy": 0.9919835925102234, + "accuracy": 0.9942658543586731, "total_bits": 6122381312, "w1": { "group_size": { @@ -30934,7 +30934,7 @@ } }, { - "accuracy": 0.9953620433807373, + "accuracy": 0.996677577495575, "total_bits": 7391748864, "w1": { "group_size": { @@ -30986,7 +30986,7 @@ } }, { - "accuracy": 0.9957222938537598, + "accuracy": 0.9969316124916077, "total_bits": 7507550208, "w1": { "group_size": { @@ -31038,7 +31038,7 @@ } }, { - "accuracy": 0.9972999095916748, + "accuracy": 0.9980505704879761, "total_bits": 8550425344, "w1": { "group_size": { @@ -31081,7 +31081,7 @@ } }, { - "accuracy": 0.9974563717842102, + "accuracy": 0.9981679916381836, "total_bits": 8877312000, "w1": { "group_size": { @@ -31130,7 +31130,7 @@ } }, { - "accuracy": 0.9976755976676941, + "accuracy": 0.998325526714325, "total_bits": 9674229760, "w1": { "group_size": { @@ -31176,7 +31176,7 @@ } }, { - "accuracy": 0.9985916018486023, + "accuracy": 0.9989656805992126, "total_bits": 11318396928, "w1": { "group_size": { @@ -31218,7 +31218,7 @@ ], "model.layers.16.self_attn": [ { - "accuracy": 0.9581506252288818, + "accuracy": 0.9680315852165222, "total_bits": 89141248, "q_proj": { "group_size": { @@ -31282,7 +31282,7 @@ } }, { - "accuracy": 0.9596182703971863, + "accuracy": 0.9688631892204285, "total_bits": 91697152, "q_proj": { "group_size": { @@ -31346,7 +31346,7 @@ } }, { - "accuracy": 0.9629991054534912, + "accuracy": 0.970557451248169, "total_bits": 95234560, "q_proj": { "group_size": { @@ -31410,7 +31410,7 @@ } }, { - "accuracy": 0.9688444137573242, + "accuracy": 0.973793089389801, "total_bits": 111748096, "q_proj": { "group_size": { @@ -31474,7 +31474,7 @@ } }, { - "accuracy": 0.9779772758483887, + "accuracy": 0.9812387824058533, "total_bits": 132388864, "q_proj": { "group_size": { @@ -31538,7 +31538,7 @@ } }, { - "accuracy": 0.9787436127662659, + "accuracy": 0.9822422862052917, "total_bits": 132455936, "q_proj": { "group_size": { @@ -31602,7 +31602,7 @@ } }, { - "accuracy": 0.9833227396011353, + "accuracy": 0.9839109182357788, "total_bits": 169089024, "q_proj": { "group_size": { @@ -31654,7 +31654,7 @@ } }, { - "accuracy": 0.9842470288276672, + "accuracy": 0.9850091338157654, "total_bits": 169221632, "q_proj": { "group_size": { @@ -31706,7 +31706,7 @@ } }, { - "accuracy": 0.9857291579246521, + "accuracy": 0.988382875919342, "total_bits": 170671104, "q_proj": { "group_size": { @@ -31758,7 +31758,7 @@ } }, { - "accuracy": 0.9863946437835693, + "accuracy": 0.9882296323776245, "total_bits": 173039616, "q_proj": { "group_size": { @@ -31810,7 +31810,7 @@ } }, { - "accuracy": 0.9890793561935425, + "accuracy": 0.9908856153488159, "total_bits": 174398976, "q_proj": { "group_size": { @@ -31874,7 +31874,7 @@ } }, { - "accuracy": 0.9899460077285767, + "accuracy": 0.9914112091064453, "total_bits": 175225856, "q_proj": { "group_size": { @@ -31938,7 +31938,7 @@ } }, { - "accuracy": 0.9901812076568604, + "accuracy": 0.9915235042572021, "total_bits": 178728960, "q_proj": { "group_size": { @@ -31999,7 +31999,7 @@ } }, { - "accuracy": 0.9909204840660095, + "accuracy": 0.9920203685760498, "total_bits": 181067776, "q_proj": { "group_size": { @@ -32060,7 +32060,7 @@ } }, { - "accuracy": 0.9943650364875793, + "accuracy": 0.9947265386581421, "total_bits": 219944960, "q_proj": { "group_size": { @@ -32121,7 +32121,7 @@ } }, { - "accuracy": 0.9947282671928406, + "accuracy": 0.9951575398445129, "total_bits": 223010816, "q_proj": { "group_size": { @@ -32182,7 +32182,7 @@ } }, { - "accuracy": 0.9951823353767395, + "accuracy": 0.9951090216636658, "total_bits": 252975104, "q_proj": { "group_size": { @@ -32234,7 +32234,7 @@ } }, { - "accuracy": 0.995464563369751, + "accuracy": 0.9953833222389221, "total_bits": 265314304, "q_proj": { "group_size": { @@ -32286,7 +32286,7 @@ } }, { - "accuracy": 0.997278094291687, + "accuracy": 0.9969714283943176, "total_bits": 336861184, "q_proj": { "group_size": { @@ -32340,7 +32340,7 @@ ], "model.layers.16.block_sparse_moe": [ { - "accuracy": 0.9595106840133667, + "accuracy": 0.9700701236724854, "total_bits": 3157926400, "w1": { "group_size": { @@ -32392,7 +32392,7 @@ } }, { - "accuracy": 0.9610702991485596, + "accuracy": 0.9712930917739868, "total_bits": 3268026880, "w1": { "group_size": { @@ -32444,7 +32444,7 @@ } }, { - "accuracy": 0.9659238457679749, + "accuracy": 0.9748791456222534, "total_bits": 3652411392, "w1": { "group_size": { @@ -32493,7 +32493,7 @@ } }, { - "accuracy": 0.9671376943588257, + "accuracy": 0.9757848381996155, "total_bits": 4098056192, "w1": { "group_size": { @@ -32542,7 +32542,7 @@ } }, { - "accuracy": 0.9808094501495361, + "accuracy": 0.9858589172363281, "total_bits": 4621411072, "w1": { "group_size": { @@ -32594,7 +32594,7 @@ } }, { - "accuracy": 0.9824081063270569, + "accuracy": 0.987036943435669, "total_bits": 4737212416, "w1": { "group_size": { @@ -32646,7 +32646,7 @@ } }, { - "accuracy": 0.9839260578155518, + "accuracy": 0.9881621599197388, "total_bits": 5093868288, "w1": { "group_size": { @@ -32695,7 +32695,7 @@ } }, { - "accuracy": 0.9900029301643372, + "accuracy": 0.9925986528396606, "total_bits": 5824164608, "w1": { "group_size": { @@ -32738,7 +32738,7 @@ } }, { - "accuracy": 0.9908929467201233, + "accuracy": 0.9932480454444885, "total_bits": 5910044672, "w1": { "group_size": { @@ -32781,7 +32781,7 @@ } }, { - "accuracy": 0.9902854561805725, + "accuracy": 0.9928452372550964, "total_bits": 6006579968, "w1": { "group_size": { @@ -32833,7 +32833,7 @@ } }, { - "accuracy": 0.9915395379066467, + "accuracy": 0.9937549829483032, "total_bits": 6122381312, "w1": { "group_size": { @@ -32885,7 +32885,7 @@ } }, { - "accuracy": 0.9950807690620422, + "accuracy": 0.9963698387145996, "total_bits": 7391748864, "w1": { "group_size": { @@ -32937,7 +32937,7 @@ } }, { - "accuracy": 0.9954476356506348, + "accuracy": 0.9966326951980591, "total_bits": 7507550208, "w1": { "group_size": { @@ -32989,7 +32989,7 @@ } }, { - "accuracy": 0.9970826506614685, + "accuracy": 0.9978351593017578, "total_bits": 8550425344, "w1": { "group_size": { @@ -33032,7 +33032,7 @@ } }, { - "accuracy": 0.9972825646400452, + "accuracy": 0.9979920387268066, "total_bits": 8877312000, "w1": { "group_size": { @@ -33081,7 +33081,7 @@ } }, { - "accuracy": 0.9975087642669678, + "accuracy": 0.9981589317321777, "total_bits": 9674229760, "w1": { "group_size": { @@ -33127,7 +33127,7 @@ } }, { - "accuracy": 0.9984418749809265, + "accuracy": 0.998839795589447, "total_bits": 11318396928, "w1": { "group_size": { @@ -33169,7 +33169,7 @@ ], "model.layers.17.self_attn": [ { - "accuracy": 0.9595319628715515, + "accuracy": 0.9691872000694275, "total_bits": 89141248, "q_proj": { "group_size": { @@ -33233,7 +33233,7 @@ } }, { - "accuracy": 0.9613937735557556, + "accuracy": 0.9706101417541504, "total_bits": 91697152, "q_proj": { "group_size": { @@ -33297,7 +33297,7 @@ } }, { - "accuracy": 0.9646023511886597, + "accuracy": 0.9730053544044495, "total_bits": 95234560, "q_proj": { "group_size": { @@ -33361,7 +33361,7 @@ } }, { - "accuracy": 0.9725027084350586, + "accuracy": 0.9784827828407288, "total_bits": 111748096, "q_proj": { "group_size": { @@ -33425,7 +33425,7 @@ } }, { - "accuracy": 0.9795491099357605, + "accuracy": 0.9839038848876953, "total_bits": 132388864, "q_proj": { "group_size": { @@ -33489,7 +33489,7 @@ } }, { - "accuracy": 0.9799134135246277, + "accuracy": 0.9842501878738403, "total_bits": 132455936, "q_proj": { "group_size": { @@ -33553,7 +33553,7 @@ } }, { - "accuracy": 0.9857494235038757, + "accuracy": 0.9877718687057495, "total_bits": 169089024, "q_proj": { "group_size": { @@ -33605,7 +33605,7 @@ } }, { - "accuracy": 0.9861933588981628, + "accuracy": 0.9881938099861145, "total_bits": 169221632, "q_proj": { "group_size": { @@ -33657,7 +33657,7 @@ } }, { - "accuracy": 0.9873442649841309, + "accuracy": 0.9891313910484314, "total_bits": 170671104, "q_proj": { "group_size": { @@ -33709,7 +33709,7 @@ } }, { - "accuracy": 0.9882708191871643, + "accuracy": 0.9896262288093567, "total_bits": 173039616, "q_proj": { "group_size": { @@ -33761,7 +33761,7 @@ } }, { - "accuracy": 0.9891096949577332, + "accuracy": 0.9913157820701599, "total_bits": 174398976, "q_proj": { "group_size": { @@ -33825,7 +33825,7 @@ } }, { - "accuracy": 0.9898838400840759, + "accuracy": 0.9920133352279663, "total_bits": 175225856, "q_proj": { "group_size": { @@ -33889,7 +33889,7 @@ } }, { - "accuracy": 0.9901673197746277, + "accuracy": 0.9920608997344971, "total_bits": 178728960, "q_proj": { "group_size": { @@ -33950,7 +33950,7 @@ } }, { - "accuracy": 0.990960419178009, + "accuracy": 0.9928755760192871, "total_bits": 181067776, "q_proj": { "group_size": { @@ -34011,7 +34011,7 @@ } }, { - "accuracy": 0.9939663410186768, + "accuracy": 0.99488365650177, "total_bits": 219944960, "q_proj": { "group_size": { @@ -34072,7 +34072,7 @@ } }, { - "accuracy": 0.9944807291030884, + "accuracy": 0.9953413605690002, "total_bits": 223010816, "q_proj": { "group_size": { @@ -34133,7 +34133,7 @@ } }, { - "accuracy": 0.9949051737785339, + "accuracy": 0.9954380989074707, "total_bits": 252975104, "q_proj": { "group_size": { @@ -34185,7 +34185,7 @@ } }, { - "accuracy": 0.9954178929328918, + "accuracy": 0.9958968162536621, "total_bits": 265314304, "q_proj": { "group_size": { @@ -34237,7 +34237,7 @@ } }, { - "accuracy": 0.9962821006774902, + "accuracy": 0.9961065053939819, "total_bits": 336861184, "q_proj": { "group_size": { @@ -34291,7 +34291,7 @@ ], "model.layers.17.block_sparse_moe": [ { - "accuracy": 0.9549010992050171, + "accuracy": 0.9663090109825134, "total_bits": 3157926400, "w1": { "group_size": { @@ -34343,7 +34343,7 @@ } }, { - "accuracy": 0.9566483497619629, + "accuracy": 0.9677081108093262, "total_bits": 3268026880, "w1": { "group_size": { @@ -34395,7 +34395,7 @@ } }, { - "accuracy": 0.9620481133460999, + "accuracy": 0.9717456102371216, "total_bits": 3652411392, "w1": { "group_size": { @@ -34444,7 +34444,7 @@ } }, { - "accuracy": 0.9634366631507874, + "accuracy": 0.9727839231491089, "total_bits": 4098056192, "w1": { "group_size": { @@ -34493,7 +34493,7 @@ } }, { - "accuracy": 0.978663980960846, + "accuracy": 0.9841213226318359, "total_bits": 4621411072, "w1": { "group_size": { @@ -34545,7 +34545,7 @@ } }, { - "accuracy": 0.980421245098114, + "accuracy": 0.9854105710983276, "total_bits": 4737212416, "w1": { "group_size": { @@ -34597,7 +34597,7 @@ } }, { - "accuracy": 0.9821406602859497, + "accuracy": 0.9866950511932373, "total_bits": 5093868288, "w1": { "group_size": { @@ -34646,7 +34646,7 @@ } }, { - "accuracy": 0.9888709187507629, + "accuracy": 0.9916892051696777, "total_bits": 5824164608, "w1": { "group_size": { @@ -34689,7 +34689,7 @@ } }, { - "accuracy": 0.9898770451545715, + "accuracy": 0.9924264550209045, "total_bits": 5910044672, "w1": { "group_size": { @@ -34732,7 +34732,7 @@ } }, { - "accuracy": 0.9892256259918213, + "accuracy": 0.9919806122779846, "total_bits": 6006579968, "w1": { "group_size": { @@ -34784,7 +34784,7 @@ } }, { - "accuracy": 0.9905888438224792, + "accuracy": 0.9929841756820679, "total_bits": 6122381312, "w1": { "group_size": { @@ -34836,7 +34836,7 @@ } }, { - "accuracy": 0.9945380687713623, + "accuracy": 0.9959272146224976, "total_bits": 7391748864, "w1": { "group_size": { @@ -34888,7 +34888,7 @@ } }, { - "accuracy": 0.9949381351470947, + "accuracy": 0.9962245225906372, "total_bits": 7507550208, "w1": { "group_size": { @@ -34940,7 +34940,7 @@ } }, { - "accuracy": 0.9967595338821411, + "accuracy": 0.9975702166557312, "total_bits": 8550425344, "w1": { "group_size": { @@ -34983,7 +34983,7 @@ } }, { - "accuracy": 0.9969754219055176, + "accuracy": 0.9977421760559082, "total_bits": 8877312000, "w1": { "group_size": { @@ -35032,7 +35032,7 @@ } }, { - "accuracy": 0.9972313046455383, + "accuracy": 0.9979322552680969, "total_bits": 9674229760, "w1": { "group_size": { @@ -35078,7 +35078,7 @@ } }, { - "accuracy": 0.9982515573501587, + "accuracy": 0.9986857771873474, "total_bits": 11318396928, "w1": { "group_size": { @@ -35120,7 +35120,7 @@ ], "model.layers.18.self_attn": [ { - "accuracy": 0.9579967260360718, + "accuracy": 0.9670279622077942, "total_bits": 89141248, "q_proj": { "group_size": { @@ -35184,7 +35184,7 @@ } }, { - "accuracy": 0.9600228071212769, + "accuracy": 0.9684842228889465, "total_bits": 91697152, "q_proj": { "group_size": { @@ -35248,7 +35248,7 @@ } }, { - "accuracy": 0.9633283615112305, + "accuracy": 0.9709383845329285, "total_bits": 95234560, "q_proj": { "group_size": { @@ -35312,7 +35312,7 @@ } }, { - "accuracy": 0.9696135520935059, + "accuracy": 0.9752210974693298, "total_bits": 111748096, "q_proj": { "group_size": { @@ -35376,7 +35376,7 @@ } }, { - "accuracy": 0.978668749332428, + "accuracy": 0.9831371307373047, "total_bits": 132388864, "q_proj": { "group_size": { @@ -35440,7 +35440,7 @@ } }, { - "accuracy": 0.9792359471321106, + "accuracy": 0.9837689399719238, "total_bits": 132455936, "q_proj": { "group_size": { @@ -35504,7 +35504,7 @@ } }, { - "accuracy": 0.9844383597373962, + "accuracy": 0.9869332909584045, "total_bits": 169089024, "q_proj": { "group_size": { @@ -35556,7 +35556,7 @@ } }, { - "accuracy": 0.9851445555686951, + "accuracy": 0.9877306818962097, "total_bits": 169221632, "q_proj": { "group_size": { @@ -35608,7 +35608,7 @@ } }, { - "accuracy": 0.986348032951355, + "accuracy": 0.9890187382698059, "total_bits": 170671104, "q_proj": { "group_size": { @@ -35660,7 +35660,7 @@ } }, { - "accuracy": 0.9867903590202332, + "accuracy": 0.9895971417427063, "total_bits": 173039616, "q_proj": { "group_size": { @@ -35712,7 +35712,7 @@ } }, { - "accuracy": 0.9893943667411804, + "accuracy": 0.9914676547050476, "total_bits": 174398976, "q_proj": { "group_size": { @@ -35776,7 +35776,7 @@ } }, { - "accuracy": 0.9901924133300781, + "accuracy": 0.9922512173652649, "total_bits": 175225856, "q_proj": { "group_size": { @@ -35840,7 +35840,7 @@ } }, { - "accuracy": 0.9906132221221924, + "accuracy": 0.992324948310852, "total_bits": 178728960, "q_proj": { "group_size": { @@ -35901,7 +35901,7 @@ } }, { - "accuracy": 0.9914442300796509, + "accuracy": 0.9931394457817078, "total_bits": 181067776, "q_proj": { "group_size": { @@ -35962,7 +35962,7 @@ } }, { - "accuracy": 0.9939210414886475, + "accuracy": 0.9949266314506531, "total_bits": 219944960, "q_proj": { "group_size": { @@ -36023,7 +36023,7 @@ } }, { - "accuracy": 0.9948505759239197, + "accuracy": 0.9957647919654846, "total_bits": 223010816, "q_proj": { "group_size": { @@ -36084,7 +36084,7 @@ } }, { - "accuracy": 0.9947323799133301, + "accuracy": 0.9954313635826111, "total_bits": 252975104, "q_proj": { "group_size": { @@ -36136,7 +36136,7 @@ } }, { - "accuracy": 0.9958082437515259, + "accuracy": 0.9963790774345398, "total_bits": 265314304, "q_proj": { "group_size": { @@ -36188,7 +36188,7 @@ } }, { - "accuracy": 0.996877133846283, + "accuracy": 0.9971405267715454, "total_bits": 336861184, "q_proj": { "group_size": { @@ -36242,7 +36242,7 @@ ], "model.layers.18.block_sparse_moe": [ { - "accuracy": 0.9523232579231262, + "accuracy": 0.9632037281990051, "total_bits": 3157926400, "w1": { "group_size": { @@ -36294,7 +36294,7 @@ } }, { - "accuracy": 0.9542242288589478, + "accuracy": 0.9647365808486938, "total_bits": 3268026880, "w1": { "group_size": { @@ -36346,7 +36346,7 @@ } }, { - "accuracy": 0.9598159193992615, + "accuracy": 0.9690589904785156, "total_bits": 3652411392, "w1": { "group_size": { @@ -36395,7 +36395,7 @@ } }, { - "accuracy": 0.9613534212112427, + "accuracy": 0.970241367816925, "total_bits": 4098056192, "w1": { "group_size": { @@ -36444,7 +36444,7 @@ } }, { - "accuracy": 0.9774121642112732, + "accuracy": 0.982640266418457, "total_bits": 4621411072, "w1": { "group_size": { @@ -36496,7 +36496,7 @@ } }, { - "accuracy": 0.9792407751083374, + "accuracy": 0.9840490221977234, "total_bits": 4737212416, "w1": { "group_size": { @@ -36548,7 +36548,7 @@ } }, { - "accuracy": 0.9810943007469177, + "accuracy": 0.9854649305343628, "total_bits": 5093868288, "w1": { "group_size": { @@ -36597,7 +36597,7 @@ } }, { - "accuracy": 0.9880892038345337, + "accuracy": 0.9908138513565063, "total_bits": 5824164608, "w1": { "group_size": { @@ -36640,7 +36640,7 @@ } }, { - "accuracy": 0.9891789555549622, + "accuracy": 0.9916458129882812, "total_bits": 5910044672, "w1": { "group_size": { @@ -36683,7 +36683,7 @@ } }, { - "accuracy": 0.9885771870613098, + "accuracy": 0.9912212491035461, "total_bits": 6006579968, "w1": { "group_size": { @@ -36735,7 +36735,7 @@ } }, { - "accuracy": 0.9900113344192505, + "accuracy": 0.992317795753479, "total_bits": 6122381312, "w1": { "group_size": { @@ -36787,7 +36787,7 @@ } }, { - "accuracy": 0.9941999912261963, + "accuracy": 0.9955354332923889, "total_bits": 7391748864, "w1": { "group_size": { @@ -36839,7 +36839,7 @@ } }, { - "accuracy": 0.9946197271347046, + "accuracy": 0.9958523511886597, "total_bits": 7507550208, "w1": { "group_size": { @@ -36891,7 +36891,7 @@ } }, { - "accuracy": 0.9965316653251648, + "accuracy": 0.9973154664039612, "total_bits": 8550425344, "w1": { "group_size": { @@ -36934,7 +36934,7 @@ } }, { - "accuracy": 0.9967958927154541, + "accuracy": 0.9975311160087585, "total_bits": 8877312000, "w1": { "group_size": { @@ -36983,7 +36983,7 @@ } }, { - "accuracy": 0.9970734715461731, + "accuracy": 0.997742772102356, "total_bits": 9674229760, "w1": { "group_size": { @@ -37029,7 +37029,7 @@ } }, { - "accuracy": 0.9981452822685242, + "accuracy": 0.9985583424568176, "total_bits": 11318396928, "w1": { "group_size": { @@ -37071,7 +37071,7 @@ ], "model.layers.19.self_attn": [ { - "accuracy": 0.9589055776596069, + "accuracy": 0.9662318229675293, "total_bits": 89141248, "q_proj": { "group_size": { @@ -37135,7 +37135,7 @@ } }, { - "accuracy": 0.960173487663269, + "accuracy": 0.9673289656639099, "total_bits": 91697152, "q_proj": { "group_size": { @@ -37199,7 +37199,7 @@ } }, { - "accuracy": 0.9633458256721497, + "accuracy": 0.9689175486564636, "total_bits": 95234560, "q_proj": { "group_size": { @@ -37263,7 +37263,7 @@ } }, { - "accuracy": 0.9694567918777466, + "accuracy": 0.9727931618690491, "total_bits": 111748096, "q_proj": { "group_size": { @@ -37327,7 +37327,7 @@ } }, { - "accuracy": 0.9789343476295471, + "accuracy": 0.9818347096443176, "total_bits": 132388864, "q_proj": { "group_size": { @@ -37391,7 +37391,7 @@ } }, { - "accuracy": 0.9792435765266418, + "accuracy": 0.9828349947929382, "total_bits": 132455936, "q_proj": { "group_size": { @@ -37455,7 +37455,7 @@ } }, { - "accuracy": 0.9847554564476013, + "accuracy": 0.9855864644050598, "total_bits": 169089024, "q_proj": { "group_size": { @@ -37507,7 +37507,7 @@ } }, { - "accuracy": 0.9852094650268555, + "accuracy": 0.9867772459983826, "total_bits": 169221632, "q_proj": { "group_size": { @@ -37559,7 +37559,7 @@ } }, { - "accuracy": 0.9877792596817017, + "accuracy": 0.9888065457344055, "total_bits": 170671104, "q_proj": { "group_size": { @@ -37611,7 +37611,7 @@ } }, { - "accuracy": 0.9883403778076172, + "accuracy": 0.989484429359436, "total_bits": 173039616, "q_proj": { "group_size": { @@ -37663,7 +37663,7 @@ } }, { - "accuracy": 0.989867627620697, + "accuracy": 0.9917547106742859, "total_bits": 174398976, "q_proj": { "group_size": { @@ -37727,7 +37727,7 @@ } }, { - "accuracy": 0.9906592965126038, + "accuracy": 0.9922924041748047, "total_bits": 175225856, "q_proj": { "group_size": { @@ -37791,7 +37791,7 @@ } }, { - "accuracy": 0.9911845326423645, + "accuracy": 0.9925397634506226, "total_bits": 178728960, "q_proj": { "group_size": { @@ -37852,7 +37852,7 @@ } }, { - "accuracy": 0.9917784333229065, + "accuracy": 0.9931692481040955, "total_bits": 181067776, "q_proj": { "group_size": { @@ -37913,7 +37913,7 @@ } }, { - "accuracy": 0.9946079850196838, + "accuracy": 0.9953667521476746, "total_bits": 219944960, "q_proj": { "group_size": { @@ -37974,7 +37974,7 @@ } }, { - "accuracy": 0.9949389100074768, + "accuracy": 0.995967447757721, "total_bits": 223010816, "q_proj": { "group_size": { @@ -38035,7 +38035,7 @@ } }, { - "accuracy": 0.9955418705940247, + "accuracy": 0.9959610104560852, "total_bits": 252975104, "q_proj": { "group_size": { @@ -38087,7 +38087,7 @@ } }, { - "accuracy": 0.9958492517471313, + "accuracy": 0.9965717196464539, "total_bits": 265314304, "q_proj": { "group_size": { @@ -38139,7 +38139,7 @@ } }, { - "accuracy": 0.9972662925720215, + "accuracy": 0.9975069165229797, "total_bits": 336861184, "q_proj": { "group_size": { @@ -38193,7 +38193,7 @@ ], "model.layers.19.block_sparse_moe": [ { - "accuracy": 0.9524264335632324, + "accuracy": 0.9621105194091797, "total_bits": 3157926400, "w1": { "group_size": { @@ -38245,7 +38245,7 @@ } }, { - "accuracy": 0.9543206691741943, + "accuracy": 0.9636759757995605, "total_bits": 3268026880, "w1": { "group_size": { @@ -38297,7 +38297,7 @@ } }, { - "accuracy": 0.9596825242042542, + "accuracy": 0.9679561853408813, "total_bits": 3652411392, "w1": { "group_size": { @@ -38346,7 +38346,7 @@ } }, { - "accuracy": 0.9611428380012512, + "accuracy": 0.969100832939148, "total_bits": 4098056192, "w1": { "group_size": { @@ -38395,7 +38395,7 @@ } }, { - "accuracy": 0.9775032997131348, + "accuracy": 0.9821586012840271, "total_bits": 4621411072, "w1": { "group_size": { @@ -38447,7 +38447,7 @@ } }, { - "accuracy": 0.9792945981025696, + "accuracy": 0.9835729002952576, "total_bits": 4737212416, "w1": { "group_size": { @@ -38499,7 +38499,7 @@ } }, { - "accuracy": 0.9810807704925537, + "accuracy": 0.9849833846092224, "total_bits": 5093868288, "w1": { "group_size": { @@ -38548,7 +38548,7 @@ } }, { - "accuracy": 0.9881852865219116, + "accuracy": 0.9906113743782043, "total_bits": 5824164608, "w1": { "group_size": { @@ -38591,7 +38591,7 @@ } }, { - "accuracy": 0.9892448782920837, + "accuracy": 0.9914286136627197, "total_bits": 5910044672, "w1": { "group_size": { @@ -38634,7 +38634,7 @@ } }, { - "accuracy": 0.988628625869751, + "accuracy": 0.9909750819206238, "total_bits": 6006579968, "w1": { "group_size": { @@ -38686,7 +38686,7 @@ } }, { - "accuracy": 0.9900281429290771, + "accuracy": 0.9920880794525146, "total_bits": 6122381312, "w1": { "group_size": { @@ -38738,7 +38738,7 @@ } }, { - "accuracy": 0.9942350387573242, + "accuracy": 0.995413064956665, "total_bits": 7391748864, "w1": { "group_size": { @@ -38790,7 +38790,7 @@ } }, { - "accuracy": 0.9946066737174988, + "accuracy": 0.9957062602043152, "total_bits": 7507550208, "w1": { "group_size": { @@ -38842,7 +38842,7 @@ } }, { - "accuracy": 0.996561586856842, + "accuracy": 0.9972432255744934, "total_bits": 8550425344, "w1": { "group_size": { @@ -38885,7 +38885,7 @@ } }, { - "accuracy": 0.9968079328536987, + "accuracy": 0.9974492192268372, "total_bits": 8877312000, "w1": { "group_size": { @@ -38934,7 +38934,7 @@ } }, { - "accuracy": 0.9970746040344238, + "accuracy": 0.997657835483551, "total_bits": 9674229760, "w1": { "group_size": { @@ -38980,7 +38980,7 @@ } }, { - "accuracy": 0.9981474876403809, + "accuracy": 0.9984923601150513, "total_bits": 11318396928, "w1": { "group_size": { @@ -39022,7 +39022,7 @@ ], "model.layers.20.self_attn": [ { - "accuracy": 0.9634772539138794, + "accuracy": 0.969146728515625, "total_bits": 89141248, "q_proj": { "group_size": { @@ -39086,7 +39086,7 @@ } }, { - "accuracy": 0.9650835394859314, + "accuracy": 0.9702212810516357, "total_bits": 91697152, "q_proj": { "group_size": { @@ -39150,7 +39150,7 @@ } }, { - "accuracy": 0.968682050704956, + "accuracy": 0.9730066061019897, "total_bits": 95234560, "q_proj": { "group_size": { @@ -39214,7 +39214,7 @@ } }, { - "accuracy": 0.9751076698303223, + "accuracy": 0.9775748252868652, "total_bits": 111748096, "q_proj": { "group_size": { @@ -39278,7 +39278,7 @@ } }, { - "accuracy": 0.9818992614746094, + "accuracy": 0.9847007393836975, "total_bits": 132388864, "q_proj": { "group_size": { @@ -39342,7 +39342,7 @@ } }, { - "accuracy": 0.9823118448257446, + "accuracy": 0.9851861596107483, "total_bits": 132455936, "q_proj": { "group_size": { @@ -39406,7 +39406,7 @@ } }, { - "accuracy": 0.9873146414756775, + "accuracy": 0.9884177446365356, "total_bits": 169089024, "q_proj": { "group_size": { @@ -39458,7 +39458,7 @@ } }, { - "accuracy": 0.9878366589546204, + "accuracy": 0.9890477061271667, "total_bits": 169221632, "q_proj": { "group_size": { @@ -39510,7 +39510,7 @@ } }, { - "accuracy": 0.989426851272583, + "accuracy": 0.9911777973175049, "total_bits": 170671104, "q_proj": { "group_size": { @@ -39562,7 +39562,7 @@ } }, { - "accuracy": 0.9902719855308533, + "accuracy": 0.9917161464691162, "total_bits": 173039616, "q_proj": { "group_size": { @@ -39614,7 +39614,7 @@ } }, { - "accuracy": 0.9907694458961487, + "accuracy": 0.9920873641967773, "total_bits": 174398976, "q_proj": { "group_size": { @@ -39678,7 +39678,7 @@ } }, { - "accuracy": 0.9916902780532837, + "accuracy": 0.9927124977111816, "total_bits": 175225856, "q_proj": { "group_size": { @@ -39742,7 +39742,7 @@ } }, { - "accuracy": 0.991834282875061, + "accuracy": 0.9927735328674316, "total_bits": 178728960, "q_proj": { "group_size": { @@ -39803,7 +39803,7 @@ } }, { - "accuracy": 0.9928995966911316, + "accuracy": 0.9935130476951599, "total_bits": 181067776, "q_proj": { "group_size": { @@ -39864,7 +39864,7 @@ } }, { - "accuracy": 0.9948800206184387, + "accuracy": 0.9946961402893066, "total_bits": 219944960, "q_proj": { "group_size": { @@ -39925,7 +39925,7 @@ } }, { - "accuracy": 0.995415985584259, + "accuracy": 0.9955303072929382, "total_bits": 223010816, "q_proj": { "group_size": { @@ -39986,7 +39986,7 @@ } }, { - "accuracy": 0.9957172870635986, + "accuracy": 0.9951815605163574, "total_bits": 252975104, "q_proj": { "group_size": { @@ -40038,7 +40038,7 @@ } }, { - "accuracy": 0.9963241815567017, + "accuracy": 0.9958614110946655, "total_bits": 265314304, "q_proj": { "group_size": { @@ -40090,7 +40090,7 @@ } }, { - "accuracy": 0.9962058663368225, + "accuracy": 0.9959267377853394, "total_bits": 336861184, "q_proj": { "group_size": { @@ -40144,7 +40144,7 @@ ], "model.layers.20.block_sparse_moe": [ { - "accuracy": 0.9532937407493591, + "accuracy": 0.962333619594574, "total_bits": 3157926400, "w1": { "group_size": { @@ -40196,7 +40196,7 @@ } }, { - "accuracy": 0.9549475908279419, + "accuracy": 0.9637037515640259, "total_bits": 3268026880, "w1": { "group_size": { @@ -40248,7 +40248,7 @@ } }, { - "accuracy": 0.9598697423934937, + "accuracy": 0.9677062034606934, "total_bits": 3652411392, "w1": { "group_size": { @@ -40297,7 +40297,7 @@ } }, { - "accuracy": 0.9611411094665527, + "accuracy": 0.9687215685844421, "total_bits": 4098056192, "w1": { "group_size": { @@ -40346,7 +40346,7 @@ } }, { - "accuracy": 0.9776734709739685, + "accuracy": 0.9821088314056396, "total_bits": 4621411072, "w1": { "group_size": { @@ -40398,7 +40398,7 @@ } }, { - "accuracy": 0.9794823527336121, + "accuracy": 0.9835208058357239, "total_bits": 4737212416, "w1": { "group_size": { @@ -40450,7 +40450,7 @@ } }, { - "accuracy": 0.9810509085655212, + "accuracy": 0.9847807288169861, "total_bits": 5093868288, "w1": { "group_size": { @@ -40499,7 +40499,7 @@ } }, { - "accuracy": 0.9884899854660034, + "accuracy": 0.9907661080360413, "total_bits": 5824164608, "w1": { "group_size": { @@ -40542,7 +40542,7 @@ } }, { - "accuracy": 0.9895279407501221, + "accuracy": 0.9915772080421448, "total_bits": 5910044672, "w1": { "group_size": { @@ -40585,7 +40585,7 @@ } }, { - "accuracy": 0.9887274503707886, + "accuracy": 0.9909729957580566, "total_bits": 6006579968, "w1": { "group_size": { @@ -40637,7 +40637,7 @@ } }, { - "accuracy": 0.9901790618896484, + "accuracy": 0.9921181201934814, "total_bits": 6122381312, "w1": { "group_size": { @@ -40689,7 +40689,7 @@ } }, { - "accuracy": 0.9943393468856812, + "accuracy": 0.9954683780670166, "total_bits": 7391748864, "w1": { "group_size": { @@ -40741,7 +40741,7 @@ } }, { - "accuracy": 0.9948311448097229, + "accuracy": 0.9958519339561462, "total_bits": 7507550208, "w1": { "group_size": { @@ -40793,7 +40793,7 @@ } }, { - "accuracy": 0.9967612624168396, + "accuracy": 0.997406542301178, "total_bits": 8550425344, "w1": { "group_size": { @@ -40836,7 +40836,7 @@ } }, { - "accuracy": 0.9969462752342224, + "accuracy": 0.9975586533546448, "total_bits": 8877312000, "w1": { "group_size": { @@ -40885,7 +40885,7 @@ } }, { - "accuracy": 0.9971849322319031, + "accuracy": 0.9977503418922424, "total_bits": 9674229760, "w1": { "group_size": { @@ -40931,7 +40931,7 @@ } }, { - "accuracy": 0.9983999729156494, + "accuracy": 0.9987314939498901, "total_bits": 11318396928, "w1": { "group_size": { @@ -40973,7 +40973,7 @@ ], "model.layers.21.self_attn": [ { - "accuracy": 0.96848464012146, + "accuracy": 0.9745268821716309, "total_bits": 89141248, "q_proj": { "group_size": { @@ -41037,7 +41037,7 @@ } }, { - "accuracy": 0.9701430797576904, + "accuracy": 0.9759386777877808, "total_bits": 91697152, "q_proj": { "group_size": { @@ -41101,7 +41101,7 @@ } }, { - "accuracy": 0.972919225692749, + "accuracy": 0.9782657623291016, "total_bits": 95234560, "q_proj": { "group_size": { @@ -41165,7 +41165,7 @@ } }, { - "accuracy": 0.9787170886993408, + "accuracy": 0.982458233833313, "total_bits": 111748096, "q_proj": { "group_size": { @@ -41229,7 +41229,7 @@ } }, { - "accuracy": 0.9842249751091003, + "accuracy": 0.9867966175079346, "total_bits": 132388864, "q_proj": { "group_size": { @@ -41293,7 +41293,7 @@ } }, { - "accuracy": 0.984380841255188, + "accuracy": 0.9870901107788086, "total_bits": 132455936, "q_proj": { "group_size": { @@ -41357,7 +41357,7 @@ } }, { - "accuracy": 0.9888150691986084, + "accuracy": 0.9901556968688965, "total_bits": 169089024, "q_proj": { "group_size": { @@ -41409,7 +41409,7 @@ } }, { - "accuracy": 0.9889877438545227, + "accuracy": 0.9905080199241638, "total_bits": 169221632, "q_proj": { "group_size": { @@ -41461,7 +41461,7 @@ } }, { - "accuracy": 0.9907020330429077, + "accuracy": 0.9922755360603333, "total_bits": 170671104, "q_proj": { "group_size": { @@ -41513,7 +41513,7 @@ } }, { - "accuracy": 0.9910759925842285, + "accuracy": 0.9927313923835754, "total_bits": 173039616, "q_proj": { "group_size": { @@ -41565,7 +41565,7 @@ } }, { - "accuracy": 0.9921764135360718, + "accuracy": 0.9934518337249756, "total_bits": 174398976, "q_proj": { "group_size": { @@ -41629,7 +41629,7 @@ } }, { - "accuracy": 0.992807924747467, + "accuracy": 0.9941091537475586, "total_bits": 175225856, "q_proj": { "group_size": { @@ -41693,7 +41693,7 @@ } }, { - "accuracy": 0.9931393265724182, + "accuracy": 0.9941831231117249, "total_bits": 178728960, "q_proj": { "group_size": { @@ -41754,7 +41754,7 @@ } }, { - "accuracy": 0.99369877576828, + "accuracy": 0.994659960269928, "total_bits": 181067776, "q_proj": { "group_size": { @@ -41815,7 +41815,7 @@ } }, { - "accuracy": 0.9959864020347595, + "accuracy": 0.996533215045929, "total_bits": 219944960, "q_proj": { "group_size": { @@ -41876,7 +41876,7 @@ } }, { - "accuracy": 0.9963775277137756, + "accuracy": 0.9969742298126221, "total_bits": 223010816, "q_proj": { "group_size": { @@ -41937,7 +41937,7 @@ } }, { - "accuracy": 0.9967642426490784, + "accuracy": 0.9970519542694092, "total_bits": 252975104, "q_proj": { "group_size": { @@ -41989,7 +41989,7 @@ } }, { - "accuracy": 0.9972371459007263, + "accuracy": 0.9976260662078857, "total_bits": 265314304, "q_proj": { "group_size": { @@ -42041,7 +42041,7 @@ } }, { - "accuracy": 0.9980166554450989, + "accuracy": 0.9982601404190063, "total_bits": 336861184, "q_proj": { "group_size": { @@ -42095,7 +42095,7 @@ ], "model.layers.21.block_sparse_moe": [ { - "accuracy": 0.9546959400177002, + "accuracy": 0.9633269906044006, "total_bits": 3157926400, "w1": { "group_size": { @@ -42147,7 +42147,7 @@ } }, { - "accuracy": 0.9562362432479858, + "accuracy": 0.9645806550979614, "total_bits": 3268026880, "w1": { "group_size": { @@ -42199,7 +42199,7 @@ } }, { - "accuracy": 0.9607701301574707, + "accuracy": 0.9683008193969727, "total_bits": 3652411392, "w1": { "group_size": { @@ -42248,7 +42248,7 @@ } }, { - "accuracy": 0.9619214534759521, + "accuracy": 0.9692257046699524, "total_bits": 4098056192, "w1": { "group_size": { @@ -42297,7 +42297,7 @@ } }, { - "accuracy": 0.978273332118988, + "accuracy": 0.9824628829956055, "total_bits": 4621411072, "w1": { "group_size": { @@ -42349,7 +42349,7 @@ } }, { - "accuracy": 0.9800240397453308, + "accuracy": 0.9838845133781433, "total_bits": 4737212416, "w1": { "group_size": { @@ -42401,7 +42401,7 @@ } }, { - "accuracy": 0.9814215302467346, + "accuracy": 0.98500657081604, "total_bits": 5093868288, "w1": { "group_size": { @@ -42450,7 +42450,7 @@ } }, { - "accuracy": 0.9888672232627869, + "accuracy": 0.9910184144973755, "total_bits": 5824164608, "w1": { "group_size": { @@ -42493,7 +42493,7 @@ } }, { - "accuracy": 0.9898719191551208, + "accuracy": 0.9918175935745239, "total_bits": 5910044672, "w1": { "group_size": { @@ -42536,7 +42536,7 @@ } }, { - "accuracy": 0.9890280961990356, + "accuracy": 0.9911359548568726, "total_bits": 6006579968, "w1": { "group_size": { @@ -42588,7 +42588,7 @@ } }, { - "accuracy": 0.9904477596282959, + "accuracy": 0.9922844767570496, "total_bits": 6122381312, "w1": { "group_size": { @@ -42640,7 +42640,7 @@ } }, { - "accuracy": 0.9944820404052734, + "accuracy": 0.9955405592918396, "total_bits": 7391748864, "w1": { "group_size": { @@ -42692,7 +42692,7 @@ } }, { - "accuracy": 0.9949774146080017, + "accuracy": 0.9959409236907959, "total_bits": 7507550208, "w1": { "group_size": { @@ -42744,7 +42744,7 @@ } }, { - "accuracy": 0.9968459010124207, + "accuracy": 0.9974434971809387, "total_bits": 8550425344, "w1": { "group_size": { @@ -42787,7 +42787,7 @@ } }, { - "accuracy": 0.9970060586929321, + "accuracy": 0.9975698590278625, "total_bits": 8877312000, "w1": { "group_size": { @@ -42836,7 +42836,7 @@ } }, { - "accuracy": 0.9972130060195923, + "accuracy": 0.9977347254753113, "total_bits": 9674229760, "w1": { "group_size": { @@ -42882,7 +42882,7 @@ } }, { - "accuracy": 0.9984056353569031, + "accuracy": 0.9986902475357056, "total_bits": 11318396928, "w1": { "group_size": { @@ -42924,7 +42924,7 @@ ], "model.layers.22.self_attn": [ { - "accuracy": 0.9738297462463379, + "accuracy": 0.9783673286437988, "total_bits": 89141248, "q_proj": { "group_size": { @@ -42988,7 +42988,7 @@ } }, { - "accuracy": 0.9757354855537415, + "accuracy": 0.9794034957885742, "total_bits": 91697152, "q_proj": { "group_size": { @@ -43052,7 +43052,7 @@ } }, { - "accuracy": 0.9781677722930908, + "accuracy": 0.981055498123169, "total_bits": 95234560, "q_proj": { "group_size": { @@ -43116,7 +43116,7 @@ } }, { - "accuracy": 0.9820642471313477, + "accuracy": 0.9839292764663696, "total_bits": 111748096, "q_proj": { "group_size": { @@ -43180,7 +43180,7 @@ } }, { - "accuracy": 0.986693799495697, + "accuracy": 0.9875088930130005, "total_bits": 132388864, "q_proj": { "group_size": { @@ -43244,7 +43244,7 @@ } }, { - "accuracy": 0.9873584508895874, + "accuracy": 0.9888669848442078, "total_bits": 132455936, "q_proj": { "group_size": { @@ -43308,7 +43308,7 @@ } }, { - "accuracy": 0.9899799227714539, + "accuracy": 0.9894881844520569, "total_bits": 169089024, "q_proj": { "group_size": { @@ -43360,7 +43360,7 @@ } }, { - "accuracy": 0.990985631942749, + "accuracy": 0.9911864995956421, "total_bits": 169221632, "q_proj": { "group_size": { @@ -43412,7 +43412,7 @@ } }, { - "accuracy": 0.992416262626648, + "accuracy": 0.9926562905311584, "total_bits": 170671104, "q_proj": { "group_size": { @@ -43464,7 +43464,7 @@ } }, { - "accuracy": 0.9927545785903931, + "accuracy": 0.9929838180541992, "total_bits": 173039616, "q_proj": { "group_size": { @@ -43516,7 +43516,7 @@ } }, { - "accuracy": 0.9935746192932129, + "accuracy": 0.9944120645523071, "total_bits": 174398976, "q_proj": { "group_size": { @@ -43580,7 +43580,7 @@ } }, { - "accuracy": 0.9941512942314148, + "accuracy": 0.9951131343841553, "total_bits": 175225856, "q_proj": { "group_size": { @@ -43644,7 +43644,7 @@ } }, { - "accuracy": 0.9942859411239624, + "accuracy": 0.9950115084648132, "total_bits": 178728960, "q_proj": { "group_size": { @@ -43705,7 +43705,7 @@ } }, { - "accuracy": 0.994888424873352, + "accuracy": 0.9955363273620605, "total_bits": 181067776, "q_proj": { "group_size": { @@ -43766,7 +43766,7 @@ } }, { - "accuracy": 0.996281623840332, + "accuracy": 0.9965630173683167, "total_bits": 219944960, "q_proj": { "group_size": { @@ -43827,7 +43827,7 @@ } }, { - "accuracy": 0.9968441724777222, + "accuracy": 0.9972029328346252, "total_bits": 223010816, "q_proj": { "group_size": { @@ -43888,7 +43888,7 @@ } }, { - "accuracy": 0.9967656135559082, + "accuracy": 0.9968717694282532, "total_bits": 252975104, "q_proj": { "group_size": { @@ -43940,7 +43940,7 @@ } }, { - "accuracy": 0.9974201917648315, + "accuracy": 0.9976836442947388, "total_bits": 265314304, "q_proj": { "group_size": { @@ -43992,7 +43992,7 @@ } }, { - "accuracy": 0.9978474378585815, + "accuracy": 0.9977259635925293, "total_bits": 336861184, "q_proj": { "group_size": { @@ -44046,7 +44046,7 @@ ], "model.layers.22.block_sparse_moe": [ { - "accuracy": 0.952915608882904, + "accuracy": 0.9620071649551392, "total_bits": 3157926400, "w1": { "group_size": { @@ -44098,7 +44098,7 @@ } }, { - "accuracy": 0.9544792771339417, + "accuracy": 0.9632707238197327, "total_bits": 3268026880, "w1": { "group_size": { @@ -44150,7 +44150,7 @@ } }, { - "accuracy": 0.9590243101119995, + "accuracy": 0.9669998288154602, "total_bits": 3652411392, "w1": { "group_size": { @@ -44199,7 +44199,7 @@ } }, { - "accuracy": 0.960167407989502, + "accuracy": 0.9679166078567505, "total_bits": 4098056192, "w1": { "group_size": { @@ -44248,7 +44248,7 @@ } }, { - "accuracy": 0.9773557186126709, + "accuracy": 0.9817866683006287, "total_bits": 4621411072, "w1": { "group_size": { @@ -44300,7 +44300,7 @@ } }, { - "accuracy": 0.9791630506515503, + "accuracy": 0.98322594165802, "total_bits": 4737212416, "w1": { "group_size": { @@ -44352,7 +44352,7 @@ } }, { - "accuracy": 0.9805288910865784, + "accuracy": 0.9843218326568604, "total_bits": 5093868288, "w1": { "group_size": { @@ -44401,7 +44401,7 @@ } }, { - "accuracy": 0.9884136319160461, + "accuracy": 0.9906919598579407, "total_bits": 5824164608, "w1": { "group_size": { @@ -44444,7 +44444,7 @@ } }, { - "accuracy": 0.989465057849884, + "accuracy": 0.9915183186531067, "total_bits": 5910044672, "w1": { "group_size": { @@ -44487,7 +44487,7 @@ } }, { - "accuracy": 0.9885719418525696, + "accuracy": 0.9908120036125183, "total_bits": 6006579968, "w1": { "group_size": { @@ -44539,7 +44539,7 @@ } }, { - "accuracy": 0.9900400638580322, + "accuracy": 0.9919865131378174, "total_bits": 6122381312, "w1": { "group_size": { @@ -44591,7 +44591,7 @@ } }, { - "accuracy": 0.9942722320556641, + "accuracy": 0.9953980445861816, "total_bits": 7391748864, "w1": { "group_size": { @@ -44643,7 +44643,7 @@ } }, { - "accuracy": 0.9948099255561829, + "accuracy": 0.9958260655403137, "total_bits": 7507550208, "w1": { "group_size": { @@ -44695,7 +44695,7 @@ } }, { - "accuracy": 0.9967776536941528, + "accuracy": 0.9974144697189331, "total_bits": 8550425344, "w1": { "group_size": { @@ -44738,7 +44738,7 @@ } }, { - "accuracy": 0.9969325065612793, + "accuracy": 0.9975391626358032, "total_bits": 8877312000, "w1": { "group_size": { @@ -44787,7 +44787,7 @@ } }, { - "accuracy": 0.997136652469635, + "accuracy": 0.9977033138275146, "total_bits": 9674229760, "w1": { "group_size": { @@ -44833,7 +44833,7 @@ } }, { - "accuracy": 0.998450517654419, + "accuracy": 0.998771607875824, "total_bits": 11318396928, "w1": { "group_size": { @@ -44875,7 +44875,7 @@ ], "model.layers.23.self_attn": [ { - "accuracy": 0.9714788794517517, + "accuracy": 0.9764301180839539, "total_bits": 89141248, "q_proj": { "group_size": { @@ -44939,7 +44939,7 @@ } }, { - "accuracy": 0.9731444120407104, + "accuracy": 0.9774771928787231, "total_bits": 91697152, "q_proj": { "group_size": { @@ -45003,7 +45003,7 @@ } }, { - "accuracy": 0.9758467078208923, + "accuracy": 0.9795534014701843, "total_bits": 95234560, "q_proj": { "group_size": { @@ -45067,7 +45067,7 @@ } }, { - "accuracy": 0.9799731373786926, + "accuracy": 0.982509195804596, "total_bits": 111748096, "q_proj": { "group_size": { @@ -45131,7 +45131,7 @@ } }, { - "accuracy": 0.9862102270126343, + "accuracy": 0.9883545637130737, "total_bits": 132388864, "q_proj": { "group_size": { @@ -45195,7 +45195,7 @@ } }, { - "accuracy": 0.9867540001869202, + "accuracy": 0.9888776540756226, "total_bits": 132455936, "q_proj": { "group_size": { @@ -45259,7 +45259,7 @@ } }, { - "accuracy": 0.9900797009468079, + "accuracy": 0.9910563826560974, "total_bits": 169089024, "q_proj": { "group_size": { @@ -45311,7 +45311,7 @@ } }, { - "accuracy": 0.9908032417297363, + "accuracy": 0.9917600750923157, "total_bits": 169221632, "q_proj": { "group_size": { @@ -45363,7 +45363,7 @@ } }, { - "accuracy": 0.9917871356010437, + "accuracy": 0.9927949905395508, "total_bits": 170671104, "q_proj": { "group_size": { @@ -45415,7 +45415,7 @@ } }, { - "accuracy": 0.9922888875007629, + "accuracy": 0.9930222034454346, "total_bits": 173039616, "q_proj": { "group_size": { @@ -45467,7 +45467,7 @@ } }, { - "accuracy": 0.9931047558784485, + "accuracy": 0.9941404461860657, "total_bits": 174398976, "q_proj": { "group_size": { @@ -45531,7 +45531,7 @@ } }, { - "accuracy": 0.9935341477394104, + "accuracy": 0.9945935010910034, "total_bits": 175225856, "q_proj": { "group_size": { @@ -45595,7 +45595,7 @@ } }, { - "accuracy": 0.993904173374176, + "accuracy": 0.9948195815086365, "total_bits": 178728960, "q_proj": { "group_size": { @@ -45656,7 +45656,7 @@ } }, { - "accuracy": 0.9943587779998779, + "accuracy": 0.9950734376907349, "total_bits": 181067776, "q_proj": { "group_size": { @@ -45717,7 +45717,7 @@ } }, { - "accuracy": 0.9964364767074585, + "accuracy": 0.9967532157897949, "total_bits": 219944960, "q_proj": { "group_size": { @@ -45778,7 +45778,7 @@ } }, { - "accuracy": 0.9966956377029419, + "accuracy": 0.9971526861190796, "total_bits": 223010816, "q_proj": { "group_size": { @@ -45839,7 +45839,7 @@ } }, { - "accuracy": 0.9970226287841797, + "accuracy": 0.997127890586853, "total_bits": 252975104, "q_proj": { "group_size": { @@ -45891,7 +45891,7 @@ } }, { - "accuracy": 0.9973453879356384, + "accuracy": 0.9976409673690796, "total_bits": 265314304, "q_proj": { "group_size": { @@ -45943,7 +45943,7 @@ } }, { - "accuracy": 0.9982138872146606, + "accuracy": 0.9982187747955322, "total_bits": 336861184, "q_proj": { "group_size": { @@ -45997,7 +45997,7 @@ ], "model.layers.23.block_sparse_moe": [ { - "accuracy": 0.9515018463134766, + "accuracy": 0.960975170135498, "total_bits": 3157926400, "w1": { "group_size": { @@ -46049,7 +46049,7 @@ } }, { - "accuracy": 0.9531065821647644, + "accuracy": 0.962265133857727, "total_bits": 3268026880, "w1": { "group_size": { @@ -46101,7 +46101,7 @@ } }, { - "accuracy": 0.9576147198677063, + "accuracy": 0.9659532308578491, "total_bits": 3652411392, "w1": { "group_size": { @@ -46150,7 +46150,7 @@ } }, { - "accuracy": 0.9587604999542236, + "accuracy": 0.9668650031089783, "total_bits": 4098056192, "w1": { "group_size": { @@ -46199,7 +46199,7 @@ } }, { - "accuracy": 0.9765921235084534, + "accuracy": 0.9812326431274414, "total_bits": 4621411072, "w1": { "group_size": { @@ -46251,7 +46251,7 @@ } }, { - "accuracy": 0.9784584045410156, + "accuracy": 0.9827163219451904, "total_bits": 4737212416, "w1": { "group_size": { @@ -46303,7 +46303,7 @@ } }, { - "accuracy": 0.9798051714897156, + "accuracy": 0.9837918877601624, "total_bits": 5093868288, "w1": { "group_size": { @@ -46352,7 +46352,7 @@ } }, { - "accuracy": 0.9880173802375793, + "accuracy": 0.9904012084007263, "total_bits": 5824164608, "w1": { "group_size": { @@ -46395,7 +46395,7 @@ } }, { - "accuracy": 0.9891153573989868, + "accuracy": 0.9912562370300293, "total_bits": 5910044672, "w1": { "group_size": { @@ -46438,7 +46438,7 @@ } }, { - "accuracy": 0.9881719350814819, + "accuracy": 0.9905277490615845, "total_bits": 6006579968, "w1": { "group_size": { @@ -46490,7 +46490,7 @@ } }, { - "accuracy": 0.9897036552429199, + "accuracy": 0.9917393922805786, "total_bits": 6122381312, "w1": { "group_size": { @@ -46542,7 +46542,7 @@ } }, { - "accuracy": 0.9940717220306396, + "accuracy": 0.9952512979507446, "total_bits": 7391748864, "w1": { "group_size": { @@ -46594,7 +46594,7 @@ } }, { - "accuracy": 0.9946280717849731, + "accuracy": 0.9956905841827393, "total_bits": 7507550208, "w1": { "group_size": { @@ -46646,7 +46646,7 @@ } }, { - "accuracy": 0.9966521263122559, + "accuracy": 0.9973235130310059, "total_bits": 8550425344, "w1": { "group_size": { @@ -46689,7 +46689,7 @@ } }, { - "accuracy": 0.9968107342720032, + "accuracy": 0.9974522590637207, "total_bits": 8877312000, "w1": { "group_size": { @@ -46738,7 +46738,7 @@ } }, { - "accuracy": 0.9970104694366455, + "accuracy": 0.9976118803024292, "total_bits": 9674229760, "w1": { "group_size": { @@ -46784,7 +46784,7 @@ } }, { - "accuracy": 0.9983636736869812, + "accuracy": 0.9987093806266785, "total_bits": 11318396928, "w1": { "group_size": { @@ -46826,7 +46826,7 @@ ], "model.layers.24.self_attn": [ { - "accuracy": 0.9721423983573914, + "accuracy": 0.9772748351097107, "total_bits": 89141248, "q_proj": { "group_size": { @@ -46890,7 +46890,7 @@ } }, { - "accuracy": 0.9732393026351929, + "accuracy": 0.9780081510543823, "total_bits": 91697152, "q_proj": { "group_size": { @@ -46954,7 +46954,7 @@ } }, { - "accuracy": 0.9760453104972839, + "accuracy": 0.9804617762565613, "total_bits": 95234560, "q_proj": { "group_size": { @@ -47018,7 +47018,7 @@ } }, { - "accuracy": 0.9798814654350281, + "accuracy": 0.9832745790481567, "total_bits": 111748096, "q_proj": { "group_size": { @@ -47082,7 +47082,7 @@ } }, { - "accuracy": 0.9856245517730713, + "accuracy": 0.9881944060325623, "total_bits": 132388864, "q_proj": { "group_size": { @@ -47146,7 +47146,7 @@ } }, { - "accuracy": 0.985847532749176, + "accuracy": 0.988773763179779, "total_bits": 132455936, "q_proj": { "group_size": { @@ -47210,7 +47210,7 @@ } }, { - "accuracy": 0.9894073605537415, + "accuracy": 0.9906878471374512, "total_bits": 169089024, "q_proj": { "group_size": { @@ -47262,7 +47262,7 @@ } }, { - "accuracy": 0.989900529384613, + "accuracy": 0.9913546442985535, "total_bits": 169221632, "q_proj": { "group_size": { @@ -47314,7 +47314,7 @@ } }, { - "accuracy": 0.9913171529769897, + "accuracy": 0.9926949143409729, "total_bits": 170671104, "q_proj": { "group_size": { @@ -47366,7 +47366,7 @@ } }, { - "accuracy": 0.9916481971740723, + "accuracy": 0.9928181171417236, "total_bits": 173039616, "q_proj": { "group_size": { @@ -47418,7 +47418,7 @@ } }, { - "accuracy": 0.9928502440452576, + "accuracy": 0.9942865371704102, "total_bits": 174398976, "q_proj": { "group_size": { @@ -47482,7 +47482,7 @@ } }, { - "accuracy": 0.9936946630477905, + "accuracy": 0.9949796795845032, "total_bits": 175225856, "q_proj": { "group_size": { @@ -47546,7 +47546,7 @@ } }, { - "accuracy": 0.9941575527191162, + "accuracy": 0.9951766133308411, "total_bits": 178728960, "q_proj": { "group_size": { @@ -47607,7 +47607,7 @@ } }, { - "accuracy": 0.9947301149368286, + "accuracy": 0.995607316493988, "total_bits": 181067776, "q_proj": { "group_size": { @@ -47668,7 +47668,7 @@ } }, { - "accuracy": 0.9960257411003113, + "accuracy": 0.9965070486068726, "total_bits": 219944960, "q_proj": { "group_size": { @@ -47729,7 +47729,7 @@ } }, { - "accuracy": 0.996980607509613, + "accuracy": 0.9972579479217529, "total_bits": 223010816, "q_proj": { "group_size": { @@ -47790,7 +47790,7 @@ } }, { - "accuracy": 0.9965118765830994, + "accuracy": 0.9968120455741882, "total_bits": 252975104, "q_proj": { "group_size": { @@ -47842,7 +47842,7 @@ } }, { - "accuracy": 0.9976251125335693, + "accuracy": 0.9976935982704163, "total_bits": 265314304, "q_proj": { "group_size": { @@ -47894,7 +47894,7 @@ } }, { - "accuracy": 0.9975022673606873, + "accuracy": 0.9975559711456299, "total_bits": 336861184, "q_proj": { "group_size": { @@ -47948,7 +47948,7 @@ ], "model.layers.24.block_sparse_moe": [ { - "accuracy": 0.9498496055603027, + "accuracy": 0.9596848487854004, "total_bits": 3157926400, "w1": { "group_size": { @@ -48000,7 +48000,7 @@ } }, { - "accuracy": 0.9514636993408203, + "accuracy": 0.9610143303871155, "total_bits": 3268026880, "w1": { "group_size": { @@ -48052,7 +48052,7 @@ } }, { - "accuracy": 0.9559648633003235, + "accuracy": 0.964694082736969, "total_bits": 3652411392, "w1": { "group_size": { @@ -48101,7 +48101,7 @@ } }, { - "accuracy": 0.9571110010147095, + "accuracy": 0.9656105041503906, "total_bits": 4098056192, "w1": { "group_size": { @@ -48150,7 +48150,7 @@ } }, { - "accuracy": 0.9757158160209656, + "accuracy": 0.9805795550346375, "total_bits": 4621411072, "w1": { "group_size": { @@ -48202,7 +48202,7 @@ } }, { - "accuracy": 0.9776870608329773, + "accuracy": 0.9821289777755737, "total_bits": 4737212416, "w1": { "group_size": { @@ -48254,7 +48254,7 @@ } }, { - "accuracy": 0.9790007472038269, + "accuracy": 0.9831752181053162, "total_bits": 5093868288, "w1": { "group_size": { @@ -48303,7 +48303,7 @@ } }, { - "accuracy": 0.9875743389129639, + "accuracy": 0.9900650978088379, "total_bits": 5824164608, "w1": { "group_size": { @@ -48346,7 +48346,7 @@ } }, { - "accuracy": 0.9887202382087708, + "accuracy": 0.9909588098526001, "total_bits": 5910044672, "w1": { "group_size": { @@ -48389,7 +48389,7 @@ } }, { - "accuracy": 0.9877371788024902, + "accuracy": 0.9902010560035706, "total_bits": 6006579968, "w1": { "group_size": { @@ -48441,7 +48441,7 @@ } }, { - "accuracy": 0.9893316626548767, + "accuracy": 0.9914580583572388, "total_bits": 6122381312, "w1": { "group_size": { @@ -48493,7 +48493,7 @@ } }, { - "accuracy": 0.993844747543335, + "accuracy": 0.9950816631317139, "total_bits": 7391748864, "w1": { "group_size": { @@ -48545,7 +48545,7 @@ } }, { - "accuracy": 0.9944276809692383, + "accuracy": 0.9955431222915649, "total_bits": 7507550208, "w1": { "group_size": { @@ -48597,7 +48597,7 @@ } }, { - "accuracy": 0.9965183734893799, + "accuracy": 0.9972198009490967, "total_bits": 8550425344, "w1": { "group_size": { @@ -48640,7 +48640,7 @@ } }, { - "accuracy": 0.9966829419136047, + "accuracy": 0.9973546862602234, "total_bits": 8877312000, "w1": { "group_size": { @@ -48689,7 +48689,7 @@ } }, { - "accuracy": 0.9968754053115845, + "accuracy": 0.9975083470344543, "total_bits": 9674229760, "w1": { "group_size": { @@ -48735,7 +48735,7 @@ } }, { - "accuracy": 0.9982824325561523, + "accuracy": 0.9986444711685181, "total_bits": 11318396928, "w1": { "group_size": { @@ -48777,7 +48777,7 @@ ], "model.layers.25.self_attn": [ { - "accuracy": 0.973724365234375, + "accuracy": 0.978515088558197, "total_bits": 89141248, "q_proj": { "group_size": { @@ -48841,7 +48841,7 @@ } }, { - "accuracy": 0.9749693870544434, + "accuracy": 0.9793577194213867, "total_bits": 91697152, "q_proj": { "group_size": { @@ -48905,7 +48905,7 @@ } }, { - "accuracy": 0.9778305292129517, + "accuracy": 0.981115996837616, "total_bits": 95234560, "q_proj": { "group_size": { @@ -48969,7 +48969,7 @@ } }, { - "accuracy": 0.9816632270812988, + "accuracy": 0.9836817979812622, "total_bits": 111748096, "q_proj": { "group_size": { @@ -49033,7 +49033,7 @@ } }, { - "accuracy": 0.9867227077484131, + "accuracy": 0.9886379241943359, "total_bits": 132388864, "q_proj": { "group_size": { @@ -49097,7 +49097,7 @@ } }, { - "accuracy": 0.9873135089874268, + "accuracy": 0.989710807800293, "total_bits": 132455936, "q_proj": { "group_size": { @@ -49161,7 +49161,7 @@ } }, { - "accuracy": 0.9908209443092346, + "accuracy": 0.991203248500824, "total_bits": 169089024, "q_proj": { "group_size": { @@ -49213,7 +49213,7 @@ } }, { - "accuracy": 0.9915295243263245, + "accuracy": 0.9925793409347534, "total_bits": 169221632, "q_proj": { "group_size": { @@ -49265,7 +49265,7 @@ } }, { - "accuracy": 0.992247998714447, + "accuracy": 0.993550717830658, "total_bits": 170671104, "q_proj": { "group_size": { @@ -49317,7 +49317,7 @@ } }, { - "accuracy": 0.9926817417144775, + "accuracy": 0.9936278462409973, "total_bits": 173039616, "q_proj": { "group_size": { @@ -49369,7 +49369,7 @@ } }, { - "accuracy": 0.9934728145599365, + "accuracy": 0.9948954582214355, "total_bits": 174398976, "q_proj": { "group_size": { @@ -49433,7 +49433,7 @@ } }, { - "accuracy": 0.9940133094787598, + "accuracy": 0.9952177405357361, "total_bits": 175225856, "q_proj": { "group_size": { @@ -49497,7 +49497,7 @@ } }, { - "accuracy": 0.9945135712623596, + "accuracy": 0.995570182800293, "total_bits": 178728960, "q_proj": { "group_size": { @@ -49558,7 +49558,7 @@ } }, { - "accuracy": 0.9949716925621033, + "accuracy": 0.9958553910255432, "total_bits": 181067776, "q_proj": { "group_size": { @@ -49619,7 +49619,7 @@ } }, { - "accuracy": 0.9967989325523376, + "accuracy": 0.9972278475761414, "total_bits": 219944960, "q_proj": { "group_size": { @@ -49680,7 +49680,7 @@ } }, { - "accuracy": 0.9971025586128235, + "accuracy": 0.9976123571395874, "total_bits": 223010816, "q_proj": { "group_size": { @@ -49741,7 +49741,7 @@ } }, { - "accuracy": 0.9974004030227661, + "accuracy": 0.9975962042808533, "total_bits": 252975104, "q_proj": { "group_size": { @@ -49793,7 +49793,7 @@ } }, { - "accuracy": 0.9977647066116333, + "accuracy": 0.9980898499488831, "total_bits": 265314304, "q_proj": { "group_size": { @@ -49845,7 +49845,7 @@ } }, { - "accuracy": 0.9985503554344177, + "accuracy": 0.9987714290618896, "total_bits": 336861184, "q_proj": { "group_size": { @@ -49899,7 +49899,7 @@ ], "model.layers.25.block_sparse_moe": [ { - "accuracy": 0.9478374123573303, + "accuracy": 0.9577382802963257, "total_bits": 3157926400, "w1": { "group_size": { @@ -49951,7 +49951,7 @@ } }, { - "accuracy": 0.9495270252227783, + "accuracy": 0.9591259956359863, "total_bits": 3268026880, "w1": { "group_size": { @@ -50003,7 +50003,7 @@ } }, { - "accuracy": 0.9541053771972656, + "accuracy": 0.9629094004631042, "total_bits": 3652411392, "w1": { "group_size": { @@ -50052,7 +50052,7 @@ } }, { - "accuracy": 0.9552929401397705, + "accuracy": 0.9638628959655762, "total_bits": 4098056192, "w1": { "group_size": { @@ -50101,7 +50101,7 @@ } }, { - "accuracy": 0.9747385382652283, + "accuracy": 0.9796192646026611, "total_bits": 4621411072, "w1": { "group_size": { @@ -50153,7 +50153,7 @@ } }, { - "accuracy": 0.9767861366271973, + "accuracy": 0.981246292591095, "total_bits": 4737212416, "w1": { "group_size": { @@ -50205,7 +50205,7 @@ } }, { - "accuracy": 0.9781327247619629, + "accuracy": 0.9823306202888489, "total_bits": 5093868288, "w1": { "group_size": { @@ -50254,7 +50254,7 @@ } }, { - "accuracy": 0.9870446920394897, + "accuracy": 0.9895449280738831, "total_bits": 5824164608, "w1": { "group_size": { @@ -50297,7 +50297,7 @@ } }, { - "accuracy": 0.988254964351654, + "accuracy": 0.9905019402503967, "total_bits": 5910044672, "w1": { "group_size": { @@ -50340,7 +50340,7 @@ } }, { - "accuracy": 0.987244725227356, + "accuracy": 0.9897153377532959, "total_bits": 6006579968, "w1": { "group_size": { @@ -50392,7 +50392,7 @@ } }, { - "accuracy": 0.9888932704925537, + "accuracy": 0.991036593914032, "total_bits": 6122381312, "w1": { "group_size": { @@ -50444,7 +50444,7 @@ } }, { - "accuracy": 0.9936016798019409, + "accuracy": 0.9948354363441467, "total_bits": 7391748864, "w1": { "group_size": { @@ -50496,7 +50496,7 @@ } }, { - "accuracy": 0.994202733039856, + "accuracy": 0.9953200221061707, "total_bits": 7507550208, "w1": { "group_size": { @@ -50548,7 +50548,7 @@ } }, { - "accuracy": 0.9963886141777039, + "accuracy": 0.9970769286155701, "total_bits": 8550425344, "w1": { "group_size": { @@ -50591,7 +50591,7 @@ } }, { - "accuracy": 0.9965652227401733, + "accuracy": 0.9972229599952698, "total_bits": 8877312000, "w1": { "group_size": { @@ -50640,7 +50640,7 @@ } }, { - "accuracy": 0.9967638850212097, + "accuracy": 0.9973825216293335, "total_bits": 9674229760, "w1": { "group_size": { @@ -50686,7 +50686,7 @@ } }, { - "accuracy": 0.9982494711875916, + "accuracy": 0.9985783100128174, "total_bits": 11318396928, "w1": { "group_size": { @@ -50728,7 +50728,7 @@ ], "model.layers.26.self_attn": [ { - "accuracy": 0.9722825288772583, + "accuracy": 0.9767425060272217, "total_bits": 89141248, "q_proj": { "group_size": { @@ -50792,7 +50792,7 @@ } }, { - "accuracy": 0.9739075899124146, + "accuracy": 0.9776657819747925, "total_bits": 91697152, "q_proj": { "group_size": { @@ -50856,7 +50856,7 @@ } }, { - "accuracy": 0.976570188999176, + "accuracy": 0.979796826839447, "total_bits": 95234560, "q_proj": { "group_size": { @@ -50920,7 +50920,7 @@ } }, { - "accuracy": 0.9809679985046387, + "accuracy": 0.9829944372177124, "total_bits": 111748096, "q_proj": { "group_size": { @@ -50984,7 +50984,7 @@ } }, { - "accuracy": 0.9860027432441711, + "accuracy": 0.9882611036300659, "total_bits": 132388864, "q_proj": { "group_size": { @@ -51048,7 +51048,7 @@ } }, { - "accuracy": 0.9863536953926086, + "accuracy": 0.9888821244239807, "total_bits": 132455936, "q_proj": { "group_size": { @@ -51112,7 +51112,7 @@ } }, { - "accuracy": 0.990260660648346, + "accuracy": 0.990951418876648, "total_bits": 169089024, "q_proj": { "group_size": { @@ -51164,7 +51164,7 @@ } }, { - "accuracy": 0.9906091094017029, + "accuracy": 0.9917359352111816, "total_bits": 169221632, "q_proj": { "group_size": { @@ -51216,7 +51216,7 @@ } }, { - "accuracy": 0.9918403625488281, + "accuracy": 0.9928454756736755, "total_bits": 170671104, "q_proj": { "group_size": { @@ -51268,7 +51268,7 @@ } }, { - "accuracy": 0.9921053051948547, + "accuracy": 0.9932335019111633, "total_bits": 173039616, "q_proj": { "group_size": { @@ -51320,7 +51320,7 @@ } }, { - "accuracy": 0.9930142164230347, + "accuracy": 0.9941086769104004, "total_bits": 174398976, "q_proj": { "group_size": { @@ -51384,7 +51384,7 @@ } }, { - "accuracy": 0.9935306906700134, + "accuracy": 0.9947384595870972, "total_bits": 175225856, "q_proj": { "group_size": { @@ -51448,7 +51448,7 @@ } }, { - "accuracy": 0.9938754439353943, + "accuracy": 0.9947759509086609, "total_bits": 178728960, "q_proj": { "group_size": { @@ -51509,7 +51509,7 @@ } }, { - "accuracy": 0.9945443272590637, + "accuracy": 0.9952991604804993, "total_bits": 181067776, "q_proj": { "group_size": { @@ -51570,7 +51570,7 @@ } }, { - "accuracy": 0.9962931871414185, + "accuracy": 0.9965145587921143, "total_bits": 219944960, "q_proj": { "group_size": { @@ -51631,7 +51631,7 @@ } }, { - "accuracy": 0.9967128038406372, + "accuracy": 0.9970770478248596, "total_bits": 223010816, "q_proj": { "group_size": { @@ -51692,7 +51692,7 @@ } }, { - "accuracy": 0.9969232678413391, + "accuracy": 0.9968740344047546, "total_bits": 252975104, "q_proj": { "group_size": { @@ -51744,7 +51744,7 @@ } }, { - "accuracy": 0.9973604679107666, + "accuracy": 0.9975170493125916, "total_bits": 265314304, "q_proj": { "group_size": { @@ -51796,7 +51796,7 @@ } }, { - "accuracy": 0.9982743859291077, + "accuracy": 0.9977735877037048, "total_bits": 336861184, "q_proj": { "group_size": { @@ -51850,7 +51850,7 @@ ], "model.layers.26.block_sparse_moe": [ { - "accuracy": 0.9458172917366028, + "accuracy": 0.9559926390647888, "total_bits": 3157926400, "w1": { "group_size": { @@ -51902,7 +51902,7 @@ } }, { - "accuracy": 0.9475877285003662, + "accuracy": 0.9574823379516602, "total_bits": 3268026880, "w1": { "group_size": { @@ -51954,7 +51954,7 @@ } }, { - "accuracy": 0.9522371888160706, + "accuracy": 0.9613052010536194, "total_bits": 3652411392, "w1": { "group_size": { @@ -52003,7 +52003,7 @@ } }, { - "accuracy": 0.9534463286399841, + "accuracy": 0.9622758626937866, "total_bits": 4098056192, "w1": { "group_size": { @@ -52052,7 +52052,7 @@ } }, { - "accuracy": 0.9737756848335266, + "accuracy": 0.9787663221359253, "total_bits": 4621411072, "w1": { "group_size": { @@ -52104,7 +52104,7 @@ } }, { - "accuracy": 0.9758790135383606, + "accuracy": 0.9804754853248596, "total_bits": 4737212416, "w1": { "group_size": { @@ -52156,7 +52156,7 @@ } }, { - "accuracy": 0.9772332310676575, + "accuracy": 0.9815607666969299, "total_bits": 5093868288, "w1": { "group_size": { @@ -52205,7 +52205,7 @@ } }, { - "accuracy": 0.9865396022796631, + "accuracy": 0.9891143441200256, "total_bits": 5824164608, "w1": { "group_size": { @@ -52248,7 +52248,7 @@ } }, { - "accuracy": 0.9877844452857971, + "accuracy": 0.990105152130127, "total_bits": 5910044672, "w1": { "group_size": { @@ -52291,7 +52291,7 @@ } }, { - "accuracy": 0.9867697358131409, + "accuracy": 0.989286482334137, "total_bits": 6006579968, "w1": { "group_size": { @@ -52343,7 +52343,7 @@ } }, { - "accuracy": 0.9884620904922485, + "accuracy": 0.9906608462333679, "total_bits": 6122381312, "w1": { "group_size": { @@ -52395,7 +52395,7 @@ } }, { - "accuracy": 0.9933658838272095, + "accuracy": 0.9946260452270508, "total_bits": 7391748864, "w1": { "group_size": { @@ -52447,7 +52447,7 @@ } }, { - "accuracy": 0.9939833283424377, + "accuracy": 0.9951276183128357, "total_bits": 7507550208, "w1": { "group_size": { @@ -52499,7 +52499,7 @@ } }, { - "accuracy": 0.996260941028595, + "accuracy": 0.9969669580459595, "total_bits": 8550425344, "w1": { "group_size": { @@ -52542,7 +52542,7 @@ } }, { - "accuracy": 0.9964504837989807, + "accuracy": 0.9971233010292053, "total_bits": 8877312000, "w1": { "group_size": { @@ -52591,7 +52591,7 @@ } }, { - "accuracy": 0.9966521263122559, + "accuracy": 0.9972842335700989, "total_bits": 9674229760, "w1": { "group_size": { @@ -52637,7 +52637,7 @@ } }, { - "accuracy": 0.9982143044471741, + "accuracy": 0.9985492825508118, "total_bits": 11318396928, "w1": { "group_size": { @@ -52679,7 +52679,7 @@ ], "model.layers.27.self_attn": [ { - "accuracy": 0.9693523049354553, + "accuracy": 0.9748417139053345, "total_bits": 89141248, "q_proj": { "group_size": { @@ -52743,7 +52743,7 @@ } }, { - "accuracy": 0.9717257022857666, + "accuracy": 0.9759301543235779, "total_bits": 91697152, "q_proj": { "group_size": { @@ -52807,7 +52807,7 @@ } }, { - "accuracy": 0.9749314785003662, + "accuracy": 0.9792391061782837, "total_bits": 95234560, "q_proj": { "group_size": { @@ -52871,7 +52871,7 @@ } }, { - "accuracy": 0.9790597558021545, + "accuracy": 0.9822331666946411, "total_bits": 111748096, "q_proj": { "group_size": { @@ -52935,7 +52935,7 @@ } }, { - "accuracy": 0.9854823350906372, + "accuracy": 0.9876313209533691, "total_bits": 132388864, "q_proj": { "group_size": { @@ -52999,7 +52999,7 @@ } }, { - "accuracy": 0.9862427711486816, + "accuracy": 0.9883215427398682, "total_bits": 132455936, "q_proj": { "group_size": { @@ -53063,7 +53063,7 @@ } }, { - "accuracy": 0.9896723628044128, + "accuracy": 0.9905436635017395, "total_bits": 169089024, "q_proj": { "group_size": { @@ -53115,7 +53115,7 @@ } }, { - "accuracy": 0.9907065033912659, + "accuracy": 0.9914318919181824, "total_bits": 169221632, "q_proj": { "group_size": { @@ -53167,7 +53167,7 @@ } }, { - "accuracy": 0.9912441372871399, + "accuracy": 0.9913925528526306, "total_bits": 170671104, "q_proj": { "group_size": { @@ -53219,7 +53219,7 @@ } }, { - "accuracy": 0.9916640520095825, + "accuracy": 0.9916152358055115, "total_bits": 173039616, "q_proj": { "group_size": { @@ -53271,7 +53271,7 @@ } }, { - "accuracy": 0.9929704666137695, + "accuracy": 0.9940580129623413, "total_bits": 174398976, "q_proj": { "group_size": { @@ -53335,7 +53335,7 @@ } }, { - "accuracy": 0.993415355682373, + "accuracy": 0.9947108626365662, "total_bits": 175225856, "q_proj": { "group_size": { @@ -53399,7 +53399,7 @@ } }, { - "accuracy": 0.9937718510627747, + "accuracy": 0.9946547150611877, "total_bits": 178728960, "q_proj": { "group_size": { @@ -53460,7 +53460,7 @@ } }, { - "accuracy": 0.9942445755004883, + "accuracy": 0.9952250123023987, "total_bits": 181067776, "q_proj": { "group_size": { @@ -53521,7 +53521,7 @@ } }, { - "accuracy": 0.9965182542800903, + "accuracy": 0.9969433546066284, "total_bits": 219944960, "q_proj": { "group_size": { @@ -53582,7 +53582,7 @@ } }, { - "accuracy": 0.9968522787094116, + "accuracy": 0.997398853302002, "total_bits": 223010816, "q_proj": { "group_size": { @@ -53643,7 +53643,7 @@ } }, { - "accuracy": 0.9972480535507202, + "accuracy": 0.9974234104156494, "total_bits": 252975104, "q_proj": { "group_size": { @@ -53695,7 +53695,7 @@ } }, { - "accuracy": 0.9976238012313843, + "accuracy": 0.9980852007865906, "total_bits": 265314304, "q_proj": { "group_size": { @@ -53747,7 +53747,7 @@ } }, { - "accuracy": 0.9984658360481262, + "accuracy": 0.9988439083099365, "total_bits": 336861184, "q_proj": { "group_size": { @@ -53801,7 +53801,7 @@ ], "model.layers.27.block_sparse_moe": [ { - "accuracy": 0.9418197870254517, + "accuracy": 0.9525129795074463, "total_bits": 3157926400, "w1": { "group_size": { @@ -53853,7 +53853,7 @@ } }, { - "accuracy": 0.9437699913978577, + "accuracy": 0.9541512727737427, "total_bits": 3268026880, "w1": { "group_size": { @@ -53905,7 +53905,7 @@ } }, { - "accuracy": 0.948632538318634, + "accuracy": 0.9581472873687744, "total_bits": 3652411392, "w1": { "group_size": { @@ -53954,7 +53954,7 @@ } }, { - "accuracy": 0.9499104022979736, + "accuracy": 0.9591740965843201, "total_bits": 4098056192, "w1": { "group_size": { @@ -54003,7 +54003,7 @@ } }, { - "accuracy": 0.9717760682106018, + "accuracy": 0.9770270586013794, "total_bits": 4621411072, "w1": { "group_size": { @@ -54055,7 +54055,7 @@ } }, { - "accuracy": 0.9740762710571289, + "accuracy": 0.9789137840270996, "total_bits": 4737212416, "w1": { "group_size": { @@ -54107,7 +54107,7 @@ } }, { - "accuracy": 0.975480854511261, + "accuracy": 0.9800542593002319, "total_bits": 5093868288, "w1": { "group_size": { @@ -54156,7 +54156,7 @@ } }, { - "accuracy": 0.9854487180709839, + "accuracy": 0.988162100315094, "total_bits": 5824164608, "w1": { "group_size": { @@ -54199,7 +54199,7 @@ } }, { - "accuracy": 0.9868009686470032, + "accuracy": 0.9892444014549255, "total_bits": 5910044672, "w1": { "group_size": { @@ -54242,7 +54242,7 @@ } }, { - "accuracy": 0.9857349991798401, + "accuracy": 0.9883984327316284, "total_bits": 6006579968, "w1": { "group_size": { @@ -54294,7 +54294,7 @@ } }, { - "accuracy": 0.987581193447113, + "accuracy": 0.9898993968963623, "total_bits": 6122381312, "w1": { "group_size": { @@ -54346,7 +54346,7 @@ } }, { - "accuracy": 0.9928435683250427, + "accuracy": 0.9941725730895996, "total_bits": 7391748864, "w1": { "group_size": { @@ -54398,7 +54398,7 @@ } }, { - "accuracy": 0.9934998154640198, + "accuracy": 0.9947126507759094, "total_bits": 7507550208, "w1": { "group_size": { @@ -54450,7 +54450,7 @@ } }, { - "accuracy": 0.9959344267845154, + "accuracy": 0.9966796040534973, "total_bits": 8550425344, "w1": { "group_size": { @@ -54493,7 +54493,7 @@ } }, { - "accuracy": 0.9961563348770142, + "accuracy": 0.9968685507774353, "total_bits": 8877312000, "w1": { "group_size": { @@ -54542,7 +54542,7 @@ } }, { - "accuracy": 0.9963647127151489, + "accuracy": 0.997036874294281, "total_bits": 9674229760, "w1": { "group_size": { @@ -54588,7 +54588,7 @@ } }, { - "accuracy": 0.9980246424674988, + "accuracy": 0.9983872771263123, "total_bits": 11318396928, "w1": { "group_size": { @@ -54630,7 +54630,7 @@ ], "model.layers.28.self_attn": [ { - "accuracy": 0.9627799987792969, + "accuracy": 0.9678651094436646, "total_bits": 89141248, "q_proj": { "group_size": { @@ -54694,7 +54694,7 @@ } }, { - "accuracy": 0.9655519127845764, + "accuracy": 0.9693048596382141, "total_bits": 91697152, "q_proj": { "group_size": { @@ -54758,7 +54758,7 @@ } }, { - "accuracy": 0.9703478813171387, + "accuracy": 0.9737746715545654, "total_bits": 95234560, "q_proj": { "group_size": { @@ -54822,7 +54822,7 @@ } }, { - "accuracy": 0.9759931564331055, + "accuracy": 0.9777098298072815, "total_bits": 111748096, "q_proj": { "group_size": { @@ -54886,7 +54886,7 @@ } }, { - "accuracy": 0.9824903011322021, + "accuracy": 0.9843841195106506, "total_bits": 132388864, "q_proj": { "group_size": { @@ -54950,7 +54950,7 @@ } }, { - "accuracy": 0.9830102920532227, + "accuracy": 0.984433114528656, "total_bits": 132455936, "q_proj": { "group_size": { @@ -55014,7 +55014,7 @@ } }, { - "accuracy": 0.987653911113739, + "accuracy": 0.9878154993057251, "total_bits": 169089024, "q_proj": { "group_size": { @@ -55066,7 +55066,7 @@ } }, { - "accuracy": 0.9883350133895874, + "accuracy": 0.9878740906715393, "total_bits": 169221632, "q_proj": { "group_size": { @@ -55118,7 +55118,7 @@ } }, { - "accuracy": 0.9897574186325073, + "accuracy": 0.990941047668457, "total_bits": 170671104, "q_proj": { "group_size": { @@ -55170,7 +55170,7 @@ } }, { - "accuracy": 0.9903573393821716, + "accuracy": 0.9911961555480957, "total_bits": 173039616, "q_proj": { "group_size": { @@ -55222,7 +55222,7 @@ } }, { - "accuracy": 0.9909435510635376, + "accuracy": 0.9921673536300659, "total_bits": 174398976, "q_proj": { "group_size": { @@ -55286,7 +55286,7 @@ } }, { - "accuracy": 0.991680383682251, + "accuracy": 0.9926769733428955, "total_bits": 175225856, "q_proj": { "group_size": { @@ -55350,7 +55350,7 @@ } }, { - "accuracy": 0.9922028183937073, + "accuracy": 0.9929895997047424, "total_bits": 178728960, "q_proj": { "group_size": { @@ -55411,7 +55411,7 @@ } }, { - "accuracy": 0.9928052425384521, + "accuracy": 0.9937294125556946, "total_bits": 181067776, "q_proj": { "group_size": { @@ -55472,7 +55472,7 @@ } }, { - "accuracy": 0.9952661991119385, + "accuracy": 0.9954001903533936, "total_bits": 219944960, "q_proj": { "group_size": { @@ -55533,7 +55533,7 @@ } }, { - "accuracy": 0.9954966902732849, + "accuracy": 0.9958739280700684, "total_bits": 223010816, "q_proj": { "group_size": { @@ -55594,7 +55594,7 @@ } }, { - "accuracy": 0.9959366321563721, + "accuracy": 0.995829164981842, "total_bits": 252975104, "q_proj": { "group_size": { @@ -55646,7 +55646,7 @@ } }, { - "accuracy": 0.9961747527122498, + "accuracy": 0.996258020401001, "total_bits": 265314304, "q_proj": { "group_size": { @@ -55698,7 +55698,7 @@ } }, { - "accuracy": 0.9971712231636047, + "accuracy": 0.9965918064117432, "total_bits": 336861184, "q_proj": { "group_size": { @@ -55752,7 +55752,7 @@ ], "model.layers.28.block_sparse_moe": [ { - "accuracy": 0.9358318448066711, + "accuracy": 0.9466860294342041, "total_bits": 3157926400, "w1": { "group_size": { @@ -55804,7 +55804,7 @@ } }, { - "accuracy": 0.9380796551704407, + "accuracy": 0.9486239552497864, "total_bits": 3268026880, "w1": { "group_size": { @@ -55856,7 +55856,7 @@ } }, { - "accuracy": 0.9433961510658264, + "accuracy": 0.9530645608901978, "total_bits": 3652411392, "w1": { "group_size": { @@ -55905,7 +55905,7 @@ } }, { - "accuracy": 0.9448249340057373, + "accuracy": 0.9542340636253357, "total_bits": 4098056192, "w1": { "group_size": { @@ -55954,7 +55954,7 @@ } }, { - "accuracy": 0.968885600566864, + "accuracy": 0.9742810726165771, "total_bits": 4621411072, "w1": { "group_size": { @@ -56006,7 +56006,7 @@ } }, { - "accuracy": 0.9714505672454834, + "accuracy": 0.9763697981834412, "total_bits": 4737212416, "w1": { "group_size": { @@ -56058,7 +56058,7 @@ } }, { - "accuracy": 0.9730249643325806, + "accuracy": 0.9776629209518433, "total_bits": 5093868288, "w1": { "group_size": { @@ -56107,7 +56107,7 @@ } }, { - "accuracy": 0.9838186502456665, + "accuracy": 0.986602246761322, "total_bits": 5824164608, "w1": { "group_size": { @@ -56150,7 +56150,7 @@ } }, { - "accuracy": 0.9853168725967407, + "accuracy": 0.9878194332122803, "total_bits": 5910044672, "w1": { "group_size": { @@ -56193,7 +56193,7 @@ } }, { - "accuracy": 0.984261155128479, + "accuracy": 0.9869940280914307, "total_bits": 6006579968, "w1": { "group_size": { @@ -56245,7 +56245,7 @@ } }, { - "accuracy": 0.9862863421440125, + "accuracy": 0.9886564612388611, "total_bits": 6122381312, "w1": { "group_size": { @@ -56297,7 +56297,7 @@ } }, { - "accuracy": 0.992078423500061, + "accuracy": 0.993452787399292, "total_bits": 7391748864, "w1": { "group_size": { @@ -56349,7 +56349,7 @@ } }, { - "accuracy": 0.9927451014518738, + "accuracy": 0.9940016865730286, "total_bits": 7507550208, "w1": { "group_size": { @@ -56401,7 +56401,7 @@ } }, { - "accuracy": 0.9954136610031128, + "accuracy": 0.9962024092674255, "total_bits": 8550425344, "w1": { "group_size": { @@ -56444,7 +56444,7 @@ } }, { - "accuracy": 0.995699942111969, + "accuracy": 0.9964504241943359, "total_bits": 8877312000, "w1": { "group_size": { @@ -56493,7 +56493,7 @@ } }, { - "accuracy": 0.9959337115287781, + "accuracy": 0.9966419339179993, "total_bits": 9674229760, "w1": { "group_size": { @@ -56539,7 +56539,7 @@ } }, { - "accuracy": 0.9976804852485657, + "accuracy": 0.9980868697166443, "total_bits": 11318396928, "w1": { "group_size": { @@ -56581,7 +56581,7 @@ ], "model.layers.29.self_attn": [ { - "accuracy": 0.9523782134056091, + "accuracy": 0.9589762687683105, "total_bits": 89141248, "q_proj": { "group_size": { @@ -56645,7 +56645,7 @@ } }, { - "accuracy": 0.9540762901306152, + "accuracy": 0.960888683795929, "total_bits": 91697152, "q_proj": { "group_size": { @@ -56709,7 +56709,7 @@ } }, { - "accuracy": 0.9607983827590942, + "accuracy": 0.9668040871620178, "total_bits": 95234560, "q_proj": { "group_size": { @@ -56773,7 +56773,7 @@ } }, { - "accuracy": 0.9685555696487427, + "accuracy": 0.9724057912826538, "total_bits": 111748096, "q_proj": { "group_size": { @@ -56837,7 +56837,7 @@ } }, { - "accuracy": 0.9765552282333374, + "accuracy": 0.9786757230758667, "total_bits": 132388864, "q_proj": { "group_size": { @@ -56901,7 +56901,7 @@ } }, { - "accuracy": 0.9769577980041504, + "accuracy": 0.9795317053794861, "total_bits": 132455936, "q_proj": { "group_size": { @@ -56965,7 +56965,7 @@ } }, { - "accuracy": 0.9842032790184021, + "accuracy": 0.983984112739563, "total_bits": 169089024, "q_proj": { "group_size": { @@ -57017,7 +57017,7 @@ } }, { - "accuracy": 0.9846928119659424, + "accuracy": 0.9846932291984558, "total_bits": 169221632, "q_proj": { "group_size": { @@ -57069,7 +57069,7 @@ } }, { - "accuracy": 0.985408365726471, + "accuracy": 0.9869046211242676, "total_bits": 170671104, "q_proj": { "group_size": { @@ -57121,7 +57121,7 @@ } }, { - "accuracy": 0.9865745902061462, + "accuracy": 0.987400233745575, "total_bits": 173039616, "q_proj": { "group_size": { @@ -57173,7 +57173,7 @@ } }, { - "accuracy": 0.9884350299835205, + "accuracy": 0.989698052406311, "total_bits": 174398976, "q_proj": { "group_size": { @@ -57237,7 +57237,7 @@ } }, { - "accuracy": 0.9892579913139343, + "accuracy": 0.9902897477149963, "total_bits": 175225856, "q_proj": { "group_size": { @@ -57301,7 +57301,7 @@ } }, { - "accuracy": 0.9902172684669495, + "accuracy": 0.9907212257385254, "total_bits": 178728960, "q_proj": { "group_size": { @@ -57362,7 +57362,7 @@ } }, { - "accuracy": 0.9909592866897583, + "accuracy": 0.9917033314704895, "total_bits": 181067776, "q_proj": { "group_size": { @@ -57423,7 +57423,7 @@ } }, { - "accuracy": 0.9943437576293945, + "accuracy": 0.9950379729270935, "total_bits": 219944960, "q_proj": { "group_size": { @@ -57484,7 +57484,7 @@ } }, { - "accuracy": 0.9948164820671082, + "accuracy": 0.995669960975647, "total_bits": 223010816, "q_proj": { "group_size": { @@ -57545,7 +57545,7 @@ } }, { - "accuracy": 0.9955139756202698, + "accuracy": 0.9958292841911316, "total_bits": 252975104, "q_proj": { "group_size": { @@ -57597,7 +57597,7 @@ } }, { - "accuracy": 0.9961320757865906, + "accuracy": 0.9966702461242676, "total_bits": 265314304, "q_proj": { "group_size": { @@ -57649,7 +57649,7 @@ } }, { - "accuracy": 0.9978858828544617, + "accuracy": 0.9979846477508545, "total_bits": 336861184, "q_proj": { "group_size": { @@ -57703,7 +57703,7 @@ ], "model.layers.29.block_sparse_moe": [ { - "accuracy": 0.9346504211425781, + "accuracy": 0.9436861276626587, "total_bits": 3157926400, "w1": { "group_size": { @@ -57755,7 +57755,7 @@ } }, { - "accuracy": 0.9371471405029297, + "accuracy": 0.9459205269813538, "total_bits": 3268026880, "w1": { "group_size": { @@ -57807,7 +57807,7 @@ } }, { - "accuracy": 0.9426423907279968, + "accuracy": 0.950649619102478, "total_bits": 3652411392, "w1": { "group_size": { @@ -57856,7 +57856,7 @@ } }, { - "accuracy": 0.9441152811050415, + "accuracy": 0.9519066214561462, "total_bits": 4098056192, "w1": { "group_size": { @@ -57905,7 +57905,7 @@ } }, { - "accuracy": 0.9683999419212341, + "accuracy": 0.9728496670722961, "total_bits": 4621411072, "w1": { "group_size": { @@ -57957,7 +57957,7 @@ } }, { - "accuracy": 0.9710190892219543, + "accuracy": 0.9750910401344299, "total_bits": 4737212416, "w1": { "group_size": { @@ -58009,7 +58009,7 @@ } }, { - "accuracy": 0.9727044105529785, + "accuracy": 0.976554811000824, "total_bits": 5093868288, "w1": { "group_size": { @@ -58058,7 +58058,7 @@ } }, { - "accuracy": 0.9834861755371094, + "accuracy": 0.9857460260391235, "total_bits": 5824164608, "w1": { "group_size": { @@ -58101,7 +58101,7 @@ } }, { - "accuracy": 0.984974205493927, + "accuracy": 0.9870029091835022, "total_bits": 5910044672, "w1": { "group_size": { @@ -58144,7 +58144,7 @@ } }, { - "accuracy": 0.9840195178985596, + "accuracy": 0.9862585663795471, "total_bits": 6006579968, "w1": { "group_size": { @@ -58196,7 +58196,7 @@ } }, { - "accuracy": 0.9860685467720032, + "accuracy": 0.9879990816116333, "total_bits": 6122381312, "w1": { "group_size": { @@ -58248,7 +58248,7 @@ } }, { - "accuracy": 0.9919497966766357, + "accuracy": 0.9930547475814819, "total_bits": 7391748864, "w1": { "group_size": { @@ -58300,7 +58300,7 @@ } }, { - "accuracy": 0.9926316738128662, + "accuracy": 0.9936461448669434, "total_bits": 7507550208, "w1": { "group_size": { @@ -58352,7 +58352,7 @@ } }, { - "accuracy": 0.9953070878982544, + "accuracy": 0.9958972334861755, "total_bits": 8550425344, "w1": { "group_size": { @@ -58395,7 +58395,7 @@ } }, { - "accuracy": 0.9956223368644714, + "accuracy": 0.996192991733551, "total_bits": 8877312000, "w1": { "group_size": { @@ -58444,7 +58444,7 @@ } }, { - "accuracy": 0.9958757162094116, + "accuracy": 0.9964084029197693, "total_bits": 9674229760, "w1": { "group_size": { @@ -58490,7 +58490,7 @@ } }, { - "accuracy": 0.997626006603241, + "accuracy": 0.9978636503219604, "total_bits": 11318396928, "w1": { "group_size": { @@ -58532,7 +58532,7 @@ ], "model.layers.30.self_attn": [ { - "accuracy": 0.9556474685668945, + "accuracy": 0.9627819061279297, "total_bits": 89141248, "q_proj": { "group_size": { @@ -58596,7 +58596,7 @@ } }, { - "accuracy": 0.9580041766166687, + "accuracy": 0.9643409848213196, "total_bits": 91697152, "q_proj": { "group_size": { @@ -58660,7 +58660,7 @@ } }, { - "accuracy": 0.9644206762313843, + "accuracy": 0.9680662155151367, "total_bits": 95234560, "q_proj": { "group_size": { @@ -58724,7 +58724,7 @@ } }, { - "accuracy": 0.9726014137268066, + "accuracy": 0.9745588302612305, "total_bits": 111748096, "q_proj": { "group_size": { @@ -58788,7 +58788,7 @@ } }, { - "accuracy": 0.9783593416213989, + "accuracy": 0.9792680144309998, "total_bits": 132388864, "q_proj": { "group_size": { @@ -58852,7 +58852,7 @@ } }, { - "accuracy": 0.9788323640823364, + "accuracy": 0.9822074174880981, "total_bits": 132455936, "q_proj": { "group_size": { @@ -58916,7 +58916,7 @@ } }, { - "accuracy": 0.9856012463569641, + "accuracy": 0.9843823909759521, "total_bits": 169089024, "q_proj": { "group_size": { @@ -58968,7 +58968,7 @@ } }, { - "accuracy": 0.9860390424728394, + "accuracy": 0.9877564311027527, "total_bits": 169221632, "q_proj": { "group_size": { @@ -59020,7 +59020,7 @@ } }, { - "accuracy": 0.9875908493995667, + "accuracy": 0.9889821410179138, "total_bits": 170671104, "q_proj": { "group_size": { @@ -59072,7 +59072,7 @@ } }, { - "accuracy": 0.9878588318824768, + "accuracy": 0.9889690279960632, "total_bits": 173039616, "q_proj": { "group_size": { @@ -59124,7 +59124,7 @@ } }, { - "accuracy": 0.9893385767936707, + "accuracy": 0.990949809551239, "total_bits": 174398976, "q_proj": { "group_size": { @@ -59188,7 +59188,7 @@ } }, { - "accuracy": 0.9900846481323242, + "accuracy": 0.991570234298706, "total_bits": 175225856, "q_proj": { "group_size": { @@ -59252,7 +59252,7 @@ } }, { - "accuracy": 0.9910157322883606, + "accuracy": 0.9922645092010498, "total_bits": 178728960, "q_proj": { "group_size": { @@ -59313,7 +59313,7 @@ } }, { - "accuracy": 0.9914267659187317, + "accuracy": 0.9923339486122131, "total_bits": 181067776, "q_proj": { "group_size": { @@ -59374,7 +59374,7 @@ } }, { - "accuracy": 0.9947958588600159, + "accuracy": 0.9953801035881042, "total_bits": 219944960, "q_proj": { "group_size": { @@ -59435,7 +59435,7 @@ } }, { - "accuracy": 0.9948793649673462, + "accuracy": 0.995000958442688, "total_bits": 223010816, "q_proj": { "group_size": { @@ -59496,7 +59496,7 @@ } }, { - "accuracy": 0.995858371257782, + "accuracy": 0.99615877866745, "total_bits": 252975104, "q_proj": { "group_size": { @@ -59548,7 +59548,7 @@ } }, { - "accuracy": 0.99592125415802, + "accuracy": 0.9957394599914551, "total_bits": 265314304, "q_proj": { "group_size": { @@ -59600,7 +59600,7 @@ } }, { - "accuracy": 0.9977831244468689, + "accuracy": 0.9980906248092651, "total_bits": 336861184, "q_proj": { "group_size": { @@ -59654,7 +59654,7 @@ ], "model.layers.30.block_sparse_moe": [ { - "accuracy": 0.9333481788635254, + "accuracy": 0.9386821985244751, "total_bits": 3157926400, "w1": { "group_size": { @@ -59706,7 +59706,7 @@ } }, { - "accuracy": 0.9358075261116028, + "accuracy": 0.9409767985343933, "total_bits": 3268026880, "w1": { "group_size": { @@ -59758,7 +59758,7 @@ } }, { - "accuracy": 0.9411581158638, + "accuracy": 0.945797324180603, "total_bits": 3652411392, "w1": { "group_size": { @@ -59807,7 +59807,7 @@ } }, { - "accuracy": 0.9426649808883667, + "accuracy": 0.9471363425254822, "total_bits": 4098056192, "w1": { "group_size": { @@ -59856,7 +59856,7 @@ } }, { - "accuracy": 0.9675707221031189, + "accuracy": 0.9706138372421265, "total_bits": 4621411072, "w1": { "group_size": { @@ -59908,7 +59908,7 @@ } }, { - "accuracy": 0.9702476859092712, + "accuracy": 0.9730033874511719, "total_bits": 4737212416, "w1": { "group_size": { @@ -59960,7 +59960,7 @@ } }, { - "accuracy": 0.9719544053077698, + "accuracy": 0.9745664596557617, "total_bits": 5093868288, "w1": { "group_size": { @@ -60009,7 +60009,7 @@ } }, { - "accuracy": 0.9828491806983948, + "accuracy": 0.9841045141220093, "total_bits": 5824164608, "w1": { "group_size": { @@ -60052,7 +60052,7 @@ } }, { - "accuracy": 0.9843778014183044, + "accuracy": 0.9854028224945068, "total_bits": 5910044672, "w1": { "group_size": { @@ -60095,7 +60095,7 @@ } }, { - "accuracy": 0.9835219383239746, + "accuracy": 0.9850661754608154, "total_bits": 6006579968, "w1": { "group_size": { @@ -60147,7 +60147,7 @@ } }, { - "accuracy": 0.9856252670288086, + "accuracy": 0.9867846369743347, "total_bits": 6122381312, "w1": { "group_size": { @@ -60199,7 +60199,7 @@ } }, { - "accuracy": 0.9916804432868958, + "accuracy": 0.9924027919769287, "total_bits": 7391748864, "w1": { "group_size": { @@ -60251,7 +60251,7 @@ } }, { - "accuracy": 0.9922588467597961, + "accuracy": 0.9925949573516846, "total_bits": 7507550208, "w1": { "group_size": { @@ -60303,7 +60303,7 @@ } }, { - "accuracy": 0.9950485229492188, + "accuracy": 0.9953537583351135, "total_bits": 8550425344, "w1": { "group_size": { @@ -60346,7 +60346,7 @@ } }, { - "accuracy": 0.9954189658164978, + "accuracy": 0.9956691861152649, "total_bits": 8877312000, "w1": { "group_size": { @@ -60395,7 +60395,7 @@ } }, { - "accuracy": 0.9956814050674438, + "accuracy": 0.9959005117416382, "total_bits": 9674229760, "w1": { "group_size": { @@ -60441,7 +60441,7 @@ } }, { - "accuracy": 0.9973710179328918, + "accuracy": 0.9973199963569641, "total_bits": 11318396928, "w1": { "group_size": { @@ -60483,7 +60483,7 @@ ], "model.layers.31.self_attn": [ { - "accuracy": 0.9539216756820679, + "accuracy": 0.9622859954833984, "total_bits": 89141248, "q_proj": { "group_size": { @@ -60547,7 +60547,7 @@ } }, { - "accuracy": 0.9550794363021851, + "accuracy": 0.9646103382110596, "total_bits": 91697152, "q_proj": { "group_size": { @@ -60611,7 +60611,7 @@ } }, { - "accuracy": 0.9615412354469299, + "accuracy": 0.9705355763435364, "total_bits": 95234560, "q_proj": { "group_size": { @@ -60675,7 +60675,7 @@ } }, { - "accuracy": 0.9680007100105286, + "accuracy": 0.9764381051063538, "total_bits": 111748096, "q_proj": { "group_size": { @@ -60739,7 +60739,7 @@ } }, { - "accuracy": 0.9774889945983887, + "accuracy": 0.9811333417892456, "total_bits": 132388864, "q_proj": { "group_size": { @@ -60803,7 +60803,7 @@ } }, { - "accuracy": 0.9783854484558105, + "accuracy": 0.9817309975624084, "total_bits": 132455936, "q_proj": { "group_size": { @@ -60867,7 +60867,7 @@ } }, { - "accuracy": 0.984660804271698, + "accuracy": 0.9864557385444641, "total_bits": 169089024, "q_proj": { "group_size": { @@ -60919,7 +60919,7 @@ } }, { - "accuracy": 0.9856386184692383, + "accuracy": 0.9875764846801758, "total_bits": 169221632, "q_proj": { "group_size": { @@ -60971,7 +60971,7 @@ } }, { - "accuracy": 0.9872140884399414, + "accuracy": 0.9891262054443359, "total_bits": 170671104, "q_proj": { "group_size": { @@ -61023,7 +61023,7 @@ } }, { - "accuracy": 0.988021969795227, + "accuracy": 0.9896167516708374, "total_bits": 173039616, "q_proj": { "group_size": { @@ -61075,7 +61075,7 @@ } }, { - "accuracy": 0.9889504909515381, + "accuracy": 0.9912109375, "total_bits": 174398976, "q_proj": { "group_size": { @@ -61139,7 +61139,7 @@ } }, { - "accuracy": 0.9898900389671326, + "accuracy": 0.9917938113212585, "total_bits": 175225856, "q_proj": { "group_size": { @@ -61203,7 +61203,7 @@ } }, { - "accuracy": 0.990544855594635, + "accuracy": 0.9925633072853088, "total_bits": 178728960, "q_proj": { "group_size": { @@ -61264,7 +61264,7 @@ } }, { - "accuracy": 0.991233766078949, + "accuracy": 0.9930638074874878, "total_bits": 181067776, "q_proj": { "group_size": { @@ -61325,7 +61325,7 @@ } }, { - "accuracy": 0.994594931602478, + "accuracy": 0.9955713748931885, "total_bits": 219944960, "q_proj": { "group_size": { @@ -61386,7 +61386,7 @@ } }, { - "accuracy": 0.9947410225868225, + "accuracy": 0.9955939054489136, "total_bits": 223010816, "q_proj": { "group_size": { @@ -61447,7 +61447,7 @@ } }, { - "accuracy": 0.9955139756202698, + "accuracy": 0.9962779879570007, "total_bits": 252975104, "q_proj": { "group_size": { @@ -61499,7 +61499,7 @@ } }, { - "accuracy": 0.995727002620697, + "accuracy": 0.9963361620903015, "total_bits": 265314304, "q_proj": { "group_size": { @@ -61551,7 +61551,7 @@ } }, { - "accuracy": 0.9973432421684265, + "accuracy": 0.9976822137832642, "total_bits": 336861184, "q_proj": { "group_size": { @@ -61605,7 +61605,7 @@ ], "model.layers.31.block_sparse_moe": [ { - "accuracy": 0.9467880129814148, + "accuracy": 0.9419407248497009, "total_bits": 3157926400, "w1": { "group_size": { @@ -61657,7 +61657,7 @@ } }, { - "accuracy": 0.9485058188438416, + "accuracy": 0.9438601136207581, "total_bits": 3268026880, "w1": { "group_size": { @@ -61709,7 +61709,7 @@ } }, { - "accuracy": 0.9520227909088135, + "accuracy": 0.9479536414146423, "total_bits": 3652411392, "w1": { "group_size": { @@ -61758,7 +61758,7 @@ } }, { - "accuracy": 0.953081488609314, + "accuracy": 0.949176013469696, "total_bits": 4098056192, "w1": { "group_size": { @@ -61807,7 +61807,7 @@ } }, { - "accuracy": 0.9742975234985352, + "accuracy": 0.9722312092781067, "total_bits": 4621411072, "w1": { "group_size": { @@ -61859,7 +61859,7 @@ } }, { - "accuracy": 0.9763757586479187, + "accuracy": 0.9744215607643127, "total_bits": 4737212416, "w1": { "group_size": { @@ -61911,7 +61911,7 @@ } }, { - "accuracy": 0.9775354862213135, + "accuracy": 0.9758678674697876, "total_bits": 5093868288, "w1": { "group_size": { @@ -61960,7 +61960,7 @@ } }, { - "accuracy": 0.986388623714447, + "accuracy": 0.9851397275924683, "total_bits": 5824164608, "w1": { "group_size": { @@ -62003,7 +62003,7 @@ } }, { - "accuracy": 0.987713098526001, + "accuracy": 0.9865877628326416, "total_bits": 5910044672, "w1": { "group_size": { @@ -62046,7 +62046,7 @@ } }, { - "accuracy": 0.9869077205657959, + "accuracy": 0.9858447909355164, "total_bits": 6006579968, "w1": { "group_size": { @@ -62098,7 +62098,7 @@ } }, { - "accuracy": 0.9885329604148865, + "accuracy": 0.9875437021255493, "total_bits": 6122381312, "w1": { "group_size": { @@ -62150,7 +62150,7 @@ } }, { - "accuracy": 0.9932999610900879, + "accuracy": 0.9926580786705017, "total_bits": 7391748864, "w1": { "group_size": { @@ -62202,7 +62202,7 @@ } }, { - "accuracy": 0.9937098622322083, + "accuracy": 0.9930426478385925, "total_bits": 7507550208, "w1": { "group_size": { @@ -62254,7 +62254,7 @@ } }, { - "accuracy": 0.9959242939949036, + "accuracy": 0.9953939914703369, "total_bits": 8550425344, "w1": { "group_size": { @@ -62297,7 +62297,7 @@ } }, { - "accuracy": 0.9962433576583862, + "accuracy": 0.9958451390266418, "total_bits": 8877312000, "w1": { "group_size": { @@ -62346,7 +62346,7 @@ } }, { - "accuracy": 0.9964237809181213, + "accuracy": 0.9960691332817078, "total_bits": 9674229760, "w1": { "group_size": { @@ -62392,7 +62392,7 @@ } }, { - "accuracy": 0.9977166652679443, + "accuracy": 0.9973559379577637, "total_bits": 11318396928, "w1": { "group_size": { @@ -62436,4 +62436,4 @@ "lm_head.linear": null }, "last_module_idx": 66 -} +} \ No newline at end of file