diff --git "a/measurement.json" "b/measurement.json" --- "a/measurement.json" +++ "b/measurement.json" @@ -3,7 +3,7 @@ "model.layers.0.parallel_decoder": { "attn": [ { - "accuracy": 0.9148618798506887, + "accuracy": 0.9140855952313072, "total_bits": 320757760, "q_proj": { "group_size": { @@ -67,7 +67,7 @@ } }, { - "accuracy": 0.9212212562561035, + "accuracy": 0.9207217442361932, "total_bits": 329080832, "q_proj": { "group_size": { @@ -131,7 +131,7 @@ } }, { - "accuracy": 0.9285732382222226, + "accuracy": 0.9280620876111483, "total_bits": 336024576, "q_proj": { "group_size": { @@ -195,7 +195,7 @@ } }, { - "accuracy": 0.954961707717494, + "accuracy": 0.9547668601337232, "total_bits": 401557504, "q_proj": { "group_size": { @@ -259,7 +259,7 @@ } }, { - "accuracy": 0.9592786525425158, + "accuracy": 0.9592708411969637, "total_bits": 475279360, "q_proj": { "group_size": { @@ -323,7 +323,7 @@ } }, { - "accuracy": 0.9600444938007154, + "accuracy": 0.959985469516955, "total_bits": 475479040, "q_proj": { "group_size": { @@ -387,7 +387,7 @@ } }, { - "accuracy": 0.9759351187630704, + "accuracy": 0.9760297976042095, "total_bits": 609759232, "q_proj": { "group_size": { @@ -439,7 +439,7 @@ } }, { - "accuracy": 0.976346376695131, + "accuracy": 0.9763396601927908, "total_bits": 610024448, "q_proj": { "group_size": { @@ -491,7 +491,7 @@ } }, { - "accuracy": 0.9778126572307787, + "accuracy": 0.9778116094438654, "total_bits": 615020544, "q_proj": { "group_size": { @@ -543,7 +543,7 @@ } }, { - "accuracy": 0.9792208295119437, + "accuracy": 0.9794328055883709, "total_bits": 623951872, "q_proj": { "group_size": { @@ -595,7 +595,7 @@ } }, { - "accuracy": 0.9797297700455314, + "accuracy": 0.9796852312589946, "total_bits": 626473984, "q_proj": { "group_size": { @@ -659,7 +659,7 @@ } }, { - "accuracy": 0.98100356836068, + "accuracy": 0.9809927171782443, "total_bits": 630355968, "q_proj": { "group_size": { @@ -723,7 +723,7 @@ } }, { - "accuracy": 0.9824305035566029, + "accuracy": 0.9824270841322447, "total_bits": 637362176, "q_proj": { "group_size": { @@ -784,7 +784,7 @@ } }, { - "accuracy": 0.9838083041341681, + "accuracy": 0.983949789875432, "total_bits": 646823936, "q_proj": { "group_size": { @@ -845,7 +845,7 @@ } }, { - "accuracy": 0.9902881203513396, + "accuracy": 0.9902837417627636, "total_bits": 784740352, "q_proj": { "group_size": { @@ -906,7 +906,7 @@ } }, { - "accuracy": 0.9916621093687258, + "accuracy": 0.9917169409362894, "total_bits": 797818880, "q_proj": { "group_size": { @@ -967,7 +967,7 @@ } }, { - "accuracy": 0.9933599037559409, + "accuracy": 0.9933641207845587, "total_bits": 911749120, "q_proj": { "group_size": { @@ -1019,7 +1019,7 @@ } }, { - "accuracy": 0.9946525587063086, + "accuracy": 0.9947254391093003, "total_bits": 942718976, "q_proj": { "group_size": { @@ -1071,7 +1071,7 @@ } }, { - "accuracy": 0.9975743599628147, + "accuracy": 0.9975496730522105, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -1125,7 +1125,7 @@ ], "mlp": [ { - "accuracy": 0.880216824380975, + "accuracy": 0.8805205194573653, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -1177,7 +1177,7 @@ } }, { - "accuracy": 0.8858449208109003, + "accuracy": 0.8857519250167043, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -1229,7 +1229,7 @@ } }, { - "accuracy": 0.9039369821548462, + "accuracy": 0.9033228660884657, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -1278,7 +1278,7 @@ } }, { - "accuracy": 0.9151375481956884, + "accuracy": 0.9145180363404124, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -1327,7 +1327,7 @@ } }, { - "accuracy": 0.9503516711686787, + "accuracy": 0.9505731839882701, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -1379,7 +1379,7 @@ } }, { - "accuracy": 0.955256932660153, + "accuracy": 0.9551108855950206, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -1431,7 +1431,7 @@ } }, { - "accuracy": 0.9658011574494212, + "accuracy": 0.9656621186356795, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -1480,7 +1480,7 @@ } }, { - "accuracy": 0.9715999364852905, + "accuracy": 0.971536824577733, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -1523,7 +1523,7 @@ } }, { - "accuracy": 0.9747953007095739, + "accuracy": 0.9748744462665758, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -1566,7 +1566,7 @@ } }, { - "accuracy": 0.9761234866945367, + "accuracy": 0.9761318438931516, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -1618,7 +1618,7 @@ } }, { - "accuracy": 0.9793694238913687, + "accuracy": 0.9793389016076138, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -1670,7 +1670,7 @@ } }, { - "accuracy": 0.987905447420321, + "accuracy": 0.987908022184121, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -1722,7 +1722,7 @@ } }, { - "accuracy": 0.9895817221779573, + "accuracy": 0.9894836795957465, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -1774,7 +1774,7 @@ } }, { - "accuracy": 0.9922544077823037, + "accuracy": 0.9922513059879604, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -1817,7 +1817,7 @@ } }, { - "accuracy": 0.9939032718539238, + "accuracy": 0.9938187473698666, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -1866,7 +1866,7 @@ } }, { - "accuracy": 0.9966286444350293, + "accuracy": 0.9965213854846201, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -1912,7 +1912,7 @@ } }, { - "accuracy": 0.9972587363108208, + "accuracy": 0.9972598203702977, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -1956,7 +1956,7 @@ "model.layers.1.parallel_decoder": { "attn": [ { - "accuracy": 0.7749305022390265, + "accuracy": 0.7743922785708779, "total_bits": 320757760, "q_proj": { "group_size": { @@ -2020,7 +2020,7 @@ } }, { - "accuracy": 0.7853039691322728, + "accuracy": 0.7843314973931563, "total_bits": 329080832, "q_proj": { "group_size": { @@ -2084,7 +2084,7 @@ } }, { - "accuracy": 0.8044946068211606, + "accuracy": 0.8032986239383095, "total_bits": 336024576, "q_proj": { "group_size": { @@ -2148,7 +2148,7 @@ } }, { - "accuracy": 0.8765778165114553, + "accuracy": 0.8755485258604351, "total_bits": 401557504, "q_proj": { "group_size": { @@ -2212,7 +2212,7 @@ } }, { - "accuracy": 0.8824432147176642, + "accuracy": 0.881537349600541, "total_bits": 475279360, "q_proj": { "group_size": { @@ -2276,7 +2276,7 @@ } }, { - "accuracy": 0.884987191150063, + "accuracy": 0.8845383242556923, "total_bits": 475479040, "q_proj": { "group_size": { @@ -2340,7 +2340,7 @@ } }, { - "accuracy": 0.9163917303085327, + "accuracy": 0.9158447102496499, "total_bits": 609759232, "q_proj": { "group_size": { @@ -2392,7 +2392,7 @@ } }, { - "accuracy": 0.9194006606152183, + "accuracy": 0.9192913206000077, "total_bits": 610024448, "q_proj": { "group_size": { @@ -2444,7 +2444,7 @@ } }, { - "accuracy": 0.930199616833737, + "accuracy": 0.9297467972102919, "total_bits": 615020544, "q_proj": { "group_size": { @@ -2496,7 +2496,7 @@ } }, { - "accuracy": 0.9328478135560688, + "accuracy": 0.9324013935892206, "total_bits": 623951872, "q_proj": { "group_size": { @@ -2548,7 +2548,7 @@ } }, { - "accuracy": 0.9371077951632047, + "accuracy": 0.9368059886129279, "total_bits": 626473984, "q_proj": { "group_size": { @@ -2612,7 +2612,7 @@ } }, { - "accuracy": 0.9457099186746698, + "accuracy": 0.9453848349420648, "total_bits": 630355968, "q_proj": { "group_size": { @@ -2676,7 +2676,7 @@ } }, { - "accuracy": 0.9412997710077387, + "accuracy": 0.9408984247006869, "total_bits": 637362176, "q_proj": { "group_size": { @@ -2737,7 +2737,7 @@ } }, { - "accuracy": 0.950646205952293, + "accuracy": 0.9504235041768927, "total_bits": 646823936, "q_proj": { "group_size": { @@ -2798,7 +2798,7 @@ } }, { - "accuracy": 0.9657574107772425, + "accuracy": 0.9656113543008503, "total_bits": 784740352, "q_proj": { "group_size": { @@ -2859,7 +2859,7 @@ } }, { - "accuracy": 0.9756752127095273, + "accuracy": 0.9755218013336784, "total_bits": 797818880, "q_proj": { "group_size": { @@ -2920,7 +2920,7 @@ } }, { - "accuracy": 0.9723216546209235, + "accuracy": 0.9722178892085427, "total_bits": 911749120, "q_proj": { "group_size": { @@ -2972,7 +2972,7 @@ } }, { - "accuracy": 0.9882454989772094, + "accuracy": 0.9881813118332311, "total_bits": 942718976, "q_proj": { "group_size": { @@ -3024,7 +3024,7 @@ } }, { - "accuracy": 0.9925710849071804, + "accuracy": 0.9925417578534076, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -3078,7 +3078,7 @@ ], "mlp": [ { - "accuracy": 0.8422798859445673, + "accuracy": 0.8417473466772782, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -3130,7 +3130,7 @@ } }, { - "accuracy": 0.8490798473358154, + "accuracy": 0.8486776477412173, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -3182,7 +3182,7 @@ } }, { - "accuracy": 0.8838400464308889, + "accuracy": 0.8835478958330656, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -3231,7 +3231,7 @@ } }, { - "accuracy": 0.897012660377904, + "accuracy": 0.8967625467400802, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -3280,7 +3280,7 @@ } }, { - "accuracy": 0.9231332916962474, + "accuracy": 0.9229485925875212, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -3332,7 +3332,7 @@ } }, { - "accuracy": 0.9297127033534803, + "accuracy": 0.9295259651384855, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -3384,7 +3384,7 @@ } }, { - "accuracy": 0.9452464517794157, + "accuracy": 0.9451070208298533, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -3433,7 +3433,7 @@ } }, { - "accuracy": 0.9573037938067788, + "accuracy": 0.9571960788024099, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -3476,7 +3476,7 @@ } }, { - "accuracy": 0.961987715018423, + "accuracy": 0.9618876858761436, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -3519,7 +3519,7 @@ } }, { - "accuracy": 0.9610987525237233, + "accuracy": 0.961003485478853, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -3571,7 +3571,7 @@ } }, { - "accuracy": 0.9660393093761644, + "accuracy": 0.9659512733158312, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -3623,7 +3623,7 @@ } }, { - "accuracy": 0.9800714081839511, + "accuracy": 0.9800160464487577, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -3675,7 +3675,7 @@ } }, { - "accuracy": 0.9829936184381184, + "accuracy": 0.9829516175546145, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -3727,7 +3727,7 @@ } }, { - "accuracy": 0.988497964645687, + "accuracy": 0.9884548202941292, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -3770,7 +3770,7 @@ } }, { - "accuracy": 0.9898763534269834, + "accuracy": 0.9898566438963539, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -3819,7 +3819,7 @@ } }, { - "accuracy": 0.9933434324829202, + "accuracy": 0.9933349952885979, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -3865,7 +3865,7 @@ } }, { - "accuracy": 0.9966983430479702, + "accuracy": 0.996695887101324, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -3909,7 +3909,7 @@ "model.layers.2.parallel_decoder": { "attn": [ { - "accuracy": 0.7457731397528398, + "accuracy": 0.7467384338378906, "total_bits": 320757760, "q_proj": { "group_size": { @@ -3973,7 +3973,7 @@ } }, { - "accuracy": 0.7550155740035207, + "accuracy": 0.7554104955572831, "total_bits": 329080832, "q_proj": { "group_size": { @@ -4037,7 +4037,7 @@ } }, { - "accuracy": 0.782204176250257, + "accuracy": 0.7832093991731343, "total_bits": 336024576, "q_proj": { "group_size": { @@ -4101,7 +4101,7 @@ } }, { - "accuracy": 0.8575657417899684, + "accuracy": 0.8582129227487665, "total_bits": 401557504, "q_proj": { "group_size": { @@ -4165,7 +4165,7 @@ } }, { - "accuracy": 0.8666364393736187, + "accuracy": 0.8674148007443077, "total_bits": 475279360, "q_proj": { "group_size": { @@ -4229,7 +4229,7 @@ } }, { - "accuracy": 0.8688146817056757, + "accuracy": 0.869330042286923, "total_bits": 475479040, "q_proj": { "group_size": { @@ -4293,7 +4293,7 @@ } }, { - "accuracy": 0.9086183121329859, + "accuracy": 0.9087321381819875, "total_bits": 609759232, "q_proj": { "group_size": { @@ -4345,7 +4345,7 @@ } }, { - "accuracy": 0.9113351483094065, + "accuracy": 0.9117607442956221, "total_bits": 610024448, "q_proj": { "group_size": { @@ -4397,7 +4397,7 @@ } }, { - "accuracy": 0.9199023058539942, + "accuracy": 0.9197892515282882, "total_bits": 615020544, "q_proj": { "group_size": { @@ -4449,7 +4449,7 @@ } }, { - "accuracy": 0.9222247977005809, + "accuracy": 0.9222189752679122, "total_bits": 623951872, "q_proj": { "group_size": { @@ -4501,7 +4501,7 @@ } }, { - "accuracy": 0.9294914446379009, + "accuracy": 0.9294542387912148, "total_bits": 626473984, "q_proj": { "group_size": { @@ -4565,7 +4565,7 @@ } }, { - "accuracy": 0.9365826656943873, + "accuracy": 0.9367357555188631, "total_bits": 630355968, "q_proj": { "group_size": { @@ -4629,7 +4629,7 @@ } }, { - "accuracy": 0.9356422487058138, + "accuracy": 0.9359238963378103, "total_bits": 637362176, "q_proj": { "group_size": { @@ -4690,7 +4690,7 @@ } }, { - "accuracy": 0.9431792497634888, + "accuracy": 0.943388863613731, "total_bits": 646823936, "q_proj": { "group_size": { @@ -4751,7 +4751,7 @@ } }, { - "accuracy": 0.9634665564486855, + "accuracy": 0.9635975392241227, "total_bits": 784740352, "q_proj": { "group_size": { @@ -4812,7 +4812,7 @@ } }, { - "accuracy": 0.9714557120674535, + "accuracy": 0.9714127339814839, "total_bits": 797818880, "q_proj": { "group_size": { @@ -4873,7 +4873,7 @@ } }, { - "accuracy": 0.9709367908929524, + "accuracy": 0.9710333472804019, "total_bits": 911749120, "q_proj": { "group_size": { @@ -4925,7 +4925,7 @@ } }, { - "accuracy": 0.9870813716399042, + "accuracy": 0.9870994945890025, "total_bits": 942718976, "q_proj": { "group_size": { @@ -4977,7 +4977,7 @@ } }, { - "accuracy": 0.9922419896251277, + "accuracy": 0.9922710970828408, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -5031,7 +5031,7 @@ ], "mlp": [ { - "accuracy": 0.9390830868168881, + "accuracy": 0.9468584186152408, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -5083,7 +5083,7 @@ } }, { - "accuracy": 0.942437818175868, + "accuracy": 0.95125433959459, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -5135,7 +5135,7 @@ } }, { - "accuracy": 0.9472080782840127, + "accuracy": 0.9538980722427368, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -5184,7 +5184,7 @@ } }, { - "accuracy": 0.9496647307747289, + "accuracy": 0.9556239968851993, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -5233,7 +5233,7 @@ } }, { - "accuracy": 0.9789467799036127, + "accuracy": 0.9830655141880638, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -5285,7 +5285,7 @@ } }, { - "accuracy": 0.9819381174288297, + "accuracy": 0.9847731621641862, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -5337,7 +5337,7 @@ } }, { - "accuracy": 0.9852789436516008, + "accuracy": 0.9872092290928489, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -5386,7 +5386,7 @@ } }, { - "accuracy": 0.9897885267671785, + "accuracy": 0.9920393926532645, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -5429,7 +5429,7 @@ } }, { - "accuracy": 0.9896940322298753, + "accuracy": 0.9915137863472888, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -5472,7 +5472,7 @@ } }, { - "accuracy": 0.988681814388225, + "accuracy": 0.9907007131137346, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -5524,7 +5524,7 @@ } }, { - "accuracy": 0.9912901442301901, + "accuracy": 0.9931373055043974, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -5576,7 +5576,7 @@ } }, { - "accuracy": 0.9941360425007971, + "accuracy": 0.9952193682915286, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -5628,7 +5628,7 @@ } }, { - "accuracy": 0.9946705110763249, + "accuracy": 0.9955943324848225, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -5680,7 +5680,7 @@ } }, { - "accuracy": 0.9956990758839407, + "accuracy": 0.9962758970888037, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -5723,7 +5723,7 @@ } }, { - "accuracy": 0.9956254339531848, + "accuracy": 0.9962633757999069, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -5772,7 +5772,7 @@ } }, { - "accuracy": 0.9961313385712472, + "accuracy": 0.9966084768897608, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -5818,7 +5818,7 @@ } }, { - "accuracy": 0.9965897053480148, + "accuracy": 0.9968424310025416, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -5862,7 +5862,7 @@ "model.layers.3.parallel_decoder": { "attn": [ { - "accuracy": 0.7197601418746145, + "accuracy": 0.7182418672662032, "total_bits": 320757760, "q_proj": { "group_size": { @@ -5926,7 +5926,7 @@ } }, { - "accuracy": 0.7329102817334627, + "accuracy": 0.732528385363127, "total_bits": 329080832, "q_proj": { "group_size": { @@ -5990,7 +5990,7 @@ } }, { - "accuracy": 0.7689958873548006, + "accuracy": 0.768824953781931, "total_bits": 336024576, "q_proj": { "group_size": { @@ -6054,7 +6054,7 @@ } }, { - "accuracy": 0.8481734300914564, + "accuracy": 0.8477119270123934, "total_bits": 401557504, "q_proj": { "group_size": { @@ -6118,7 +6118,7 @@ } }, { - "accuracy": 0.8553708728991056, + "accuracy": 0.8549032336787173, "total_bits": 475279360, "q_proj": { "group_size": { @@ -6182,7 +6182,7 @@ } }, { - "accuracy": 0.8609382102363988, + "accuracy": 0.8609359138890317, "total_bits": 475479040, "q_proj": { "group_size": { @@ -6246,7 +6246,7 @@ } }, { - "accuracy": 0.8971573930037648, + "accuracy": 0.8968532399127358, "total_bits": 609759232, "q_proj": { "group_size": { @@ -6298,7 +6298,7 @@ } }, { - "accuracy": 0.9045648888537758, + "accuracy": 0.9041534662246704, "total_bits": 610024448, "q_proj": { "group_size": { @@ -6350,7 +6350,7 @@ } }, { - "accuracy": 0.9184136453427767, + "accuracy": 0.9179240088713796, "total_bits": 615020544, "q_proj": { "group_size": { @@ -6402,7 +6402,7 @@ } }, { - "accuracy": 0.9208827269704718, + "accuracy": 0.9203440641102038, "total_bits": 623951872, "q_proj": { "group_size": { @@ -6454,7 +6454,7 @@ } }, { - "accuracy": 0.9280510576147782, + "accuracy": 0.927986879097788, "total_bits": 626473984, "q_proj": { "group_size": { @@ -6518,7 +6518,7 @@ } }, { - "accuracy": 0.9360642558649966, + "accuracy": 0.936189243668004, "total_bits": 630355968, "q_proj": { "group_size": { @@ -6582,7 +6582,7 @@ } }, { - "accuracy": 0.9342943116238243, + "accuracy": 0.9343017590673346, "total_bits": 637362176, "q_proj": { "group_size": { @@ -6643,7 +6643,7 @@ } }, { - "accuracy": 0.9432374552676552, + "accuracy": 0.9433243023721796, "total_bits": 646823936, "q_proj": { "group_size": { @@ -6704,7 +6704,7 @@ } }, { - "accuracy": 0.9624269447828594, + "accuracy": 0.9624733611157066, "total_bits": 784740352, "q_proj": { "group_size": { @@ -6765,7 +6765,7 @@ } }, { - "accuracy": 0.9713142200520164, + "accuracy": 0.9713680242237291, "total_bits": 797818880, "q_proj": { "group_size": { @@ -6826,7 +6826,7 @@ } }, { - "accuracy": 0.9702661225670263, + "accuracy": 0.9702352097159938, "total_bits": 911749120, "q_proj": { "group_size": { @@ -6878,7 +6878,7 @@ } }, { - "accuracy": 0.9867110864112252, + "accuracy": 0.9866822738396493, "total_bits": 942718976, "q_proj": { "group_size": { @@ -6930,7 +6930,7 @@ } }, { - "accuracy": 0.9920638211463627, + "accuracy": 0.991980158184704, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -6984,7 +6984,7 @@ ], "mlp": [ { - "accuracy": 0.7463277264645225, + "accuracy": 0.7458561847084446, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -7036,7 +7036,7 @@ } }, { - "accuracy": 0.7522660807559365, + "accuracy": 0.7518451088353207, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -7088,7 +7088,7 @@ } }, { - "accuracy": 0.7977258531670821, + "accuracy": 0.7973859059183221, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -7137,7 +7137,7 @@ } }, { - "accuracy": 0.8121238758689479, + "accuracy": 0.8117927752043071, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -7186,7 +7186,7 @@ } }, { - "accuracy": 0.8746355834760164, + "accuracy": 0.8744184970855713, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -7238,7 +7238,7 @@ } }, { - "accuracy": 0.8842681207154927, + "accuracy": 0.8840965848220022, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -7290,7 +7290,7 @@ } }, { - "accuracy": 0.9037873619481137, + "accuracy": 0.9036106059425756, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -7339,7 +7339,7 @@ } }, { - "accuracy": 0.9350037512026335, + "accuracy": 0.934896889485811, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -7382,7 +7382,7 @@ } }, { - "accuracy": 0.9410960862511083, + "accuracy": 0.9409892119859394, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -7425,7 +7425,7 @@ } }, { - "accuracy": 0.9365650980096114, + "accuracy": 0.9364629607451589, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -7477,7 +7477,7 @@ } }, { - "accuracy": 0.9443752075496473, + "accuracy": 0.9442797271828902, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -7529,7 +7529,7 @@ } }, { - "accuracy": 0.967752080214651, + "accuracy": 0.9677029283423173, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -7581,7 +7581,7 @@ } }, { - "accuracy": 0.9723186994853773, + "accuracy": 0.972270385215157, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -7633,7 +7633,7 @@ } }, { - "accuracy": 0.9826015469275022, + "accuracy": 0.9825810115588339, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -7676,7 +7676,7 @@ } }, { - "accuracy": 0.9836172806589227, + "accuracy": 0.9835958857285348, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -7725,7 +7725,7 @@ } }, { - "accuracy": 0.9876306982416856, + "accuracy": 0.987611671811656, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -7771,7 +7771,7 @@ } }, { - "accuracy": 0.9953924354753996, + "accuracy": 0.9953873996671877, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -7815,7 +7815,7 @@ "model.layers.4.parallel_decoder": { "attn": [ { - "accuracy": 0.7227413277877004, + "accuracy": 0.722165961014597, "total_bits": 320757760, "q_proj": { "group_size": { @@ -7879,7 +7879,7 @@ } }, { - "accuracy": 0.7398587778994912, + "accuracy": 0.7405803329066226, "total_bits": 329080832, "q_proj": { "group_size": { @@ -7943,7 +7943,7 @@ } }, { - "accuracy": 0.7855915270353618, + "accuracy": 0.785059276380037, "total_bits": 336024576, "q_proj": { "group_size": { @@ -8007,7 +8007,7 @@ } }, { - "accuracy": 0.845549696370175, + "accuracy": 0.8446984542043585, "total_bits": 401557504, "q_proj": { "group_size": { @@ -8071,7 +8071,7 @@ } }, { - "accuracy": 0.8603413983395225, + "accuracy": 0.8599175152025724, "total_bits": 475279360, "q_proj": { "group_size": { @@ -8135,7 +8135,7 @@ } }, { - "accuracy": 0.8658977056804457, + "accuracy": 0.8658137697922557, "total_bits": 475479040, "q_proj": { "group_size": { @@ -8199,7 +8199,7 @@ } }, { - "accuracy": 0.8941417744285182, + "accuracy": 0.8937784245139674, "total_bits": 609759232, "q_proj": { "group_size": { @@ -8251,7 +8251,7 @@ } }, { - "accuracy": 0.9004948327415868, + "accuracy": 0.9004578590393066, "total_bits": 610024448, "q_proj": { "group_size": { @@ -8303,7 +8303,7 @@ } }, { - "accuracy": 0.9129835304461027, + "accuracy": 0.9126326096685309, "total_bits": 615020544, "q_proj": { "group_size": { @@ -8355,7 +8355,7 @@ } }, { - "accuracy": 0.9151108139439633, + "accuracy": 0.9146056049748471, "total_bits": 623951872, "q_proj": { "group_size": { @@ -8407,7 +8407,7 @@ } }, { - "accuracy": 0.9282636203263935, + "accuracy": 0.9283205145283749, "total_bits": 626473984, "q_proj": { "group_size": { @@ -8471,7 +8471,7 @@ } }, { - "accuracy": 0.9370579970510382, + "accuracy": 0.9369634766327708, "total_bits": 630355968, "q_proj": { "group_size": { @@ -8535,7 +8535,7 @@ } }, { - "accuracy": 0.9340574176687944, + "accuracy": 0.9340654172395405, "total_bits": 637362176, "q_proj": { "group_size": { @@ -8596,7 +8596,7 @@ } }, { - "accuracy": 0.9433055676912007, + "accuracy": 0.9433649966591283, "total_bits": 646823936, "q_proj": { "group_size": { @@ -8657,7 +8657,7 @@ } }, { - "accuracy": 0.9620921705898485, + "accuracy": 0.9619215569998089, "total_bits": 784740352, "q_proj": { "group_size": { @@ -8718,7 +8718,7 @@ } }, { - "accuracy": 0.9710192241166767, + "accuracy": 0.9709166225634123, "total_bits": 797818880, "q_proj": { "group_size": { @@ -8779,7 +8779,7 @@ } }, { - "accuracy": 0.9678432972807633, + "accuracy": 0.9676051390798468, "total_bits": 911749120, "q_proj": { "group_size": { @@ -8831,7 +8831,7 @@ } }, { - "accuracy": 0.9870310543399108, + "accuracy": 0.9870264020405317, "total_bits": 942718976, "q_proj": { "group_size": { @@ -8883,7 +8883,7 @@ } }, { - "accuracy": 0.9916198457542219, + "accuracy": 0.9915946329894819, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -8937,7 +8937,7 @@ ], "mlp": [ { - "accuracy": 0.7176906184146279, + "accuracy": 0.7165854353653758, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -8989,7 +8989,7 @@ } }, { - "accuracy": 0.7238146882308156, + "accuracy": 0.7227619572689659, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -9041,7 +9041,7 @@ } }, { - "accuracy": 0.7762043601588199, + "accuracy": 0.7753018329018041, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -9090,7 +9090,7 @@ } }, { - "accuracy": 0.7928132383446944, + "accuracy": 0.7919516312448602, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -9139,7 +9139,7 @@ } }, { - "accuracy": 0.8595876191791735, + "accuracy": 0.8590562469080875, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -9191,7 +9191,7 @@ } }, { - "accuracy": 0.8702575533013595, + "accuracy": 0.8697515161413896, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -9243,7 +9243,7 @@ } }, { - "accuracy": 0.8932579442074424, + "accuracy": 0.8928211488221821, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -9292,7 +9292,7 @@ } }, { - "accuracy": 0.9275473356246948, + "accuracy": 0.9272909164428711, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -9335,7 +9335,7 @@ } }, { - "accuracy": 0.9343615519373041, + "accuracy": 0.9341011549297132, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -9378,7 +9378,7 @@ } }, { - "accuracy": 0.9289786690159848, + "accuracy": 0.9287177073328119, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -9430,7 +9430,7 @@ } }, { - "accuracy": 0.9376585546292757, + "accuracy": 0.93741312152461, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -9482,7 +9482,7 @@ } }, { - "accuracy": 0.9639564909433064, + "accuracy": 0.9638283691908184, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -9534,7 +9534,7 @@ } }, { - "accuracy": 0.9690175370166176, + "accuracy": 0.9688971419083444, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -9586,7 +9586,7 @@ } }, { - "accuracy": 0.9807308143691013, + "accuracy": 0.9806650688773707, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -9629,7 +9629,7 @@ } }, { - "accuracy": 0.9817034555108923, + "accuracy": 0.981631233503944, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -9678,7 +9678,7 @@ } }, { - "accuracy": 0.9864201451602735, + "accuracy": 0.9863636995616712, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -9724,7 +9724,7 @@ } }, { - "accuracy": 0.9949426141224409, + "accuracy": 0.9949234994618517, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -9768,7 +9768,7 @@ "model.layers.5.parallel_decoder": { "attn": [ { - "accuracy": 0.7008124401694851, + "accuracy": 0.6984933551989103, "total_bits": 320757760, "q_proj": { "group_size": { @@ -9832,7 +9832,7 @@ } }, { - "accuracy": 0.7139415991933722, + "accuracy": 0.7102624491641396, "total_bits": 329080832, "q_proj": { "group_size": { @@ -9896,7 +9896,7 @@ } }, { - "accuracy": 0.7680911013954564, + "accuracy": 0.7666101706655402, "total_bits": 336024576, "q_proj": { "group_size": { @@ -9960,7 +9960,7 @@ } }, { - "accuracy": 0.8218460584941664, + "accuracy": 0.8205091325860274, "total_bits": 401557504, "q_proj": { "group_size": { @@ -10024,7 +10024,7 @@ } }, { - "accuracy": 0.8442275900589793, + "accuracy": 0.8428686041581004, "total_bits": 475279360, "q_proj": { "group_size": { @@ -10088,7 +10088,7 @@ } }, { - "accuracy": 0.8503365140212209, + "accuracy": 0.8501461932533666, "total_bits": 475479040, "q_proj": { "group_size": { @@ -10152,7 +10152,7 @@ } }, { - "accuracy": 0.877457016392758, + "accuracy": 0.8754885196685791, "total_bits": 609759232, "q_proj": { "group_size": { @@ -10204,7 +10204,7 @@ } }, { - "accuracy": 0.8846545595871775, + "accuracy": 0.8839873012743498, "total_bits": 610024448, "q_proj": { "group_size": { @@ -10256,7 +10256,7 @@ } }, { - "accuracy": 0.8954976357911762, + "accuracy": 0.8947758109946, "total_bits": 615020544, "q_proj": { "group_size": { @@ -10308,7 +10308,7 @@ } }, { - "accuracy": 0.8963029823805156, + "accuracy": 0.8958088473269814, "total_bits": 623951872, "q_proj": { "group_size": { @@ -10360,7 +10360,7 @@ } }, { - "accuracy": 0.9190350331758198, + "accuracy": 0.9192430031927008, "total_bits": 626473984, "q_proj": { "group_size": { @@ -10424,7 +10424,7 @@ } }, { - "accuracy": 0.924234948660198, + "accuracy": 0.9238532091441908, "total_bits": 630355968, "q_proj": { "group_size": { @@ -10488,7 +10488,7 @@ } }, { - "accuracy": 0.9244901130073949, + "accuracy": 0.9244710646177593, "total_bits": 637362176, "q_proj": { "group_size": { @@ -10549,7 +10549,7 @@ } }, { - "accuracy": 0.9298704423402485, + "accuracy": 0.9297026207572535, "total_bits": 646823936, "q_proj": { "group_size": { @@ -10610,7 +10610,7 @@ } }, { - "accuracy": 0.9615941141781054, + "accuracy": 0.9612778707554466, "total_bits": 784740352, "q_proj": { "group_size": { @@ -10671,7 +10671,7 @@ } }, { - "accuracy": 0.9679191520339564, + "accuracy": 0.9676413692926106, "total_bits": 797818880, "q_proj": { "group_size": { @@ -10732,7 +10732,7 @@ } }, { - "accuracy": 0.9678152360414204, + "accuracy": 0.967487843413102, "total_bits": 911749120, "q_proj": { "group_size": { @@ -10784,7 +10784,7 @@ } }, { - "accuracy": 0.9862690191519888, + "accuracy": 0.9861601528368498, "total_bits": 942718976, "q_proj": { "group_size": { @@ -10836,7 +10836,7 @@ } }, { - "accuracy": 0.9908282811704435, + "accuracy": 0.9907523189720354, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -10890,7 +10890,7 @@ ], "mlp": [ { - "accuracy": 0.699706755186382, + "accuracy": 0.6991309617695056, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -10942,7 +10942,7 @@ } }, { - "accuracy": 0.7064296069898104, + "accuracy": 0.7058564738223427, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -10994,7 +10994,7 @@ } }, { - "accuracy": 0.7572035789489746, + "accuracy": 0.7566944674441689, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -11043,7 +11043,7 @@ } }, { - "accuracy": 0.7728706911990517, + "accuracy": 0.7723744041041324, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -11092,7 +11092,7 @@ } }, { - "accuracy": 0.8500366336420963, + "accuracy": 0.8498798922488564, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -11144,7 +11144,7 @@ } }, { - "accuracy": 0.8615679866389224, + "accuracy": 0.8613221770838687, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -11196,7 +11196,7 @@ } }, { - "accuracy": 0.8835433282350239, + "accuracy": 0.8833026885986328, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -11245,7 +11245,7 @@ } }, { - "accuracy": 0.9229622075432226, + "accuracy": 0.9229009967101247, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -11288,7 +11288,7 @@ } }, { - "accuracy": 0.9299600689034713, + "accuracy": 0.9298233860417416, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -11331,7 +11331,7 @@ } }, { - "accuracy": 0.9242575356834813, + "accuracy": 0.9241542816162109, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -11383,7 +11383,7 @@ } }, { - "accuracy": 0.9334442364542108, + "accuracy": 0.93333343455666, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -11435,7 +11435,7 @@ } }, { - "accuracy": 0.9616082718497828, + "accuracy": 0.9615324390561957, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -11487,7 +11487,7 @@ } }, { - "accuracy": 0.9669082447102195, + "accuracy": 0.966855482051247, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -11539,7 +11539,7 @@ } }, { - "accuracy": 0.9794896668509433, + "accuracy": 0.9794374327910574, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -11582,7 +11582,7 @@ } }, { - "accuracy": 0.9804868039331938, + "accuracy": 0.9804609998276359, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -11631,7 +11631,7 @@ } }, { - "accuracy": 0.984886475299534, + "accuracy": 0.9848499360837435, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -11677,7 +11677,7 @@ } }, { - "accuracy": 0.9945823263965154, + "accuracy": 0.994569506692259, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -11721,7 +11721,7 @@ "model.layers.6.parallel_decoder": { "attn": [ { - "accuracy": 0.7184301677503084, + "accuracy": 0.7171090527584678, "total_bits": 320757760, "q_proj": { "group_size": { @@ -11785,7 +11785,7 @@ } }, { - "accuracy": 0.7352707260533383, + "accuracy": 0.7340499978316457, "total_bits": 329080832, "q_proj": { "group_size": { @@ -11849,7 +11849,7 @@ } }, { - "accuracy": 0.7796402479472914, + "accuracy": 0.7779284276460346, "total_bits": 336024576, "q_proj": { "group_size": { @@ -11913,7 +11913,7 @@ } }, { - "accuracy": 0.8465726250096371, + "accuracy": 0.8446460021169562, "total_bits": 401557504, "q_proj": { "group_size": { @@ -11977,7 +11977,7 @@ } }, { - "accuracy": 0.861799114628842, + "accuracy": 0.8603013942116186, "total_bits": 475279360, "q_proj": { "group_size": { @@ -12041,7 +12041,7 @@ } }, { - "accuracy": 0.8686199188232422, + "accuracy": 0.8685734522970099, "total_bits": 475479040, "q_proj": { "group_size": { @@ -12105,7 +12105,7 @@ } }, { - "accuracy": 0.9046373304567839, + "accuracy": 0.9037018826133326, "total_bits": 609759232, "q_proj": { "group_size": { @@ -12157,7 +12157,7 @@ } }, { - "accuracy": 0.9150624463432714, + "accuracy": 0.9145684054023341, "total_bits": 610024448, "q_proj": { "group_size": { @@ -12209,7 +12209,7 @@ } }, { - "accuracy": 0.9275906399676674, + "accuracy": 0.927028310926337, "total_bits": 615020544, "q_proj": { "group_size": { @@ -12261,7 +12261,7 @@ } }, { - "accuracy": 0.930868851511102, + "accuracy": 0.9300624947798879, "total_bits": 623951872, "q_proj": { "group_size": { @@ -12313,7 +12313,7 @@ } }, { - "accuracy": 0.9334792526144731, + "accuracy": 0.9331959423265959, "total_bits": 626473984, "q_proj": { "group_size": { @@ -12377,7 +12377,7 @@ } }, { - "accuracy": 0.9393957602350336, + "accuracy": 0.9387579277942055, "total_bits": 630355968, "q_proj": { "group_size": { @@ -12441,7 +12441,7 @@ } }, { - "accuracy": 0.9413814419194272, + "accuracy": 0.9410730349390131, "total_bits": 637362176, "q_proj": { "group_size": { @@ -12502,7 +12502,7 @@ } }, { - "accuracy": 0.9481056050250405, + "accuracy": 0.9475394236414056, "total_bits": 646823936, "q_proj": { "group_size": { @@ -12563,7 +12563,7 @@ } }, { - "accuracy": 0.9661842804206044, + "accuracy": 0.9659898814402128, "total_bits": 784740352, "q_proj": { "group_size": { @@ -12624,7 +12624,7 @@ } }, { - "accuracy": 0.9740267631254698, + "accuracy": 0.9737915553544697, "total_bits": 797818880, "q_proj": { "group_size": { @@ -12685,7 +12685,7 @@ } }, { - "accuracy": 0.97420608056219, + "accuracy": 0.9739777085028196, "total_bits": 911749120, "q_proj": { "group_size": { @@ -12737,7 +12737,7 @@ } }, { - "accuracy": 0.9873696440144589, + "accuracy": 0.9873301975036922, "total_bits": 942718976, "q_proj": { "group_size": { @@ -12789,7 +12789,7 @@ } }, { - "accuracy": 0.9935640645654578, + "accuracy": 0.9934940624393915, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -12843,7 +12843,7 @@ ], "mlp": [ { - "accuracy": 0.685275253496672, + "accuracy": 0.6837203377171567, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -12895,7 +12895,7 @@ } }, { - "accuracy": 0.6924628458525005, + "accuracy": 0.6909211058365672, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -12947,7 +12947,7 @@ } }, { - "accuracy": 0.7435152405186704, + "accuracy": 0.7422634676883095, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -12996,7 +12996,7 @@ } }, { - "accuracy": 0.7589905889410722, + "accuracy": 0.7578439210590563, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -13045,7 +13045,7 @@ } }, { - "accuracy": 0.8418459515822561, + "accuracy": 0.8410728354203074, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -13097,7 +13097,7 @@ } }, { - "accuracy": 0.853980729454442, + "accuracy": 0.8532597893162778, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -13149,7 +13149,7 @@ } }, { - "accuracy": 0.8757366883127313, + "accuracy": 0.8751464266526072, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -13198,7 +13198,7 @@ } }, { - "accuracy": 0.9188842020536724, + "accuracy": 0.9184649994498805, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -13241,7 +13241,7 @@ } }, { - "accuracy": 0.9259668902346962, + "accuracy": 0.9255850754286113, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -13284,7 +13284,7 @@ } }, { - "accuracy": 0.9199023623215525, + "accuracy": 0.9195253221612227, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -13336,7 +13336,7 @@ } }, { - "accuracy": 0.9296267283590216, + "accuracy": 0.9292710768549066, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -13388,7 +13388,7 @@ } }, { - "accuracy": 0.9593644863680789, + "accuracy": 0.9591610651267202, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -13440,7 +13440,7 @@ } }, { - "accuracy": 0.9650173500964516, + "accuracy": 0.9648434331542567, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -13492,7 +13492,7 @@ } }, { - "accuracy": 0.9783104877722891, + "accuracy": 0.9782083222740575, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -13535,7 +13535,7 @@ } }, { - "accuracy": 0.9793287578381991, + "accuracy": 0.9792162117205168, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -13584,7 +13584,7 @@ } }, { - "accuracy": 0.9837040995296679, + "accuracy": 0.9836208365465465, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -13630,7 +13630,7 @@ } }, { - "accuracy": 0.9943266299210096, + "accuracy": 0.9942956355057264, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -13674,7 +13674,7 @@ "model.layers.7.parallel_decoder": { "attn": [ { - "accuracy": 0.6973479923449064, + "accuracy": 0.6947617530822754, "total_bits": 320757760, "q_proj": { "group_size": { @@ -13738,7 +13738,7 @@ } }, { - "accuracy": 0.7130626126339561, + "accuracy": 0.7106325249922902, "total_bits": 329080832, "q_proj": { "group_size": { @@ -13802,7 +13802,7 @@ } }, { - "accuracy": 0.7618667953892758, + "accuracy": 0.7608420221429122, "total_bits": 336024576, "q_proj": { "group_size": { @@ -13866,7 +13866,7 @@ } }, { - "accuracy": 0.8325401481829191, + "accuracy": 0.8318253567344264, "total_bits": 401557504, "q_proj": { "group_size": { @@ -13930,7 +13930,7 @@ } }, { - "accuracy": 0.8498281303204989, + "accuracy": 0.8494088273299367, "total_bits": 475279360, "q_proj": { "group_size": { @@ -13994,7 +13994,7 @@ } }, { - "accuracy": 0.8553873237810636, + "accuracy": 0.8548634679693925, "total_bits": 475479040, "q_proj": { "group_size": { @@ -14058,7 +14058,7 @@ } }, { - "accuracy": 0.8955688853012889, + "accuracy": 0.8944068331467478, "total_bits": 609759232, "q_proj": { "group_size": { @@ -14110,7 +14110,7 @@ } }, { - "accuracy": 0.9023165075402511, + "accuracy": 0.9022515698483116, "total_bits": 610024448, "q_proj": { "group_size": { @@ -14162,7 +14162,7 @@ } }, { - "accuracy": 0.9170402664887278, + "accuracy": 0.916485679777045, "total_bits": 615020544, "q_proj": { "group_size": { @@ -14214,7 +14214,7 @@ } }, { - "accuracy": 0.9204934208016646, + "accuracy": 0.920021232805754, "total_bits": 623951872, "q_proj": { "group_size": { @@ -14266,7 +14266,7 @@ } }, { - "accuracy": 0.9267715905842028, + "accuracy": 0.925903176006518, "total_bits": 626473984, "q_proj": { "group_size": { @@ -14330,7 +14330,7 @@ } }, { - "accuracy": 0.9343336444152028, + "accuracy": 0.933968186378479, "total_bits": 630355968, "q_proj": { "group_size": { @@ -14394,7 +14394,7 @@ } }, { - "accuracy": 0.9344904234534815, + "accuracy": 0.9341778127770675, "total_bits": 637362176, "q_proj": { "group_size": { @@ -14455,7 +14455,7 @@ } }, { - "accuracy": 0.9428238805971647, + "accuracy": 0.9425164774844521, "total_bits": 646823936, "q_proj": { "group_size": { @@ -14516,7 +14516,7 @@ } }, { - "accuracy": 0.9623189097956607, + "accuracy": 0.9620891809463501, "total_bits": 784740352, "q_proj": { "group_size": { @@ -14577,7 +14577,7 @@ } }, { - "accuracy": 0.9712523880757784, + "accuracy": 0.9711103376589323, "total_bits": 797818880, "q_proj": { "group_size": { @@ -14638,7 +14638,7 @@ } }, { - "accuracy": 0.9706006708898043, + "accuracy": 0.9703365940796701, "total_bits": 911749120, "q_proj": { "group_size": { @@ -14690,7 +14690,7 @@ } }, { - "accuracy": 0.9860006915895563, + "accuracy": 0.9859914732606787, "total_bits": 942718976, "q_proj": { "group_size": { @@ -14742,7 +14742,7 @@ } }, { - "accuracy": 0.9925781245294371, + "accuracy": 0.992503683818014, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -14796,7 +14796,7 @@ ], "mlp": [ { - "accuracy": 0.6844172226755243, + "accuracy": 0.6836129238730984, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -14848,7 +14848,7 @@ } }, { - "accuracy": 0.6913746783607884, + "accuracy": 0.6906411773280093, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -14900,7 +14900,7 @@ } }, { - "accuracy": 0.7482556041918302, + "accuracy": 0.7475717946102745, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -14949,7 +14949,7 @@ } }, { - "accuracy": 0.7649227192527369, + "accuracy": 0.764268348091527, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -14998,7 +14998,7 @@ } }, { - "accuracy": 0.8421635753230045, + "accuracy": 0.8417501951518812, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -15050,7 +15050,7 @@ } }, { - "accuracy": 0.8542898203197279, + "accuracy": 0.8539358816648784, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -15102,7 +15102,7 @@ } }, { - "accuracy": 0.8781870791786596, + "accuracy": 0.8778771099291349, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -15151,7 +15151,7 @@ } }, { - "accuracy": 0.919031676493193, + "accuracy": 0.9188188753629986, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -15194,7 +15194,7 @@ } }, { - "accuracy": 0.926277010064376, + "accuracy": 0.9260690525958413, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -15237,7 +15237,7 @@ } }, { - "accuracy": 0.9200108866942556, + "accuracy": 0.9198071705667596, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -15289,7 +15289,7 @@ } }, { - "accuracy": 0.9297922535946495, + "accuracy": 0.9296059984909861, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -15341,7 +15341,7 @@ } }, { - "accuracy": 0.9593842876584906, + "accuracy": 0.9592811747601158, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -15393,7 +15393,7 @@ } }, { - "accuracy": 0.9650809702120329, + "accuracy": 0.9649907915215743, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -15445,7 +15445,7 @@ } }, { - "accuracy": 0.9783481218312916, + "accuracy": 0.9782886803150177, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -15488,7 +15488,7 @@ } }, { - "accuracy": 0.9793106741026828, + "accuracy": 0.9792689571255132, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -15537,7 +15537,7 @@ } }, { - "accuracy": 0.9841450656715193, + "accuracy": 0.9841038271000511, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -15583,7 +15583,7 @@ } }, { - "accuracy": 0.9943224070103545, + "accuracy": 0.9943109633106935, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -15627,7 +15627,7 @@ "model.layers.8.parallel_decoder": { "attn": [ { - "accuracy": 0.7229892078198885, + "accuracy": 0.7178720173082853, "total_bits": 320757760, "q_proj": { "group_size": { @@ -15691,7 +15691,7 @@ } }, { - "accuracy": 0.7361911221554405, + "accuracy": 0.7325401808086194, "total_bits": 329080832, "q_proj": { "group_size": { @@ -15755,7 +15755,7 @@ } }, { - "accuracy": 0.7804997343766062, + "accuracy": 0.7790354929472271, "total_bits": 336024576, "q_proj": { "group_size": { @@ -15819,7 +15819,7 @@ } }, { - "accuracy": 0.8412762315649736, + "accuracy": 0.840303759825857, "total_bits": 401557504, "q_proj": { "group_size": { @@ -15883,7 +15883,7 @@ } }, { - "accuracy": 0.8637462666160182, + "accuracy": 0.8627012403387773, "total_bits": 475279360, "q_proj": { "group_size": { @@ -15947,7 +15947,7 @@ } }, { - "accuracy": 0.8686705639487818, + "accuracy": 0.8681517651206568, "total_bits": 475479040, "q_proj": { "group_size": { @@ -16011,7 +16011,7 @@ } }, { - "accuracy": 0.9091550801929674, + "accuracy": 0.9081941215615523, "total_bits": 609759232, "q_proj": { "group_size": { @@ -16063,7 +16063,7 @@ } }, { - "accuracy": 0.9155886235990023, + "accuracy": 0.915100423913253, "total_bits": 610024448, "q_proj": { "group_size": { @@ -16115,7 +16115,7 @@ } }, { - "accuracy": 0.9199970270458021, + "accuracy": 0.9197991270768016, "total_bits": 615020544, "q_proj": { "group_size": { @@ -16167,7 +16167,7 @@ } }, { - "accuracy": 0.9233627068369012, + "accuracy": 0.9230885756643195, "total_bits": 623951872, "q_proj": { "group_size": { @@ -16219,7 +16219,7 @@ } }, { - "accuracy": 0.9329912223313984, + "accuracy": 0.9325782123364901, "total_bits": 626473984, "q_proj": { "group_size": { @@ -16283,7 +16283,7 @@ } }, { - "accuracy": 0.937253619495191, + "accuracy": 0.9369118652845684, "total_bits": 630355968, "q_proj": { "group_size": { @@ -16347,7 +16347,7 @@ } }, { - "accuracy": 0.9414079440267462, + "accuracy": 0.9410765171051025, "total_bits": 637362176, "q_proj": { "group_size": { @@ -16408,7 +16408,7 @@ } }, { - "accuracy": 0.9454405935187089, + "accuracy": 0.9450675751033583, "total_bits": 646823936, "q_proj": { "group_size": { @@ -16469,7 +16469,7 @@ } }, { - "accuracy": 0.967809608108119, + "accuracy": 0.967576337488074, "total_bits": 784740352, "q_proj": { "group_size": { @@ -16530,7 +16530,7 @@ } }, { - "accuracy": 0.9729384654446652, + "accuracy": 0.972736026111402, "total_bits": 797818880, "q_proj": { "group_size": { @@ -16591,7 +16591,7 @@ } }, { - "accuracy": 0.9761649730958437, + "accuracy": 0.9759950857413443, "total_bits": 911749120, "q_proj": { "group_size": { @@ -16643,7 +16643,7 @@ } }, { - "accuracy": 0.98737722556842, + "accuracy": 0.9872688556972303, "total_bits": 942718976, "q_proj": { "group_size": { @@ -16695,7 +16695,7 @@ } }, { - "accuracy": 0.9936483227892926, + "accuracy": 0.9936070591211319, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -16749,7 +16749,7 @@ ], "mlp": [ { - "accuracy": 0.6741455730639005, + "accuracy": 0.6734043171531277, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -16801,7 +16801,7 @@ } }, { - "accuracy": 0.6812077070537366, + "accuracy": 0.6803999951011257, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -16853,7 +16853,7 @@ } }, { - "accuracy": 0.7410611604389392, + "accuracy": 0.7403162153143632, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -16902,7 +16902,7 @@ } }, { - "accuracy": 0.7586897799843236, + "accuracy": 0.7579722655446905, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -16951,7 +16951,7 @@ } }, { - "accuracy": 0.8361682013461464, + "accuracy": 0.835775049109208, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -17003,7 +17003,7 @@ } }, { - "accuracy": 0.8490070543791118, + "accuracy": 0.8486005507017437, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -17055,7 +17055,7 @@ } }, { - "accuracy": 0.8742049242320814, + "accuracy": 0.8738488272616738, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -17104,7 +17104,7 @@ } }, { - "accuracy": 0.9156801700592041, + "accuracy": 0.9154892908899408, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -17147,7 +17147,7 @@ } }, { - "accuracy": 0.923499439892016, + "accuracy": 0.9232889539317081, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -17190,7 +17190,7 @@ } }, { - "accuracy": 0.9168610823781866, + "accuracy": 0.9166822935405531, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -17242,7 +17242,7 @@ } }, { - "accuracy": 0.9271265582034462, + "accuracy": 0.9269414951926783, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -17294,7 +17294,7 @@ } }, { - "accuracy": 0.9577250512022721, + "accuracy": 0.957649428593485, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -17346,7 +17346,7 @@ } }, { - "accuracy": 0.9637361294344852, + "accuracy": 0.963640222423955, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -17398,7 +17398,7 @@ } }, { - "accuracy": 0.9774517667920966, + "accuracy": 0.977402936471136, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -17441,7 +17441,7 @@ } }, { - "accuracy": 0.9784741150705438, + "accuracy": 0.9784469651548486, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -17490,7 +17490,7 @@ } }, { - "accuracy": 0.983677183326922, + "accuracy": 0.9836429577124747, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -17536,7 +17536,7 @@ } }, { - "accuracy": 0.9940968936211184, + "accuracy": 0.994084537813538, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -17580,7 +17580,7 @@ "model.layers.9.parallel_decoder": { "attn": [ { - "accuracy": 0.7229996229472913, + "accuracy": 0.7222779926500822, "total_bits": 320757760, "q_proj": { "group_size": { @@ -17644,7 +17644,7 @@ } }, { - "accuracy": 0.7360302774529708, + "accuracy": 0.7357871406956723, "total_bits": 329080832, "q_proj": { "group_size": { @@ -17708,7 +17708,7 @@ } }, { - "accuracy": 0.7739863395690918, + "accuracy": 0.7739204607511821, "total_bits": 336024576, "q_proj": { "group_size": { @@ -17772,7 +17772,7 @@ } }, { - "accuracy": 0.8382024639531186, + "accuracy": 0.838586079446893, "total_bits": 401557504, "q_proj": { "group_size": { @@ -17836,7 +17836,7 @@ } }, { - "accuracy": 0.8668210631922671, + "accuracy": 0.8666386604309082, "total_bits": 475279360, "q_proj": { "group_size": { @@ -17900,7 +17900,7 @@ } }, { - "accuracy": 0.8683956547787315, + "accuracy": 0.8684832798807245, "total_bits": 475479040, "q_proj": { "group_size": { @@ -17964,7 +17964,7 @@ } }, { - "accuracy": 0.9173774970205206, + "accuracy": 0.9173508192363539, "total_bits": 609759232, "q_proj": { "group_size": { @@ -18016,7 +18016,7 @@ } }, { - "accuracy": 0.919261844534623, + "accuracy": 0.9192371807600322, "total_bits": 610024448, "q_proj": { "group_size": { @@ -18068,7 +18068,7 @@ } }, { - "accuracy": 0.9246192354904978, + "accuracy": 0.9246824038656134, "total_bits": 615020544, "q_proj": { "group_size": { @@ -18120,7 +18120,7 @@ } }, { - "accuracy": 0.9281639174411171, + "accuracy": 0.9283511575899626, "total_bits": 623951872, "q_proj": { "group_size": { @@ -18172,7 +18172,7 @@ } }, { - "accuracy": 0.93344877895556, + "accuracy": 0.9334932753914281, "total_bits": 626473984, "q_proj": { "group_size": { @@ -18236,7 +18236,7 @@ } }, { - "accuracy": 0.9376044775310316, + "accuracy": 0.9375480727145546, "total_bits": 630355968, "q_proj": { "group_size": { @@ -18300,7 +18300,7 @@ } }, { - "accuracy": 0.9424656190370259, + "accuracy": 0.9424798927809063, "total_bits": 637362176, "q_proj": { "group_size": { @@ -18361,7 +18361,7 @@ } }, { - "accuracy": 0.946555589374743, + "accuracy": 0.9464302376696938, "total_bits": 646823936, "q_proj": { "group_size": { @@ -18422,7 +18422,7 @@ } }, { - "accuracy": 0.9688654353744105, + "accuracy": 0.9689148664474487, "total_bits": 784740352, "q_proj": { "group_size": { @@ -18483,7 +18483,7 @@ } }, { - "accuracy": 0.9734061266246595, + "accuracy": 0.9733534831749765, "total_bits": 797818880, "q_proj": { "group_size": { @@ -18544,7 +18544,7 @@ } }, { - "accuracy": 0.9786784899862189, + "accuracy": 0.9788225622553575, "total_bits": 911749120, "q_proj": { "group_size": { @@ -18596,7 +18596,7 @@ } }, { - "accuracy": 0.9863154809725913, + "accuracy": 0.9862490751241383, "total_bits": 942718976, "q_proj": { "group_size": { @@ -18648,7 +18648,7 @@ } }, { - "accuracy": 0.9945238373781505, + "accuracy": 0.994455046559635, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -18702,7 +18702,7 @@ ], "mlp": [ { - "accuracy": 0.6649294401469983, + "accuracy": 0.6625137831035413, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -18754,7 +18754,7 @@ } }, { - "accuracy": 0.6722078323364258, + "accuracy": 0.6699484273007041, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -18806,7 +18806,7 @@ } }, { - "accuracy": 0.7308139048124614, + "accuracy": 0.72889706962987, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -18855,7 +18855,7 @@ } }, { - "accuracy": 0.7478083560341282, + "accuracy": 0.7460091992428428, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -18904,7 +18904,7 @@ } }, { - "accuracy": 0.8308923495443243, + "accuracy": 0.8296701406177721, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -18956,7 +18956,7 @@ } }, { - "accuracy": 0.8440813892766049, + "accuracy": 0.8429561665183619, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -19008,7 +19008,7 @@ } }, { - "accuracy": 0.8685500872762579, + "accuracy": 0.8675749050943475, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -19057,7 +19057,7 @@ } }, { - "accuracy": 0.9130764070310091, + "accuracy": 0.9124365417580855, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -19100,7 +19100,7 @@ } }, { - "accuracy": 0.9208055232700548, + "accuracy": 0.9201928188926295, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -19143,7 +19143,7 @@ } }, { - "accuracy": 0.9141742242010016, + "accuracy": 0.9135454140211406, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -19195,7 +19195,7 @@ } }, { - "accuracy": 0.9246984971197028, + "accuracy": 0.92414433077762, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -19247,7 +19247,7 @@ } }, { - "accuracy": 0.9564068066446405, + "accuracy": 0.9560979698833666, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -19299,7 +19299,7 @@ } }, { - "accuracy": 0.9625383019447327, + "accuracy": 0.9622555657436973, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -19351,7 +19351,7 @@ } }, { - "accuracy": 0.9767056264375386, + "accuracy": 0.9765310726667705, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -19394,7 +19394,7 @@ } }, { - "accuracy": 0.977818854545292, + "accuracy": 0.9776657333499507, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -19443,7 +19443,7 @@ } }, { - "accuracy": 0.9827762120648434, + "accuracy": 0.9826436199639973, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -19489,7 +19489,7 @@ } }, { - "accuracy": 0.9939040482828492, + "accuracy": 0.9938579429137079, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -19533,7 +19533,7 @@ "model.layers.10.parallel_decoder": { "attn": [ { - "accuracy": 0.7135610580444336, + "accuracy": 0.7131323061491314, "total_bits": 320757760, "q_proj": { "group_size": { @@ -19597,7 +19597,7 @@ } }, { - "accuracy": 0.7237266741300884, + "accuracy": 0.7240671860544305, "total_bits": 329080832, "q_proj": { "group_size": { @@ -19661,7 +19661,7 @@ } }, { - "accuracy": 0.7587997787877133, + "accuracy": 0.7587884350826866, "total_bits": 336024576, "q_proj": { "group_size": { @@ -19725,7 +19725,7 @@ } }, { - "accuracy": 0.8273733289618241, + "accuracy": 0.8271311333304957, "total_bits": 401557504, "q_proj": { "group_size": { @@ -19789,7 +19789,7 @@ } }, { - "accuracy": 0.8577963176526522, + "accuracy": 0.8578661868446752, "total_bits": 475279360, "q_proj": { "group_size": { @@ -19853,7 +19853,7 @@ } }, { - "accuracy": 0.85966914578488, + "accuracy": 0.8598044169576544, "total_bits": 475479040, "q_proj": { "group_size": { @@ -19917,7 +19917,7 @@ } }, { - "accuracy": 0.9124309891148618, + "accuracy": 0.9123753811183729, "total_bits": 609759232, "q_proj": { "group_size": { @@ -19969,7 +19969,7 @@ } }, { - "accuracy": 0.9152478983527735, + "accuracy": 0.9150966907802381, "total_bits": 610024448, "q_proj": { "group_size": { @@ -20021,7 +20021,7 @@ } }, { - "accuracy": 0.9209858806509721, + "accuracy": 0.9204901080382497, "total_bits": 615020544, "q_proj": { "group_size": { @@ -20073,7 +20073,7 @@ } }, { - "accuracy": 0.9246927938963238, + "accuracy": 0.924307151844627, "total_bits": 623951872, "q_proj": { "group_size": { @@ -20125,7 +20125,7 @@ } }, { - "accuracy": 0.9285727864817569, + "accuracy": 0.9288596855966669, "total_bits": 626473984, "q_proj": { "group_size": { @@ -20189,7 +20189,7 @@ } }, { - "accuracy": 0.9334276538146169, + "accuracy": 0.933487804312455, "total_bits": 630355968, "q_proj": { "group_size": { @@ -20253,7 +20253,7 @@ } }, { - "accuracy": 0.9375092355828536, + "accuracy": 0.9376008008655748, "total_bits": 637362176, "q_proj": { "group_size": { @@ -20314,7 +20314,7 @@ } }, { - "accuracy": 0.9423441322226274, + "accuracy": 0.9424179353212055, "total_bits": 646823936, "q_proj": { "group_size": { @@ -20375,7 +20375,7 @@ } }, { - "accuracy": 0.9660672482691313, + "accuracy": 0.9660602306064806, "total_bits": 784740352, "q_proj": { "group_size": { @@ -20436,7 +20436,7 @@ } }, { - "accuracy": 0.9709239382492868, + "accuracy": 0.9710563044799002, "total_bits": 797818880, "q_proj": { "group_size": { @@ -20497,7 +20497,7 @@ } }, { - "accuracy": 0.9767157890294728, + "accuracy": 0.9767474030193529, "total_bits": 911749120, "q_proj": { "group_size": { @@ -20549,7 +20549,7 @@ } }, { - "accuracy": 0.9858045091754511, + "accuracy": 0.9858621829434445, "total_bits": 942718976, "q_proj": { "group_size": { @@ -20601,7 +20601,7 @@ } }, { - "accuracy": 0.9939995392372734, + "accuracy": 0.9939952578983808, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -20655,7 +20655,7 @@ ], "mlp": [ { - "accuracy": 0.6493196738393683, + "accuracy": 0.6480170300132351, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -20707,7 +20707,7 @@ } }, { - "accuracy": 0.6572474680448833, + "accuracy": 0.6560789911370528, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -20759,7 +20759,7 @@ } }, { - "accuracy": 0.7167663323251825, + "accuracy": 0.7157074526736611, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -20808,7 +20808,7 @@ } }, { - "accuracy": 0.7337377949764854, + "accuracy": 0.7326942745007967, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -20857,7 +20857,7 @@ } }, { - "accuracy": 0.8237701340725547, + "accuracy": 0.8232135772705078, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -20909,7 +20909,7 @@ } }, { - "accuracy": 0.8375040606448525, + "accuracy": 0.8369740561435097, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -20961,7 +20961,7 @@ } }, { - "accuracy": 0.8619221260673121, + "accuracy": 0.861406263552214, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -21010,7 +21010,7 @@ } }, { - "accuracy": 0.9099128434532567, + "accuracy": 0.9095792833127474, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -21053,7 +21053,7 @@ } }, { - "accuracy": 0.9176687880566246, + "accuracy": 0.9173780679702759, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -21096,7 +21096,7 @@ } }, { - "accuracy": 0.9106724262237549, + "accuracy": 0.9103588116796393, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -21148,7 +21148,7 @@ } }, { - "accuracy": 0.9216151237487793, + "accuracy": 0.921336148914538, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -21200,7 +21200,7 @@ } }, { - "accuracy": 0.9546481653263694, + "accuracy": 0.9544960323132967, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -21252,7 +21252,7 @@ } }, { - "accuracy": 0.9610045081690738, + "accuracy": 0.9608698204944008, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -21304,7 +21304,7 @@ } }, { - "accuracy": 0.9758103207538003, + "accuracy": 0.9757211271085238, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -21347,7 +21347,7 @@ } }, { - "accuracy": 0.9768906078840557, + "accuracy": 0.9768041278186598, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -21396,7 +21396,7 @@ } }, { - "accuracy": 0.9817496757758292, + "accuracy": 0.9816816229569284, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -21442,7 +21442,7 @@ } }, { - "accuracy": 0.9935998998974499, + "accuracy": 0.9935754306222263, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -21486,7 +21486,7 @@ "model.layers.11.parallel_decoder": { "attn": [ { - "accuracy": 0.6922715086685984, + "accuracy": 0.6893722885533383, "total_bits": 320757760, "q_proj": { "group_size": { @@ -21550,7 +21550,7 @@ } }, { - "accuracy": 0.7062296114469829, + "accuracy": 0.7035648948267886, "total_bits": 329080832, "q_proj": { "group_size": { @@ -21614,7 +21614,7 @@ } }, { - "accuracy": 0.7488801353856137, + "accuracy": 0.7487627330579256, "total_bits": 336024576, "q_proj": { "group_size": { @@ -21678,7 +21678,7 @@ } }, { - "accuracy": 0.8159366406892475, + "accuracy": 0.8152556419372559, "total_bits": 401557504, "q_proj": { "group_size": { @@ -21742,7 +21742,7 @@ } }, { - "accuracy": 0.8417310338271291, + "accuracy": 0.8422885944968775, "total_bits": 475279360, "q_proj": { "group_size": { @@ -21806,7 +21806,7 @@ } }, { - "accuracy": 0.847347472843371, + "accuracy": 0.8471947720176295, "total_bits": 475479040, "q_proj": { "group_size": { @@ -21870,7 +21870,7 @@ } }, { - "accuracy": 0.8921878337860107, + "accuracy": 0.8920546079936781, "total_bits": 609759232, "q_proj": { "group_size": { @@ -21922,7 +21922,7 @@ } }, { - "accuracy": 0.8997265853379902, + "accuracy": 0.8993541127757022, "total_bits": 610024448, "q_proj": { "group_size": { @@ -21974,7 +21974,7 @@ } }, { - "accuracy": 0.9045609486730475, + "accuracy": 0.9049367465470967, "total_bits": 615020544, "q_proj": { "group_size": { @@ -22026,7 +22026,7 @@ } }, { - "accuracy": 0.9072253327620656, + "accuracy": 0.9072816873851576, "total_bits": 623951872, "q_proj": { "group_size": { @@ -22078,7 +22078,7 @@ } }, { - "accuracy": 0.9221781053041157, + "accuracy": 0.9223666693034925, "total_bits": 626473984, "q_proj": { "group_size": { @@ -22142,7 +22142,7 @@ } }, { - "accuracy": 0.926545262336731, + "accuracy": 0.9255400456880268, "total_bits": 630355968, "q_proj": { "group_size": { @@ -22206,7 +22206,7 @@ } }, { - "accuracy": 0.9303231490285773, + "accuracy": 0.9304186921370656, "total_bits": 637362176, "q_proj": { "group_size": { @@ -22267,7 +22267,7 @@ } }, { - "accuracy": 0.9359793913991827, + "accuracy": 0.9354439785605982, "total_bits": 646823936, "q_proj": { "group_size": { @@ -22328,7 +22328,7 @@ } }, { - "accuracy": 0.9607316067344264, + "accuracy": 0.9607107043266296, "total_bits": 784740352, "q_proj": { "group_size": { @@ -22389,7 +22389,7 @@ } }, { - "accuracy": 0.967981943958684, + "accuracy": 0.9677688479423523, "total_bits": 797818880, "q_proj": { "group_size": { @@ -22450,7 +22450,7 @@ } }, { - "accuracy": 0.9700206831881875, + "accuracy": 0.9699787466149581, "total_bits": 911749120, "q_proj": { "group_size": { @@ -22502,7 +22502,7 @@ } }, { - "accuracy": 0.9853722986422087, + "accuracy": 0.9850820961751436, "total_bits": 942718976, "q_proj": { "group_size": { @@ -22554,7 +22554,7 @@ } }, { - "accuracy": 0.9923194599779028, + "accuracy": 0.9922447447714052, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -22608,7 +22608,7 @@ ], "mlp": [ { - "accuracy": 0.6605804342972605, + "accuracy": 0.6606684985913729, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -22660,7 +22660,7 @@ } }, { - "accuracy": 0.6684230754249973, + "accuracy": 0.6684305040459884, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -22712,7 +22712,7 @@ } }, { - "accuracy": 0.7277434248673289, + "accuracy": 0.7277284170451916, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -22761,7 +22761,7 @@ } }, { - "accuracy": 0.7446123926263106, + "accuracy": 0.7445629019486277, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -22810,7 +22810,7 @@ } }, { - "accuracy": 0.8299119597987125, + "accuracy": 0.8299586396468313, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -22862,7 +22862,7 @@ } }, { - "accuracy": 0.8431510674326044, + "accuracy": 0.8431671042191355, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -22914,7 +22914,7 @@ } }, { - "accuracy": 0.8674780193128084, + "accuracy": 0.8674198953728927, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -22963,7 +22963,7 @@ } }, { - "accuracy": 0.9129310532620079, + "accuracy": 0.9129218866950587, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -23006,7 +23006,7 @@ } }, { - "accuracy": 0.9204476946278622, + "accuracy": 0.9204608139238859, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -23049,7 +23049,7 @@ } }, { - "accuracy": 0.9137608189331858, + "accuracy": 0.913766013948541, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -23101,7 +23101,7 @@ } }, { - "accuracy": 0.9243151702378926, + "accuracy": 0.924329262030752, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -23153,7 +23153,7 @@ } }, { - "accuracy": 0.956234473931162, + "accuracy": 0.9562352174206784, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -23205,7 +23205,7 @@ } }, { - "accuracy": 0.9623597640740245, + "accuracy": 0.9623708724975586, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -23257,7 +23257,7 @@ } }, { - "accuracy": 0.9766719388334375, + "accuracy": 0.976664778433348, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -23300,7 +23300,7 @@ } }, { - "accuracy": 0.9777011243920577, + "accuracy": 0.9776978053544697, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -23349,7 +23349,7 @@ } }, { - "accuracy": 0.982569401201449, + "accuracy": 0.9825538992881775, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -23395,7 +23395,7 @@ } }, { - "accuracy": 0.9939382978175816, + "accuracy": 0.993928930869228, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -23439,7 +23439,7 @@ "model.layers.12.parallel_decoder": { "attn": [ { - "accuracy": 0.7184821680972451, + "accuracy": 0.7172175959536904, "total_bits": 320757760, "q_proj": { "group_size": { @@ -23503,7 +23503,7 @@ } }, { - "accuracy": 0.727851340645238, + "accuracy": 0.727256323161878, "total_bits": 329080832, "q_proj": { "group_size": { @@ -23567,7 +23567,7 @@ } }, { - "accuracy": 0.7613092221711811, + "accuracy": 0.7612814903259277, "total_bits": 336024576, "q_proj": { "group_size": { @@ -23631,7 +23631,7 @@ } }, { - "accuracy": 0.8310658429798327, + "accuracy": 0.8308232458014237, "total_bits": 401557504, "q_proj": { "group_size": { @@ -23695,7 +23695,7 @@ } }, { - "accuracy": 0.8610975742340088, + "accuracy": 0.8609854171150609, "total_bits": 475279360, "q_proj": { "group_size": { @@ -23759,7 +23759,7 @@ } }, { - "accuracy": 0.863173246383667, + "accuracy": 0.8630021873273348, "total_bits": 475479040, "q_proj": { "group_size": { @@ -23823,7 +23823,7 @@ } }, { - "accuracy": 0.9212835086019415, + "accuracy": 0.9209087334181133, "total_bits": 609759232, "q_proj": { "group_size": { @@ -23875,7 +23875,7 @@ } }, { - "accuracy": 0.9236211651249936, + "accuracy": 0.9230942349684865, "total_bits": 610024448, "q_proj": { "group_size": { @@ -23927,7 +23927,7 @@ } }, { - "accuracy": 0.9277842985956293, + "accuracy": 0.9281583836204127, "total_bits": 615020544, "q_proj": { "group_size": { @@ -23979,7 +23979,7 @@ } }, { - "accuracy": 0.931805460076583, + "accuracy": 0.9316993387121904, "total_bits": 623951872, "q_proj": { "group_size": { @@ -24031,7 +24031,7 @@ } }, { - "accuracy": 0.9305801328859831, + "accuracy": 0.9306499456104479, "total_bits": 626473984, "q_proj": { "group_size": { @@ -24095,7 +24095,7 @@ } }, { - "accuracy": 0.9347809302179437, + "accuracy": 0.9348810848436857, "total_bits": 630355968, "q_proj": { "group_size": { @@ -24159,7 +24159,7 @@ } }, { - "accuracy": 0.9407110402458593, + "accuracy": 0.9406138595781828, "total_bits": 637362176, "q_proj": { "group_size": { @@ -24220,7 +24220,7 @@ } }, { - "accuracy": 0.9450614264136866, + "accuracy": 0.9451195127085635, "total_bits": 646823936, "q_proj": { "group_size": { @@ -24281,7 +24281,7 @@ } }, { - "accuracy": 0.9678486899325722, + "accuracy": 0.9678191291658502, "total_bits": 784740352, "q_proj": { "group_size": { @@ -24342,7 +24342,7 @@ } }, { - "accuracy": 0.9726081804225319, + "accuracy": 0.9726740090470565, "total_bits": 797818880, "q_proj": { "group_size": { @@ -24403,7 +24403,7 @@ } }, { - "accuracy": 0.9795026936029133, + "accuracy": 0.9794598880567049, "total_bits": 911749120, "q_proj": { "group_size": { @@ -24455,7 +24455,7 @@ } }, { - "accuracy": 0.9860845663045582, + "accuracy": 0.9860770388653404, "total_bits": 942718976, "q_proj": { "group_size": { @@ -24507,7 +24507,7 @@ } }, { - "accuracy": 0.9946372854082208, + "accuracy": 0.9946090700595003, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -24561,7 +24561,7 @@ ], "mlp": [ { - "accuracy": 0.6548186101411518, + "accuracy": 0.6545674926356265, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -24613,7 +24613,7 @@ } }, { - "accuracy": 0.6626467202839099, + "accuracy": 0.6624967173526162, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -24665,7 +24665,7 @@ } }, { - "accuracy": 0.7221582312332957, + "accuracy": 0.7218500689456337, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -24714,7 +24714,7 @@ } }, { - "accuracy": 0.7391289911772075, + "accuracy": 0.7388267768056769, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -24763,7 +24763,7 @@ } }, { - "accuracy": 0.8259853187360262, + "accuracy": 0.8258890101784154, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -24815,7 +24815,7 @@ } }, { - "accuracy": 0.8397457976090281, + "accuracy": 0.8396071760277999, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -24867,7 +24867,7 @@ } }, { - "accuracy": 0.8640775429575067, + "accuracy": 0.8639077889291864, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -24916,7 +24916,7 @@ } }, { - "accuracy": 0.9105970608560663, + "accuracy": 0.9105379079517565, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -24959,7 +24959,7 @@ } }, { - "accuracy": 0.9184078354584544, + "accuracy": 0.9183379097988731, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -25002,7 +25002,7 @@ } }, { - "accuracy": 0.9116419804723639, + "accuracy": 0.9115994415785137, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -25054,7 +25054,7 @@ } }, { - "accuracy": 0.9225402505774247, + "accuracy": 0.9224674576207211, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -25106,7 +25106,7 @@ } }, { - "accuracy": 0.955106606608943, + "accuracy": 0.9550855379355581, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -25158,7 +25158,7 @@ } }, { - "accuracy": 0.9614455919516713, + "accuracy": 0.9614145724396956, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -25210,7 +25210,7 @@ } }, { - "accuracy": 0.9759701098266401, + "accuracy": 0.9759513124039298, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -25253,7 +25253,7 @@ } }, { - "accuracy": 0.9771270751953125, + "accuracy": 0.9771081946398082, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -25302,7 +25302,7 @@ } }, { - "accuracy": 0.9820411644483867, + "accuracy": 0.9820088179487931, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -25348,7 +25348,7 @@ } }, { - "accuracy": 0.9937028747640158, + "accuracy": 0.9936935160505144, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -25392,7 +25392,7 @@ "model.layers.13.parallel_decoder": { "attn": [ { - "accuracy": 0.7142993525454873, + "accuracy": 0.7158812221727873, "total_bits": 320757760, "q_proj": { "group_size": { @@ -25456,7 +25456,7 @@ } }, { - "accuracy": 0.7320822163632041, + "accuracy": 0.7319947543897127, "total_bits": 329080832, "q_proj": { "group_size": { @@ -25520,7 +25520,7 @@ } }, { - "accuracy": 0.7705243010269969, + "accuracy": 0.7702314979151675, "total_bits": 336024576, "q_proj": { "group_size": { @@ -25584,7 +25584,7 @@ } }, { - "accuracy": 0.833736030679, + "accuracy": 0.8328599302392257, "total_bits": 401557504, "q_proj": { "group_size": { @@ -25648,7 +25648,7 @@ } }, { - "accuracy": 0.8642075438248484, + "accuracy": 0.8640968548624139, "total_bits": 475279360, "q_proj": { "group_size": { @@ -25712,7 +25712,7 @@ } }, { - "accuracy": 0.866164998004311, + "accuracy": 0.865727612846776, "total_bits": 475479040, "q_proj": { "group_size": { @@ -25776,7 +25776,7 @@ } }, { - "accuracy": 0.9211538088949103, + "accuracy": 0.9206512664493761, "total_bits": 609759232, "q_proj": { "group_size": { @@ -25828,7 +25828,7 @@ } }, { - "accuracy": 0.9231115830572028, + "accuracy": 0.9225078946665713, "total_bits": 610024448, "q_proj": { "group_size": { @@ -25880,7 +25880,7 @@ } }, { - "accuracy": 0.9284255065416035, + "accuracy": 0.9277529653749967, "total_bits": 615020544, "q_proj": { "group_size": { @@ -25932,7 +25932,7 @@ } }, { - "accuracy": 0.9318438078227796, + "accuracy": 0.9315539410239772, "total_bits": 623951872, "q_proj": { "group_size": { @@ -25984,7 +25984,7 @@ } }, { - "accuracy": 0.9320506924077084, + "accuracy": 0.9319805785229331, "total_bits": 626473984, "q_proj": { "group_size": { @@ -26048,7 +26048,7 @@ } }, { - "accuracy": 0.9361781572040758, + "accuracy": 0.9363740055184615, "total_bits": 630355968, "q_proj": { "group_size": { @@ -26112,7 +26112,7 @@ } }, { - "accuracy": 0.9419957399368286, + "accuracy": 0.9417399544464915, "total_bits": 637362176, "q_proj": { "group_size": { @@ -26173,7 +26173,7 @@ } }, { - "accuracy": 0.946492032000893, + "accuracy": 0.9466741837953266, "total_bits": 646823936, "q_proj": { "group_size": { @@ -26234,7 +26234,7 @@ } }, { - "accuracy": 0.9687038879645498, + "accuracy": 0.9685818364745692, "total_bits": 784740352, "q_proj": { "group_size": { @@ -26295,7 +26295,7 @@ } }, { - "accuracy": 0.9733376628474185, + "accuracy": 0.973180375601116, "total_bits": 797818880, "q_proj": { "group_size": { @@ -26356,7 +26356,7 @@ } }, { - "accuracy": 0.9795991386237898, + "accuracy": 0.9793900455299177, "total_bits": 911749120, "q_proj": { "group_size": { @@ -26408,7 +26408,7 @@ } }, { - "accuracy": 0.9865219295024872, + "accuracy": 0.986452474405891, "total_bits": 942718976, "q_proj": { "group_size": { @@ -26460,7 +26460,7 @@ } }, { - "accuracy": 0.994633580508985, + "accuracy": 0.9946639286844354, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -26514,7 +26514,7 @@ ], "mlp": [ { - "accuracy": 0.6693349135549445, + "accuracy": 0.6693553422626697, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -26566,7 +26566,7 @@ } }, { - "accuracy": 0.6772214738946212, + "accuracy": 0.6774971610621402, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -26618,7 +26618,7 @@ } }, { - "accuracy": 0.7359164137589305, + "accuracy": 0.736047669460899, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -26667,7 +26667,7 @@ } }, { - "accuracy": 0.7525643298500463, + "accuracy": 0.7526889349284925, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -26716,7 +26716,7 @@ } }, { - "accuracy": 0.8345015425431102, + "accuracy": 0.8344542101809853, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -26768,7 +26768,7 @@ } }, { - "accuracy": 0.8476507538243344, + "accuracy": 0.8476007988578395, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -26820,7 +26820,7 @@ } }, { - "accuracy": 0.8715789067117792, + "accuracy": 0.8714566732707777, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -26869,7 +26869,7 @@ } }, { - "accuracy": 0.9149250356774581, + "accuracy": 0.9148671627044678, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -26912,7 +26912,7 @@ } }, { - "accuracy": 0.9224488170523393, + "accuracy": 0.9224277609273007, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -26955,7 +26955,7 @@ } }, { - "accuracy": 0.9159371350940905, + "accuracy": 0.9159105200516551, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -27007,7 +27007,7 @@ } }, { - "accuracy": 0.9263843486183568, + "accuracy": 0.9263543392482557, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -27059,7 +27059,7 @@ } }, { - "accuracy": 0.9572272488945409, + "accuracy": 0.9572146032985888, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -27111,7 +27111,7 @@ } }, { - "accuracy": 0.9632891918483534, + "accuracy": 0.9632892890980369, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -27163,7 +27163,7 @@ } }, { - "accuracy": 0.9770109794641796, + "accuracy": 0.9770000827939886, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -27206,7 +27206,7 @@ } }, { - "accuracy": 0.9781589523742074, + "accuracy": 0.9781588143423984, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -27255,7 +27255,7 @@ } }, { - "accuracy": 0.9830474430008939, + "accuracy": 0.9830229109839389, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -27301,7 +27301,7 @@ } }, { - "accuracy": 0.9937480711623242, + "accuracy": 0.9937165363838798, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -27345,7 +27345,7 @@ "model.layers.14.parallel_decoder": { "attn": [ { - "accuracy": 0.6397197623001902, + "accuracy": 0.6399264837566174, "total_bits": 320757760, "q_proj": { "group_size": { @@ -27409,7 +27409,7 @@ } }, { - "accuracy": 0.65821835869237, + "accuracy": 0.6440424417194568, "total_bits": 329080832, "q_proj": { "group_size": { @@ -27473,7 +27473,7 @@ } }, { - "accuracy": 0.713675247995477, + "accuracy": 0.7136448559008146, "total_bits": 336024576, "q_proj": { "group_size": { @@ -27537,7 +27537,7 @@ } }, { - "accuracy": 0.7872930576926783, + "accuracy": 0.7870068048176012, "total_bits": 401557504, "q_proj": { "group_size": { @@ -27601,7 +27601,7 @@ } }, { - "accuracy": 0.8226040162538227, + "accuracy": 0.8213540629336709, "total_bits": 475279360, "q_proj": { "group_size": { @@ -27665,7 +27665,7 @@ } }, { - "accuracy": 0.8265840003364965, + "accuracy": 0.8266264262952303, "total_bits": 475479040, "q_proj": { "group_size": { @@ -27729,7 +27729,7 @@ } }, { - "accuracy": 0.8976886774364271, + "accuracy": 0.8984928570295635, "total_bits": 609759232, "q_proj": { "group_size": { @@ -27781,7 +27781,7 @@ } }, { - "accuracy": 0.901886174553319, + "accuracy": 0.9027060584018105, "total_bits": 610024448, "q_proj": { "group_size": { @@ -27833,7 +27833,7 @@ } }, { - "accuracy": 0.9098180030521593, + "accuracy": 0.9105992505424901, "total_bits": 615020544, "q_proj": { "group_size": { @@ -27885,7 +27885,7 @@ } }, { - "accuracy": 0.9144699824483771, + "accuracy": 0.9152552893287257, "total_bits": 623951872, "q_proj": { "group_size": { @@ -27937,7 +27937,7 @@ } }, { - "accuracy": 0.9120629523929796, + "accuracy": 0.9117947063947979, "total_bits": 626473984, "q_proj": { "group_size": { @@ -28001,7 +28001,7 @@ } }, { - "accuracy": 0.9168857461527774, + "accuracy": 0.9176255841004222, "total_bits": 630355968, "q_proj": { "group_size": { @@ -28065,7 +28065,7 @@ } }, { - "accuracy": 0.927789474788465, + "accuracy": 0.9276108929985448, "total_bits": 637362176, "q_proj": { "group_size": { @@ -28126,7 +28126,7 @@ } }, { - "accuracy": 0.9335401999322992, + "accuracy": 0.9337695837020874, "total_bits": 646823936, "q_proj": { "group_size": { @@ -28187,7 +28187,7 @@ } }, { - "accuracy": 0.9605362823134974, + "accuracy": 0.9604464988959462, "total_bits": 784740352, "q_proj": { "group_size": { @@ -28248,7 +28248,7 @@ } }, { - "accuracy": 0.9667365268657082, + "accuracy": 0.9668363050410622, "total_bits": 797818880, "q_proj": { "group_size": { @@ -28309,7 +28309,7 @@ } }, { - "accuracy": 0.9736072672040839, + "accuracy": 0.9735119060466164, "total_bits": 911749120, "q_proj": { "group_size": { @@ -28361,7 +28361,7 @@ } }, { - "accuracy": 0.9836675226688385, + "accuracy": 0.9835700831915203, "total_bits": 942718976, "q_proj": { "group_size": { @@ -28413,7 +28413,7 @@ } }, { - "accuracy": 0.9932071990088412, + "accuracy": 0.993139710081251, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -28467,7 +28467,7 @@ ], "mlp": [ { - "accuracy": 0.673156788474635, + "accuracy": 0.6729341808118319, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -28519,7 +28519,7 @@ } }, { - "accuracy": 0.6808419729533948, + "accuracy": 0.6805991875497919, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -28571,7 +28571,7 @@ } }, { - "accuracy": 0.7371211553874768, + "accuracy": 0.7367724117479826, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -28620,7 +28620,7 @@ } }, { - "accuracy": 0.7529672070553428, + "accuracy": 0.7525728125321238, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -28669,7 +28669,7 @@ } }, { - "accuracy": 0.8359082121598094, + "accuracy": 0.8358580187747353, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -28721,7 +28721,7 @@ } }, { - "accuracy": 0.8489597345653334, + "accuracy": 0.8488496102784809, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -28773,7 +28773,7 @@ } }, { - "accuracy": 0.8717456616853413, + "accuracy": 0.8715824453454268, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -28822,7 +28822,7 @@ } }, { - "accuracy": 0.9156707211544639, + "accuracy": 0.915613663823981, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -28865,7 +28865,7 @@ } }, { - "accuracy": 0.9230663650914243, + "accuracy": 0.9230076827500996, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -28908,7 +28908,7 @@ } }, { - "accuracy": 0.9165512135154322, + "accuracy": 0.916508160139385, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -28960,7 +28960,7 @@ } }, { - "accuracy": 0.9269398137142784, + "accuracy": 0.9269058327925832, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -29012,7 +29012,7 @@ } }, { - "accuracy": 0.9575041407033017, + "accuracy": 0.9574926056359944, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -29064,7 +29064,7 @@ } }, { - "accuracy": 0.9636012598087913, + "accuracy": 0.9635774022654483, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -29116,7 +29116,7 @@ } }, { - "accuracy": 0.9772207815396158, + "accuracy": 0.977208684933813, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -29159,7 +29159,7 @@ } }, { - "accuracy": 0.9783055123529936, + "accuracy": 0.9782976947332683, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -29208,7 +29208,7 @@ } }, { - "accuracy": 0.9829520253758681, + "accuracy": 0.982925650320555, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -29254,7 +29254,7 @@ } }, { - "accuracy": 0.9940131887009269, + "accuracy": 0.9940083215111181, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -29298,7 +29298,7 @@ "model.layers.15.parallel_decoder": { "attn": [ { - "accuracy": 0.705482608393619, + "accuracy": 0.7033411327161287, "total_bits": 320757760, "q_proj": { "group_size": { @@ -29362,7 +29362,7 @@ } }, { - "accuracy": 0.7141868942662289, + "accuracy": 0.7131129315024929, "total_bits": 329080832, "q_proj": { "group_size": { @@ -29426,7 +29426,7 @@ } }, { - "accuracy": 0.741490966395328, + "accuracy": 0.7401880966989618, "total_bits": 336024576, "q_proj": { "group_size": { @@ -29490,7 +29490,7 @@ } }, { - "accuracy": 0.8072676533146909, + "accuracy": 0.8053387466229891, "total_bits": 401557504, "q_proj": { "group_size": { @@ -29554,7 +29554,7 @@ } }, { - "accuracy": 0.8537831431940982, + "accuracy": 0.8532280420002184, "total_bits": 475279360, "q_proj": { "group_size": { @@ -29618,7 +29618,7 @@ } }, { - "accuracy": 0.8549135107743113, + "accuracy": 0.8545140467191997, "total_bits": 475479040, "q_proj": { "group_size": { @@ -29682,7 +29682,7 @@ } }, { - "accuracy": 0.9136941997628463, + "accuracy": 0.9120977426830091, "total_bits": 609759232, "q_proj": { "group_size": { @@ -29734,7 +29734,7 @@ } }, { - "accuracy": 0.9144668641843294, + "accuracy": 0.9136702386956466, "total_bits": 610024448, "q_proj": { "group_size": { @@ -29786,7 +29786,7 @@ } }, { - "accuracy": 0.9218364828511288, + "accuracy": 0.9214259448804354, "total_bits": 615020544, "q_proj": { "group_size": { @@ -29838,7 +29838,7 @@ } }, { - "accuracy": 0.925444013194034, + "accuracy": 0.9247671302996183, "total_bits": 623951872, "q_proj": { "group_size": { @@ -29890,7 +29890,7 @@ } }, { - "accuracy": 0.9266263309277987, + "accuracy": 0.9266670189405742, "total_bits": 626473984, "q_proj": { "group_size": { @@ -29954,7 +29954,7 @@ } }, { - "accuracy": 0.9310619642860011, + "accuracy": 0.9307883350472701, "total_bits": 630355968, "q_proj": { "group_size": { @@ -30018,7 +30018,7 @@ } }, { - "accuracy": 0.9348437221426713, + "accuracy": 0.9345434527648122, "total_bits": 637362176, "q_proj": { "group_size": { @@ -30079,7 +30079,7 @@ } }, { - "accuracy": 0.9400932286915026, + "accuracy": 0.9398234643434223, "total_bits": 646823936, "q_proj": { "group_size": { @@ -30140,7 +30140,7 @@ } }, { - "accuracy": 0.9650373458862305, + "accuracy": 0.9649168943103991, "total_bits": 784740352, "q_proj": { "group_size": { @@ -30201,7 +30201,7 @@ } }, { - "accuracy": 0.9700008536639967, + "accuracy": 0.9698789464799982, "total_bits": 797818880, "q_proj": { "group_size": { @@ -30262,7 +30262,7 @@ } }, { - "accuracy": 0.9775449664969194, + "accuracy": 0.9773763669164557, "total_bits": 911749120, "q_proj": { "group_size": { @@ -30314,7 +30314,7 @@ } }, { - "accuracy": 0.9848179299580423, + "accuracy": 0.984746084401482, "total_bits": 942718976, "q_proj": { "group_size": { @@ -30366,7 +30366,7 @@ } }, { - "accuracy": 0.9940975080979498, + "accuracy": 0.9940395225819788, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -30420,7 +30420,7 @@ ], "mlp": [ { - "accuracy": 0.6598416378623562, + "accuracy": 0.6591107719822934, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -30472,7 +30472,7 @@ } }, { - "accuracy": 0.6677388642963611, + "accuracy": 0.6670104327954745, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -30524,7 +30524,7 @@ } }, { - "accuracy": 0.7244610786437988, + "accuracy": 0.7238421942058362, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -30573,7 +30573,7 @@ } }, { - "accuracy": 0.7404677993372867, + "accuracy": 0.7398357140390497, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -30622,7 +30622,7 @@ } }, { - "accuracy": 0.8288162632992393, + "accuracy": 0.828441720259817, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -30674,7 +30674,7 @@ } }, { - "accuracy": 0.842328899785092, + "accuracy": 0.8419880867004395, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -30726,7 +30726,7 @@ } }, { - "accuracy": 0.8653788943039744, + "accuracy": 0.865041456724468, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -30775,7 +30775,7 @@ } }, { - "accuracy": 0.9121937249836168, + "accuracy": 0.9119949780012432, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -30818,7 +30818,7 @@ } }, { - "accuracy": 0.9197252675106651, + "accuracy": 0.919530567369963, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -30861,7 +30861,7 @@ } }, { - "accuracy": 0.9131049294220774, + "accuracy": 0.9129012634879664, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -30913,7 +30913,7 @@ } }, { - "accuracy": 0.9238081668552599, + "accuracy": 0.9236239696803846, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -30965,7 +30965,7 @@ } }, { - "accuracy": 0.9558639934188441, + "accuracy": 0.9557643626865587, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -31017,7 +31017,7 @@ } }, { - "accuracy": 0.9620823828797591, + "accuracy": 0.9619896317783155, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -31069,7 +31069,7 @@ } }, { - "accuracy": 0.9763351129858118, + "accuracy": 0.9762816585992512, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -31112,7 +31112,7 @@ } }, { - "accuracy": 0.9775229849313435, + "accuracy": 0.9774633097021204, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -31161,7 +31161,7 @@ } }, { - "accuracy": 0.9821246533017409, + "accuracy": 0.9820727423617714, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -31207,7 +31207,7 @@ } }, { - "accuracy": 0.9938028023431176, + "accuracy": 0.9937854640577969, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -31251,7 +31251,7 @@ "model.layers.16.parallel_decoder": { "attn": [ { - "accuracy": 0.6812107437535336, + "accuracy": 0.6813625285499973, "total_bits": 320757760, "q_proj": { "group_size": { @@ -31315,7 +31315,7 @@ } }, { - "accuracy": 0.6920685768127441, + "accuracy": 0.691225152266653, "total_bits": 329080832, "q_proj": { "group_size": { @@ -31379,7 +31379,7 @@ } }, { - "accuracy": 0.7237493364434493, + "accuracy": 0.7229241571928325, "total_bits": 336024576, "q_proj": { "group_size": { @@ -31443,7 +31443,7 @@ } }, { - "accuracy": 0.7910938890356767, + "accuracy": 0.7897071336445055, "total_bits": 401557504, "q_proj": { "group_size": { @@ -31507,7 +31507,7 @@ } }, { - "accuracy": 0.8390941619873047, + "accuracy": 0.8391526121842234, "total_bits": 475279360, "q_proj": { "group_size": { @@ -31571,7 +31571,7 @@ } }, { - "accuracy": 0.8426663750096371, + "accuracy": 0.8420228958129883, "total_bits": 475479040, "q_proj": { "group_size": { @@ -31635,7 +31635,7 @@ } }, { - "accuracy": 0.8995276375820762, + "accuracy": 0.8993227670067235, "total_bits": 609759232, "q_proj": { "group_size": { @@ -31687,7 +31687,7 @@ } }, { - "accuracy": 0.9040891434016981, + "accuracy": 0.9033532330864354, "total_bits": 610024448, "q_proj": { "group_size": { @@ -31739,7 +31739,7 @@ } }, { - "accuracy": 0.906617798303303, + "accuracy": 0.9058660080558375, "total_bits": 615020544, "q_proj": { "group_size": { @@ -31791,7 +31791,7 @@ } }, { - "accuracy": 0.9115954135593615, + "accuracy": 0.9108278186697709, "total_bits": 623951872, "q_proj": { "group_size": { @@ -31843,7 +31843,7 @@ } }, { - "accuracy": 0.9194964170455933, + "accuracy": 0.9193674765135113, "total_bits": 626473984, "q_proj": { "group_size": { @@ -31907,7 +31907,7 @@ } }, { - "accuracy": 0.9239708247937655, + "accuracy": 0.9240980838474474, "total_bits": 630355968, "q_proj": { "group_size": { @@ -31971,7 +31971,7 @@ } }, { - "accuracy": 0.9281876902831228, + "accuracy": 0.9280301646182412, "total_bits": 637362176, "q_proj": { "group_size": { @@ -32032,7 +32032,7 @@ } }, { - "accuracy": 0.9336344129160831, + "accuracy": 0.9333124035283139, "total_bits": 646823936, "q_proj": { "group_size": { @@ -32093,7 +32093,7 @@ } }, { - "accuracy": 0.9601649422394603, + "accuracy": 0.9600165047143635, "total_bits": 784740352, "q_proj": { "group_size": { @@ -32154,7 +32154,7 @@ } }, { - "accuracy": 0.9669628425648338, + "accuracy": 0.9667730958838212, "total_bits": 797818880, "q_proj": { "group_size": { @@ -32215,7 +32215,7 @@ } }, { - "accuracy": 0.9722917613230253, + "accuracy": 0.9721199462288305, "total_bits": 911749120, "q_proj": { "group_size": { @@ -32267,7 +32267,7 @@ } }, { - "accuracy": 0.9833353491205918, + "accuracy": 0.9833327233791351, "total_bits": 942718976, "q_proj": { "group_size": { @@ -32319,7 +32319,7 @@ } }, { - "accuracy": 0.9928657459585291, + "accuracy": 0.9928394744270727, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -32373,7 +32373,7 @@ ], "mlp": [ { - "accuracy": 0.650985692676745, + "accuracy": 0.650563591404965, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -32425,7 +32425,7 @@ } }, { - "accuracy": 0.6590003967285156, + "accuracy": 0.6585138722469932, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -32477,7 +32477,7 @@ } }, { - "accuracy": 0.7181311155620373, + "accuracy": 0.7176619830884432, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -32526,7 +32526,7 @@ } }, { - "accuracy": 0.7348160492746454, + "accuracy": 0.7343634806181255, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -32575,7 +32575,7 @@ } }, { - "accuracy": 0.8240081511045757, + "accuracy": 0.8237476097910028, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -32627,7 +32627,7 @@ } }, { - "accuracy": 0.8380181036497417, + "accuracy": 0.8377682033338045, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -32679,7 +32679,7 @@ } }, { - "accuracy": 0.8622029078634161, + "accuracy": 0.8619748040249473, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -32728,7 +32728,7 @@ } }, { - "accuracy": 0.9097444634688527, + "accuracy": 0.9096106541784186, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -32771,7 +32771,7 @@ } }, { - "accuracy": 0.9176338785573056, + "accuracy": 0.917495890667564, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -32814,7 +32814,7 @@ } }, { - "accuracy": 0.9105795370905023, + "accuracy": 0.910475636783399, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -32866,7 +32866,7 @@ } }, { - "accuracy": 0.9217014500969335, + "accuracy": 0.9215708092639321, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -32918,7 +32918,7 @@ } }, { - "accuracy": 0.9545370967764604, + "accuracy": 0.9544750922604611, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -32970,7 +32970,7 @@ } }, { - "accuracy": 0.961028023769981, + "accuracy": 0.9609685132378026, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -33022,7 +33022,7 @@ } }, { - "accuracy": 0.9756850286533958, + "accuracy": 0.975653234281038, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -33065,7 +33065,7 @@ } }, { - "accuracy": 0.976821221803364, + "accuracy": 0.9768037325457523, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -33114,7 +33114,7 @@ } }, { - "accuracy": 0.9816081288613772, + "accuracy": 0.981586544137252, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -33160,7 +33160,7 @@ } }, { - "accuracy": 0.9936368332097405, + "accuracy": 0.9936331028216764, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -33204,7 +33204,7 @@ "model.layers.17.parallel_decoder": { "attn": [ { - "accuracy": 0.6915423744603207, + "accuracy": 0.6895626469662315, "total_bits": 320757760, "q_proj": { "group_size": { @@ -33268,7 +33268,7 @@ } }, { - "accuracy": 0.7030346519068668, + "accuracy": 0.6997559698004472, "total_bits": 329080832, "q_proj": { "group_size": { @@ -33332,7 +33332,7 @@ } }, { - "accuracy": 0.737207713880037, + "accuracy": 0.7346908418755782, "total_bits": 336024576, "q_proj": { "group_size": { @@ -33396,7 +33396,7 @@ } }, { - "accuracy": 0.7993671768590024, + "accuracy": 0.7961003905848453, "total_bits": 401557504, "q_proj": { "group_size": { @@ -33460,7 +33460,7 @@ } }, { - "accuracy": 0.8450607249611303, + "accuracy": 0.8448643307936818, "total_bits": 475279360, "q_proj": { "group_size": { @@ -33524,7 +33524,7 @@ } }, { - "accuracy": 0.8495764983327765, + "accuracy": 0.8488544915851793, "total_bits": 475479040, "q_proj": { "group_size": { @@ -33588,7 +33588,7 @@ } }, { - "accuracy": 0.9053878972404882, + "accuracy": 0.9043745367150557, "total_bits": 609759232, "q_proj": { "group_size": { @@ -33640,7 +33640,7 @@ } }, { - "accuracy": 0.9115699153197439, + "accuracy": 0.9104067777332506, "total_bits": 610024448, "q_proj": { "group_size": { @@ -33692,7 +33692,7 @@ } }, { - "accuracy": 0.9167618626042416, + "accuracy": 0.9157234053862722, "total_bits": 615020544, "q_proj": { "group_size": { @@ -33744,7 +33744,7 @@ } }, { - "accuracy": 0.920927461824919, + "accuracy": 0.9200927458311382, "total_bits": 623951872, "q_proj": { "group_size": { @@ -33796,7 +33796,7 @@ } }, { - "accuracy": 0.9230995115480924, + "accuracy": 0.922433426505641, "total_bits": 626473984, "q_proj": { "group_size": { @@ -33860,7 +33860,7 @@ } }, { - "accuracy": 0.9272385647422389, + "accuracy": 0.9268047056700054, "total_bits": 630355968, "q_proj": { "group_size": { @@ -33924,7 +33924,7 @@ } }, { - "accuracy": 0.9333554004368029, + "accuracy": 0.9329010122700742, "total_bits": 637362176, "q_proj": { "group_size": { @@ -33985,7 +33985,7 @@ } }, { - "accuracy": 0.9372499428297344, + "accuracy": 0.9364999595441317, "total_bits": 646823936, "q_proj": { "group_size": { @@ -34046,7 +34046,7 @@ } }, { - "accuracy": 0.9633847067230626, + "accuracy": 0.9632555905141329, "total_bits": 784740352, "q_proj": { "group_size": { @@ -34107,7 +34107,7 @@ } }, { - "accuracy": 0.9689818244231375, + "accuracy": 0.9685660286953575, "total_bits": 797818880, "q_proj": { "group_size": { @@ -34168,7 +34168,7 @@ } }, { - "accuracy": 0.9752615533376995, + "accuracy": 0.9751446780405546, "total_bits": 911749120, "q_proj": { "group_size": { @@ -34220,7 +34220,7 @@ } }, { - "accuracy": 0.9845890089085227, + "accuracy": 0.9841720963779249, "total_bits": 942718976, "q_proj": { "group_size": { @@ -34272,7 +34272,7 @@ } }, { - "accuracy": 0.9934185398252386, + "accuracy": 0.9932921003354224, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -34326,7 +34326,7 @@ ], "mlp": [ { - "accuracy": 0.6503795824552837, + "accuracy": 0.6496923346268504, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -34378,7 +34378,7 @@ } }, { - "accuracy": 0.6583763925652755, + "accuracy": 0.657753442463122, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -34430,7 +34430,7 @@ } }, { - "accuracy": 0.7198282292014675, + "accuracy": 0.719266791092722, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -34479,7 +34479,7 @@ } }, { - "accuracy": 0.7374269334893477, + "accuracy": 0.7368705147191098, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -34528,7 +34528,7 @@ } }, { - "accuracy": 0.8235160551573101, + "accuracy": 0.8231917682446932, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -34580,7 +34580,7 @@ } }, { - "accuracy": 0.837579187593962, + "accuracy": 0.8372521776902049, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -34632,7 +34632,7 @@ } }, { - "accuracy": 0.8629321550068102, + "accuracy": 0.8626409329866108, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -34681,7 +34681,7 @@ } }, { - "accuracy": 0.9091423310731587, + "accuracy": 0.9089598467475489, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -34724,7 +34724,7 @@ } }, { - "accuracy": 0.9171937766828036, + "accuracy": 0.9170197562167519, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -34767,7 +34767,7 @@ } }, { - "accuracy": 0.9103293418884277, + "accuracy": 0.9101460168236181, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -34819,7 +34819,7 @@ } }, { - "accuracy": 0.9214512423465127, + "accuracy": 0.9213095526946218, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -34871,7 +34871,7 @@ } }, { - "accuracy": 0.9544085075980738, + "accuracy": 0.9543147557660153, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -34923,7 +34923,7 @@ } }, { - "accuracy": 0.9608957265552721, + "accuracy": 0.9608172931169209, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -34975,7 +34975,7 @@ } }, { - "accuracy": 0.9755180729062933, + "accuracy": 0.9754662796070701, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -35018,7 +35018,7 @@ } }, { - "accuracy": 0.9767854731333884, + "accuracy": 0.9767233729362488, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -35067,7 +35067,7 @@ } }, { - "accuracy": 0.9818634735910516, + "accuracy": 0.9818014643694225, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -35113,7 +35113,7 @@ } }, { - "accuracy": 0.9935563038054266, + "accuracy": 0.9935184254458076, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -35157,7 +35157,7 @@ "model.layers.18.parallel_decoder": { "attn": [ { - "accuracy": 0.6683647507115414, + "accuracy": 0.6672152469032688, "total_bits": 320757760, "q_proj": { "group_size": { @@ -35221,7 +35221,7 @@ } }, { - "accuracy": 0.6797219828555459, + "accuracy": 0.678667896672299, "total_bits": 329080832, "q_proj": { "group_size": { @@ -35285,7 +35285,7 @@ } }, { - "accuracy": 0.7132848940397564, + "accuracy": 0.7120807547318309, "total_bits": 336024576, "q_proj": { "group_size": { @@ -35349,7 +35349,7 @@ } }, { - "accuracy": 0.7862938579760099, + "accuracy": 0.7849223237288625, "total_bits": 401557504, "q_proj": { "group_size": { @@ -35413,7 +35413,7 @@ } }, { - "accuracy": 0.8339835844541851, + "accuracy": 0.8332334192175614, "total_bits": 475279360, "q_proj": { "group_size": { @@ -35477,7 +35477,7 @@ } }, { - "accuracy": 0.8365791847831324, + "accuracy": 0.8361876136378238, "total_bits": 475479040, "q_proj": { "group_size": { @@ -35541,7 +35541,7 @@ } }, { - "accuracy": 0.8999757892207095, + "accuracy": 0.8990242669456884, "total_bits": 609759232, "q_proj": { "group_size": { @@ -35593,7 +35593,7 @@ } }, { - "accuracy": 0.9033704180466502, + "accuracy": 0.9028075494264302, "total_bits": 610024448, "q_proj": { "group_size": { @@ -35645,7 +35645,7 @@ } }, { - "accuracy": 0.9071416603891473, + "accuracy": 0.906380760042291, "total_bits": 615020544, "q_proj": { "group_size": { @@ -35697,7 +35697,7 @@ } }, { - "accuracy": 0.9123597082338835, + "accuracy": 0.9112884308162489, "total_bits": 623951872, "q_proj": { "group_size": { @@ -35749,7 +35749,7 @@ } }, { - "accuracy": 0.9162097039975619, + "accuracy": 0.9160984566337184, "total_bits": 626473984, "q_proj": { "group_size": { @@ -35813,7 +35813,7 @@ } }, { - "accuracy": 0.920838337195547, + "accuracy": 0.9205675689797652, "total_bits": 630355968, "q_proj": { "group_size": { @@ -35877,7 +35877,7 @@ } }, { - "accuracy": 0.926239289735493, + "accuracy": 0.9260119638944927, "total_bits": 637362176, "q_proj": { "group_size": { @@ -35938,7 +35938,7 @@ } }, { - "accuracy": 0.9321047193125674, + "accuracy": 0.9316480661693373, "total_bits": 646823936, "q_proj": { "group_size": { @@ -35999,7 +35999,7 @@ } }, { - "accuracy": 0.9597703563539606, + "accuracy": 0.959575427205939, "total_bits": 784740352, "q_proj": { "group_size": { @@ -36060,7 +36060,7 @@ } }, { - "accuracy": 0.9660682678222656, + "accuracy": 0.9659076803608945, "total_bits": 797818880, "q_proj": { "group_size": { @@ -36121,7 +36121,7 @@ } }, { - "accuracy": 0.9730859932146574, + "accuracy": 0.972741468956596, "total_bits": 911749120, "q_proj": { "group_size": { @@ -36173,7 +36173,7 @@ } }, { - "accuracy": 0.982748248075184, + "accuracy": 0.9826397707587794, "total_bits": 942718976, "q_proj": { "group_size": { @@ -36225,7 +36225,7 @@ } }, { - "accuracy": 0.9928981363773346, + "accuracy": 0.9927917875741658, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -36279,7 +36279,7 @@ ], "mlp": [ { - "accuracy": 0.6473644658138877, + "accuracy": 0.646369783501876, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -36331,7 +36331,7 @@ } }, { - "accuracy": 0.6554441953960217, + "accuracy": 0.6545653594167609, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -36383,7 +36383,7 @@ } }, { - "accuracy": 0.7154294817071212, + "accuracy": 0.7147126448781866, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -36432,7 +36432,7 @@ } }, { - "accuracy": 0.7326007391277112, + "accuracy": 0.7319277713173313, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -36481,7 +36481,7 @@ } }, { - "accuracy": 0.8217547316300242, + "accuracy": 0.8212720720391524, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -36533,7 +36533,7 @@ } }, { - "accuracy": 0.835912779757851, + "accuracy": 0.835475532632125, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -36585,7 +36585,7 @@ } }, { - "accuracy": 0.860692162262766, + "accuracy": 0.8603179078353078, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -36634,7 +36634,7 @@ } }, { - "accuracy": 0.9082047688333612, + "accuracy": 0.9079406951603136, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -36677,7 +36677,7 @@ } }, { - "accuracy": 0.9163168543263486, + "accuracy": 0.9160895786787334, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -36720,7 +36720,7 @@ } }, { - "accuracy": 0.9094898888939306, + "accuracy": 0.9092459427682977, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -36772,7 +36772,7 @@ } }, { - "accuracy": 0.9206620078337819, + "accuracy": 0.9204541256553248, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -36824,7 +36824,7 @@ } }, { - "accuracy": 0.9540194492591054, + "accuracy": 0.9538837734021639, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -36876,7 +36876,7 @@ } }, { - "accuracy": 0.96051235575425, + "accuracy": 0.9604172706604004, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -36928,7 +36928,7 @@ } }, { - "accuracy": 0.9753376527836448, + "accuracy": 0.9752722162949411, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -36971,7 +36971,7 @@ } }, { - "accuracy": 0.9765955743036772, + "accuracy": 0.9765238463878632, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -37020,7 +37020,7 @@ } }, { - "accuracy": 0.9814812713547757, + "accuracy": 0.9814255880682092, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -37066,7 +37066,7 @@ } }, { - "accuracy": 0.9935723762763174, + "accuracy": 0.9935538439374221, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -37110,7 +37110,7 @@ "model.layers.19.parallel_decoder": { "attn": [ { - "accuracy": 0.7178474225495991, + "accuracy": 0.7156262146799188, "total_bits": 320757760, "q_proj": { "group_size": { @@ -37174,7 +37174,7 @@ } }, { - "accuracy": 0.7271571661296643, + "accuracy": 0.7273105571144505, "total_bits": 329080832, "q_proj": { "group_size": { @@ -37238,7 +37238,7 @@ } }, { - "accuracy": 0.760029064981561, + "accuracy": 0.7601070404052734, "total_bits": 336024576, "q_proj": { "group_size": { @@ -37302,7 +37302,7 @@ } }, { - "accuracy": 0.816328763961792, + "accuracy": 0.8167028301640561, "total_bits": 401557504, "q_proj": { "group_size": { @@ -37366,7 +37366,7 @@ } }, { - "accuracy": 0.8566306892194246, + "accuracy": 0.8557020237571314, "total_bits": 475279360, "q_proj": { "group_size": { @@ -37430,7 +37430,7 @@ } }, { - "accuracy": 0.8593312188198692, + "accuracy": 0.8581410458213404, "total_bits": 475479040, "q_proj": { "group_size": { @@ -37494,7 +37494,7 @@ } }, { - "accuracy": 0.9084076693183497, + "accuracy": 0.9072285827837492, "total_bits": 609759232, "q_proj": { "group_size": { @@ -37546,7 +37546,7 @@ } }, { - "accuracy": 0.9124306816803782, + "accuracy": 0.910527837903876, "total_bits": 610024448, "q_proj": { "group_size": { @@ -37598,7 +37598,7 @@ } }, { - "accuracy": 0.9185777588894493, + "accuracy": 0.9168931308545565, "total_bits": 615020544, "q_proj": { "group_size": { @@ -37650,7 +37650,7 @@ } }, { - "accuracy": 0.92133124878532, + "accuracy": 0.9206003264376992, "total_bits": 623951872, "q_proj": { "group_size": { @@ -37702,7 +37702,7 @@ } }, { - "accuracy": 0.9288980207945171, + "accuracy": 0.9279210065540514, "total_bits": 626473984, "q_proj": { "group_size": { @@ -37766,7 +37766,7 @@ } }, { - "accuracy": 0.9329711637998882, + "accuracy": 0.9324225250043368, "total_bits": 630355968, "q_proj": { "group_size": { @@ -37830,7 +37830,7 @@ } }, { - "accuracy": 0.9372502628125643, + "accuracy": 0.9362547397613525, "total_bits": 637362176, "q_proj": { "group_size": { @@ -37891,7 +37891,7 @@ } }, { - "accuracy": 0.9414442150216353, + "accuracy": 0.9407953463102642, "total_bits": 646823936, "q_proj": { "group_size": { @@ -37952,7 +37952,7 @@ } }, { - "accuracy": 0.9656679755763004, + "accuracy": 0.9653044311623824, "total_bits": 784740352, "q_proj": { "group_size": { @@ -38013,7 +38013,7 @@ } }, { - "accuracy": 0.9708228393604881, + "accuracy": 0.9703280643412941, "total_bits": 797818880, "q_proj": { "group_size": { @@ -38074,7 +38074,7 @@ } }, { - "accuracy": 0.9758188050044211, + "accuracy": 0.975429975672772, "total_bits": 911749120, "q_proj": { "group_size": { @@ -38126,7 +38126,7 @@ } }, { - "accuracy": 0.9858681685046146, + "accuracy": 0.9854332287060587, "total_bits": 942718976, "q_proj": { "group_size": { @@ -38178,7 +38178,7 @@ } }, { - "accuracy": 0.9932837000018672, + "accuracy": 0.9932689831445092, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -38232,7 +38232,7 @@ ], "mlp": [ { - "accuracy": 0.6460470651325427, + "accuracy": 0.6446895097431384, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -38284,7 +38284,7 @@ } }, { - "accuracy": 0.6543502807617188, + "accuracy": 0.653001910761783, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -38336,7 +38336,7 @@ } }, { - "accuracy": 0.7161908400686163, + "accuracy": 0.7150533826727616, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -38385,7 +38385,7 @@ } }, { - "accuracy": 0.734067816483347, + "accuracy": 0.7330231666564941, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -38434,7 +38434,7 @@ } }, { - "accuracy": 0.8210088328311318, + "accuracy": 0.8203123368714985, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -38486,7 +38486,7 @@ } }, { - "accuracy": 0.8353795126864785, + "accuracy": 0.8347144628825941, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -38538,7 +38538,7 @@ } }, { - "accuracy": 0.861140627610056, + "accuracy": 0.8605828410700748, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -38587,7 +38587,7 @@ } }, { - "accuracy": 0.9074560027373464, + "accuracy": 0.9070980172408254, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -38630,7 +38630,7 @@ } }, { - "accuracy": 0.9158874750137329, + "accuracy": 0.9155359205446745, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -38673,7 +38673,7 @@ } }, { - "accuracy": 0.9090137481689453, + "accuracy": 0.9086279178920545, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -38725,7 +38725,7 @@ } }, { - "accuracy": 0.9203724735661557, + "accuracy": 0.9200468502546612, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -38777,7 +38777,7 @@ } }, { - "accuracy": 0.9536887187706797, + "accuracy": 0.9535037969288073, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -38829,7 +38829,7 @@ } }, { - "accuracy": 0.9603533995778937, + "accuracy": 0.960187830423054, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -38881,7 +38881,7 @@ } }, { - "accuracy": 0.9749610957346464, + "accuracy": 0.9748576471680089, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -38924,7 +38924,7 @@ } }, { - "accuracy": 0.9764668894441504, + "accuracy": 0.9763712302634591, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -38973,7 +38973,7 @@ } }, { - "accuracy": 0.9815018537797426, + "accuracy": 0.9814308615107286, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -39019,7 +39019,7 @@ } }, { - "accuracy": 0.9934419436674369, + "accuracy": 0.993412205263188, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -39063,7 +39063,7 @@ "model.layers.20.parallel_decoder": { "attn": [ { - "accuracy": 0.6654969014619526, + "accuracy": 0.664184946762888, "total_bits": 320757760, "q_proj": { "group_size": { @@ -39127,7 +39127,7 @@ } }, { - "accuracy": 0.6779601448460629, + "accuracy": 0.6773340325606496, "total_bits": 329080832, "q_proj": { "group_size": { @@ -39191,7 +39191,7 @@ } }, { - "accuracy": 0.7089420870730752, + "accuracy": 0.7089114189147949, "total_bits": 336024576, "q_proj": { "group_size": { @@ -39255,7 +39255,7 @@ } }, { - "accuracy": 0.7870790832921079, + "accuracy": 0.7875252773887232, "total_bits": 401557504, "q_proj": { "group_size": { @@ -39319,7 +39319,7 @@ } }, { - "accuracy": 0.8327889818894236, + "accuracy": 0.8324088422875655, "total_bits": 475279360, "q_proj": { "group_size": { @@ -39383,7 +39383,7 @@ } }, { - "accuracy": 0.8350439197138736, + "accuracy": 0.835271207909835, "total_bits": 475479040, "q_proj": { "group_size": { @@ -39447,7 +39447,7 @@ } }, { - "accuracy": 0.9073468760440224, + "accuracy": 0.9069125903280157, "total_bits": 609759232, "q_proj": { "group_size": { @@ -39499,7 +39499,7 @@ } }, { - "accuracy": 0.9103449081119738, + "accuracy": 0.9095852876964369, "total_bits": 610024448, "q_proj": { "group_size": { @@ -39551,7 +39551,7 @@ } }, { - "accuracy": 0.9162530271630538, + "accuracy": 0.9160977476521542, "total_bits": 615020544, "q_proj": { "group_size": { @@ -39603,7 +39603,7 @@ } }, { - "accuracy": 0.920561526951037, + "accuracy": 0.9206809621108205, "total_bits": 623951872, "q_proj": { "group_size": { @@ -39655,7 +39655,7 @@ } }, { - "accuracy": 0.9167272103460211, + "accuracy": 0.9162685745640805, "total_bits": 626473984, "q_proj": { "group_size": { @@ -39719,7 +39719,7 @@ } }, { - "accuracy": 0.9214381983405665, + "accuracy": 0.9215375624204937, "total_bits": 630355968, "q_proj": { "group_size": { @@ -39783,7 +39783,7 @@ } }, { - "accuracy": 0.9287046507785195, + "accuracy": 0.9288172219928942, "total_bits": 637362176, "q_proj": { "group_size": { @@ -39844,7 +39844,7 @@ } }, { - "accuracy": 0.9344108355672736, + "accuracy": 0.9340780722467523, "total_bits": 646823936, "q_proj": { "group_size": { @@ -39905,7 +39905,7 @@ } }, { - "accuracy": 0.9616250427145707, + "accuracy": 0.9614803132257963, "total_bits": 784740352, "q_proj": { "group_size": { @@ -39966,7 +39966,7 @@ } }, { - "accuracy": 0.9672544880917198, + "accuracy": 0.9671885559433385, "total_bits": 797818880, "q_proj": { "group_size": { @@ -40027,7 +40027,7 @@ } }, { - "accuracy": 0.9761531698076349, + "accuracy": 0.9761049653354444, "total_bits": 911749120, "q_proj": { "group_size": { @@ -40079,7 +40079,7 @@ } }, { - "accuracy": 0.9833801906359824, + "accuracy": 0.9832926345498938, "total_bits": 942718976, "q_proj": { "group_size": { @@ -40131,7 +40131,7 @@ } }, { - "accuracy": 0.9938114446244741, + "accuracy": 0.9937725400454119, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -40185,7 +40185,7 @@ ], "mlp": [ { - "accuracy": 0.6733162026656301, + "accuracy": 0.6719796281111867, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -40237,7 +40237,7 @@ } }, { - "accuracy": 0.6806366820084422, + "accuracy": 0.679316520690918, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -40289,7 +40289,7 @@ } }, { - "accuracy": 0.7377414703369141, + "accuracy": 0.7366951139349687, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -40338,7 +40338,7 @@ } }, { - "accuracy": 0.754688212746068, + "accuracy": 0.7536976964850175, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -40387,7 +40387,7 @@ } }, { - "accuracy": 0.8339556518353914, + "accuracy": 0.8332654802422774, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -40439,7 +40439,7 @@ } }, { - "accuracy": 0.8472962630422491, + "accuracy": 0.8466521438799406, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -40491,7 +40491,7 @@ } }, { - "accuracy": 0.8714895624863475, + "accuracy": 0.8709361051258288, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -40540,7 +40540,7 @@ } }, { - "accuracy": 0.9137369582527562, + "accuracy": 0.9134091389806647, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -40583,7 +40583,7 @@ } }, { - "accuracy": 0.9215686321258545, + "accuracy": 0.9212399407436973, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -40626,7 +40626,7 @@ } }, { - "accuracy": 0.9155102089831704, + "accuracy": 0.9151732733375147, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -40678,7 +40678,7 @@ } }, { - "accuracy": 0.9260450476094296, + "accuracy": 0.9257458009217915, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -40730,7 +40730,7 @@ } }, { - "accuracy": 0.9570060811544719, + "accuracy": 0.9568320638255069, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -40782,7 +40782,7 @@ } }, { - "accuracy": 0.9631342887878418, + "accuracy": 0.962982642023187, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -40834,7 +40834,7 @@ } }, { - "accuracy": 0.9767100732577475, + "accuracy": 0.976622263067647, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -40877,7 +40877,7 @@ } }, { - "accuracy": 0.9781616204663327, + "accuracy": 0.9780774900787755, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -40926,7 +40926,7 @@ } }, { - "accuracy": 0.9830156188262137, + "accuracy": 0.9829625236360651, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -40972,7 +40972,7 @@ } }, { - "accuracy": 0.9938426237357291, + "accuracy": 0.9938212005715621, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -41016,7 +41016,7 @@ "model.layers.21.parallel_decoder": { "attn": [ { - "accuracy": 0.6944862917849892, + "accuracy": 0.6940723720349764, "total_bits": 320757760, "q_proj": { "group_size": { @@ -41080,7 +41080,7 @@ } }, { - "accuracy": 0.7052905935990184, + "accuracy": 0.705325026261179, "total_bits": 329080832, "q_proj": { "group_size": { @@ -41144,7 +41144,7 @@ } }, { - "accuracy": 0.7410126987256502, + "accuracy": 0.7404348473799856, "total_bits": 336024576, "q_proj": { "group_size": { @@ -41208,7 +41208,7 @@ } }, { - "accuracy": 0.8067785689705297, + "accuracy": 0.806272920809294, "total_bits": 401557504, "q_proj": { "group_size": { @@ -41272,7 +41272,7 @@ } }, { - "accuracy": 0.8484643634996916, + "accuracy": 0.8481228226109555, "total_bits": 475279360, "q_proj": { "group_size": { @@ -41336,7 +41336,7 @@ } }, { - "accuracy": 0.8505937676680715, + "accuracy": 0.8502160498970434, "total_bits": 475479040, "q_proj": { "group_size": { @@ -41400,7 +41400,7 @@ } }, { - "accuracy": 0.9101893901824951, + "accuracy": 0.9096568822860718, "total_bits": 609759232, "q_proj": { "group_size": { @@ -41452,7 +41452,7 @@ } }, { - "accuracy": 0.9125574388002095, + "accuracy": 0.9119447469711304, "total_bits": 610024448, "q_proj": { "group_size": { @@ -41504,7 +41504,7 @@ } }, { - "accuracy": 0.9187091526232267, + "accuracy": 0.919832141775834, "total_bits": 615020544, "q_proj": { "group_size": { @@ -41556,7 +41556,7 @@ } }, { - "accuracy": 0.9233871886604711, + "accuracy": 0.9243502805107519, "total_bits": 623951872, "q_proj": { "group_size": { @@ -41608,7 +41608,7 @@ } }, { - "accuracy": 0.9243726981313605, + "accuracy": 0.9242621095556962, "total_bits": 626473984, "q_proj": { "group_size": { @@ -41672,7 +41672,7 @@ } }, { - "accuracy": 0.9292550337942023, + "accuracy": 0.9293105790489599, "total_bits": 630355968, "q_proj": { "group_size": { @@ -41736,7 +41736,7 @@ } }, { - "accuracy": 0.9343060694242779, + "accuracy": 0.9341071091200176, "total_bits": 637362176, "q_proj": { "group_size": { @@ -41797,7 +41797,7 @@ } }, { - "accuracy": 0.9396643764094302, + "accuracy": 0.9396246985385293, "total_bits": 646823936, "q_proj": { "group_size": { @@ -41858,7 +41858,7 @@ } }, { - "accuracy": 0.9642696443356966, + "accuracy": 0.9643484542244359, "total_bits": 784740352, "q_proj": { "group_size": { @@ -41919,7 +41919,7 @@ } }, { - "accuracy": 0.9700466425795304, + "accuracy": 0.9700822892941927, "total_bits": 797818880, "q_proj": { "group_size": { @@ -41980,7 +41980,7 @@ } }, { - "accuracy": 0.9763597469580801, + "accuracy": 0.9765548235491702, "total_bits": 911749120, "q_proj": { "group_size": { @@ -42032,7 +42032,7 @@ } }, { - "accuracy": 0.9849160972394442, + "accuracy": 0.9849592180628526, "total_bits": 942718976, "q_proj": { "group_size": { @@ -42084,7 +42084,7 @@ } }, { - "accuracy": 0.9939284430522668, + "accuracy": 0.9939060619002894, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -42138,7 +42138,7 @@ ], "mlp": [ { - "accuracy": 0.648841657136616, + "accuracy": 0.6478494092037803, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -42190,7 +42190,7 @@ } }, { - "accuracy": 0.656892927069413, + "accuracy": 0.6559242700275623, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -42242,7 +42242,7 @@ } }, { - "accuracy": 0.7182048998380962, + "accuracy": 0.7172762469241494, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -42291,7 +42291,7 @@ } }, { - "accuracy": 0.7361437395999306, + "accuracy": 0.735227434258712, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -42340,7 +42340,7 @@ } }, { - "accuracy": 0.8226368552760074, + "accuracy": 0.8220879655135305, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -42392,7 +42392,7 @@ } }, { - "accuracy": 0.836681002064755, + "accuracy": 0.8361496925354004, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -42444,7 +42444,7 @@ } }, { - "accuracy": 0.8622640183097438, + "accuracy": 0.8617946976109555, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -42493,7 +42493,7 @@ } }, { - "accuracy": 0.9083322600314492, + "accuracy": 0.9080253463042409, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -42536,7 +42536,7 @@ } }, { - "accuracy": 0.9165125457864058, + "accuracy": 0.9162194540626124, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -42579,7 +42579,7 @@ } }, { - "accuracy": 0.9099194564317402, + "accuracy": 0.9096378652673018, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -42631,7 +42631,7 @@ } }, { - "accuracy": 0.9210371908388639, + "accuracy": 0.9207904589803595, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -42683,7 +42683,7 @@ } }, { - "accuracy": 0.954222211712285, + "accuracy": 0.9540858331479525, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -42735,7 +42735,7 @@ } }, { - "accuracy": 0.9606845347504867, + "accuracy": 0.9605600645667628, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -42787,7 +42787,7 @@ } }, { - "accuracy": 0.9753695836192683, + "accuracy": 0.9752934982902125, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -42830,7 +42830,7 @@ } }, { - "accuracy": 0.976714047946428, + "accuracy": 0.9766323989943454, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -42879,7 +42879,7 @@ } }, { - "accuracy": 0.981800775778921, + "accuracy": 0.9817362396340621, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -42925,7 +42925,7 @@ } }, { - "accuracy": 0.993527109685697, + "accuracy": 0.9935023404265705, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -42969,7 +42969,7 @@ "model.layers.22.parallel_decoder": { "attn": [ { - "accuracy": 0.6826050657975047, + "accuracy": 0.6811048357110274, "total_bits": 320757760, "q_proj": { "group_size": { @@ -43033,7 +43033,7 @@ } }, { - "accuracy": 0.6927855391251414, + "accuracy": 0.6912350152668201, "total_bits": 329080832, "q_proj": { "group_size": { @@ -43097,7 +43097,7 @@ } }, { - "accuracy": 0.7277363476000334, + "accuracy": 0.7269958194933439, "total_bits": 336024576, "q_proj": { "group_size": { @@ -43161,7 +43161,7 @@ } }, { - "accuracy": 0.7985176287199321, + "accuracy": 0.7983050095407587, "total_bits": 401557504, "q_proj": { "group_size": { @@ -43225,7 +43225,7 @@ } }, { - "accuracy": 0.8402326608958998, + "accuracy": 0.8400719040318539, "total_bits": 475279360, "q_proj": { "group_size": { @@ -43289,7 +43289,7 @@ } }, { - "accuracy": 0.8424975244622481, + "accuracy": 0.842316000085128, "total_bits": 475479040, "q_proj": { "group_size": { @@ -43353,7 +43353,7 @@ } }, { - "accuracy": 0.9081598771245856, + "accuracy": 0.9078056749544645, "total_bits": 609759232, "q_proj": { "group_size": { @@ -43405,7 +43405,7 @@ } }, { - "accuracy": 0.9105712802786576, + "accuracy": 0.910260796546936, "total_bits": 610024448, "q_proj": { "group_size": { @@ -43457,7 +43457,7 @@ } }, { - "accuracy": 0.9175104028300235, + "accuracy": 0.9167893874017816, "total_bits": 615020544, "q_proj": { "group_size": { @@ -43509,7 +43509,7 @@ } }, { - "accuracy": 0.9211532504935014, + "accuracy": 0.9205350122953716, "total_bits": 623951872, "q_proj": { "group_size": { @@ -43561,7 +43561,7 @@ } }, { - "accuracy": 0.9201914448487132, + "accuracy": 0.9200646689063624, "total_bits": 626473984, "q_proj": { "group_size": { @@ -43625,7 +43625,7 @@ } }, { - "accuracy": 0.9252368023521022, + "accuracy": 0.9250117477617765, "total_bits": 630355968, "q_proj": { "group_size": { @@ -43689,7 +43689,7 @@ } }, { - "accuracy": 0.9315145266683478, + "accuracy": 0.9312301874160767, "total_bits": 637362176, "q_proj": { "group_size": { @@ -43750,7 +43750,7 @@ } }, { - "accuracy": 0.9367755525990537, + "accuracy": 0.9364190854524311, "total_bits": 646823936, "q_proj": { "group_size": { @@ -43811,7 +43811,7 @@ } }, { - "accuracy": 0.9629308142160115, + "accuracy": 0.9627715129601329, "total_bits": 784740352, "q_proj": { "group_size": { @@ -43872,7 +43872,7 @@ } }, { - "accuracy": 0.9684551954269409, + "accuracy": 0.9683090228783457, "total_bits": 797818880, "q_proj": { "group_size": { @@ -43933,7 +43933,7 @@ } }, { - "accuracy": 0.9762679903130782, + "accuracy": 0.9761347770690918, "total_bits": 911749120, "q_proj": { "group_size": { @@ -43985,7 +43985,7 @@ } }, { - "accuracy": 0.9840047939827568, + "accuracy": 0.983946431624262, "total_bits": 942718976, "q_proj": { "group_size": { @@ -44037,7 +44037,7 @@ } }, { - "accuracy": 0.9937169708703694, + "accuracy": 0.9937074223631307, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -44091,7 +44091,7 @@ ], "mlp": [ { - "accuracy": 0.6681219402112459, + "accuracy": 0.6657265111019737, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -44143,7 +44143,7 @@ } }, { - "accuracy": 0.6753235616182026, + "accuracy": 0.6729466036746377, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -44195,7 +44195,7 @@ } }, { - "accuracy": 0.7343667431881553, + "accuracy": 0.7323827241596423, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -44244,7 +44244,7 @@ } }, { - "accuracy": 0.7520402607164884, + "accuracy": 0.7501730667917352, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -44293,7 +44293,7 @@ } }, { - "accuracy": 0.8315526184282804, + "accuracy": 0.8303214248858, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -44345,7 +44345,7 @@ } }, { - "accuracy": 0.8448981109418368, + "accuracy": 0.8438098430633545, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -44397,7 +44397,7 @@ } }, { - "accuracy": 0.8699044302890175, + "accuracy": 0.8689718120976498, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -44446,7 +44446,7 @@ } }, { - "accuracy": 0.9125920847842568, + "accuracy": 0.9119396146975065, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -44489,7 +44489,7 @@ } }, { - "accuracy": 0.9205130526894018, + "accuracy": 0.919913975816024, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -44532,7 +44532,7 @@ } }, { - "accuracy": 0.914333092538934, + "accuracy": 0.9137381064264398, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -44584,7 +44584,7 @@ } }, { - "accuracy": 0.9249190405795449, + "accuracy": 0.9243739592401605, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -44636,7 +44636,7 @@ } }, { - "accuracy": 0.9564178899714821, + "accuracy": 0.956121215694829, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -44688,7 +44688,7 @@ } }, { - "accuracy": 0.9625912998851976, + "accuracy": 0.9623300088079352, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -44740,7 +44740,7 @@ } }, { - "accuracy": 0.9765198120945379, + "accuracy": 0.976343140790337, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -44783,7 +44783,7 @@ } }, { - "accuracy": 0.9778413741212142, + "accuracy": 0.9776918574383384, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -44832,7 +44832,7 @@ } }, { - "accuracy": 0.9829356435098147, + "accuracy": 0.9828175918052071, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -44878,7 +44878,7 @@ } }, { - "accuracy": 0.9937647702662569, + "accuracy": 0.9937239869644767, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -44922,7 +44922,7 @@ "model.layers.23.parallel_decoder": { "attn": [ { - "accuracy": 0.6989561633059853, + "accuracy": 0.6979517936706543, "total_bits": 320757760, "q_proj": { "group_size": { @@ -44986,7 +44986,7 @@ } }, { - "accuracy": 0.7112824791356137, + "accuracy": 0.710040293241802, "total_bits": 329080832, "q_proj": { "group_size": { @@ -45050,7 +45050,7 @@ } }, { - "accuracy": 0.7464073833666349, + "accuracy": 0.7447434726514315, "total_bits": 336024576, "q_proj": { "group_size": { @@ -45114,7 +45114,7 @@ } }, { - "accuracy": 0.8108340188076621, + "accuracy": 0.809438241155524, "total_bits": 401557504, "q_proj": { "group_size": { @@ -45178,7 +45178,7 @@ } }, { - "accuracy": 0.8492120692604467, + "accuracy": 0.8480074405670166, "total_bits": 475279360, "q_proj": { "group_size": { @@ -45242,7 +45242,7 @@ } }, { - "accuracy": 0.8521707183436343, + "accuracy": 0.8513643992574591, "total_bits": 475479040, "q_proj": { "group_size": { @@ -45306,7 +45306,7 @@ } }, { - "accuracy": 0.9096949727911698, + "accuracy": 0.9084971264788979, "total_bits": 609759232, "q_proj": { "group_size": { @@ -45358,7 +45358,7 @@ } }, { - "accuracy": 0.9132288757123446, + "accuracy": 0.9130520255942094, "total_bits": 610024448, "q_proj": { "group_size": { @@ -45410,7 +45410,7 @@ } }, { - "accuracy": 0.9187785575264379, + "accuracy": 0.9179100739328485, "total_bits": 615020544, "q_proj": { "group_size": { @@ -45462,7 +45462,7 @@ } }, { - "accuracy": 0.9225031011982968, + "accuracy": 0.9217169284820557, "total_bits": 623951872, "q_proj": { "group_size": { @@ -45514,7 +45514,7 @@ } }, { - "accuracy": 0.9244472604048879, + "accuracy": 0.9240930770572863, "total_bits": 626473984, "q_proj": { "group_size": { @@ -45578,7 +45578,7 @@ } }, { - "accuracy": 0.9293595991636577, + "accuracy": 0.9287852801774678, "total_bits": 630355968, "q_proj": { "group_size": { @@ -45642,7 +45642,7 @@ } }, { - "accuracy": 0.9351604863217002, + "accuracy": 0.9347656777030543, "total_bits": 637362176, "q_proj": { "group_size": { @@ -45703,7 +45703,7 @@ } }, { - "accuracy": 0.9404206275939941, + "accuracy": 0.9397797647275423, "total_bits": 646823936, "q_proj": { "group_size": { @@ -45764,7 +45764,7 @@ } }, { - "accuracy": 0.9645766553125883, + "accuracy": 0.9643421894625613, "total_bits": 784740352, "q_proj": { "group_size": { @@ -45825,7 +45825,7 @@ } }, { - "accuracy": 0.9701469634708605, + "accuracy": 0.9699142543893111, "total_bits": 797818880, "q_proj": { "group_size": { @@ -45886,7 +45886,7 @@ } }, { - "accuracy": 0.9760825900655044, + "accuracy": 0.9758936511842828, "total_bits": 911749120, "q_proj": { "group_size": { @@ -45938,7 +45938,7 @@ } }, { - "accuracy": 0.9847561748404252, + "accuracy": 0.9846311227271431, "total_bits": 942718976, "q_proj": { "group_size": { @@ -45990,7 +45990,7 @@ } }, { - "accuracy": 0.993781255656167, + "accuracy": 0.9937134792930201, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -46044,7 +46044,7 @@ ], "mlp": [ { - "accuracy": 0.6611178046778629, + "accuracy": 0.6598502962212813, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -46096,7 +46096,7 @@ } }, { - "accuracy": 0.668476330606561, + "accuracy": 0.6671948181955438, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -46148,7 +46148,7 @@ } }, { - "accuracy": 0.7272436744288394, + "accuracy": 0.7262607122722424, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -46197,7 +46197,7 @@ } }, { - "accuracy": 0.7453379380075555, + "accuracy": 0.7444511463767605, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -46246,7 +46246,7 @@ } }, { - "accuracy": 0.8275101059361508, + "accuracy": 0.8268019902078729, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -46298,7 +46298,7 @@ } }, { - "accuracy": 0.8411803371027896, + "accuracy": 0.840587390096564, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -46350,7 +46350,7 @@ } }, { - "accuracy": 0.8665374580182528, + "accuracy": 0.866005822231895, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -46399,7 +46399,7 @@ } }, { - "accuracy": 0.9102052512921786, + "accuracy": 0.9098252497221294, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -46442,7 +46442,7 @@ } }, { - "accuracy": 0.9184587315509194, + "accuracy": 0.9181082123204282, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -46485,7 +46485,7 @@ } }, { - "accuracy": 0.9123223706295616, + "accuracy": 0.9119861251429507, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -46537,7 +46537,7 @@ } }, { - "accuracy": 0.923131177299901, + "accuracy": 0.9228458404541016, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -46589,7 +46589,7 @@ } }, { - "accuracy": 0.955385961030659, + "accuracy": 0.9552233407371923, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -46641,7 +46641,7 @@ } }, { - "accuracy": 0.9616892620136863, + "accuracy": 0.9615531971580104, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -46693,7 +46693,7 @@ } }, { - "accuracy": 0.9758771689314591, + "accuracy": 0.9757850421102423, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -46736,7 +46736,7 @@ } }, { - "accuracy": 0.9773427342113695, + "accuracy": 0.9772659053927973, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -46785,7 +46785,7 @@ } }, { - "accuracy": 0.9825077150997362, + "accuracy": 0.9824427319200415, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -46831,7 +46831,7 @@ } }, { - "accuracy": 0.9936254887204421, + "accuracy": 0.9936062572033781, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -46875,7 +46875,7 @@ "model.layers.24.parallel_decoder": { "attn": [ { - "accuracy": 0.6656583484850431, + "accuracy": 0.6646034341109426, "total_bits": 320757760, "q_proj": { "group_size": { @@ -46939,7 +46939,7 @@ } }, { - "accuracy": 0.6759980854235197, + "accuracy": 0.6747976102327046, "total_bits": 329080832, "q_proj": { "group_size": { @@ -47003,7 +47003,7 @@ } }, { - "accuracy": 0.713353809557463, + "accuracy": 0.711875463786878, "total_bits": 336024576, "q_proj": { "group_size": { @@ -47067,7 +47067,7 @@ } }, { - "accuracy": 0.7876518149124949, + "accuracy": 0.7867822647094727, "total_bits": 401557504, "q_proj": { "group_size": { @@ -47131,7 +47131,7 @@ } }, { - "accuracy": 0.8323951520417866, + "accuracy": 0.8317524006492213, "total_bits": 475279360, "q_proj": { "group_size": { @@ -47195,7 +47195,7 @@ } }, { - "accuracy": 0.8347362468117162, + "accuracy": 0.8338680141850522, "total_bits": 475479040, "q_proj": { "group_size": { @@ -47259,7 +47259,7 @@ } }, { - "accuracy": 0.9021727034920141, + "accuracy": 0.9014277771899575, "total_bits": 609759232, "q_proj": { "group_size": { @@ -47311,7 +47311,7 @@ } }, { - "accuracy": 0.9048650390223453, + "accuracy": 0.904050161964015, "total_bits": 610024448, "q_proj": { "group_size": { @@ -47363,7 +47363,7 @@ } }, { - "accuracy": 0.912041984106365, + "accuracy": 0.9109531515523007, "total_bits": 615020544, "q_proj": { "group_size": { @@ -47415,7 +47415,7 @@ } }, { - "accuracy": 0.9165726021716469, + "accuracy": 0.9153552118100619, "total_bits": 623951872, "q_proj": { "group_size": { @@ -47467,7 +47467,7 @@ } }, { - "accuracy": 0.9171958032407259, + "accuracy": 0.9163736167706942, "total_bits": 626473984, "q_proj": { "group_size": { @@ -47531,7 +47531,7 @@ } }, { - "accuracy": 0.9220569698434127, + "accuracy": 0.9217494788922762, "total_bits": 630355968, "q_proj": { "group_size": { @@ -47595,7 +47595,7 @@ } }, { - "accuracy": 0.9286587050086573, + "accuracy": 0.9278294538196764, "total_bits": 637362176, "q_proj": { "group_size": { @@ -47656,7 +47656,7 @@ } }, { - "accuracy": 0.9335134656805741, + "accuracy": 0.9334405786112735, "total_bits": 646823936, "q_proj": { "group_size": { @@ -47717,7 +47717,7 @@ } }, { - "accuracy": 0.9612710507292497, + "accuracy": 0.960796898917148, "total_bits": 784740352, "q_proj": { "group_size": { @@ -47778,7 +47778,7 @@ } }, { - "accuracy": 0.9671079422298231, + "accuracy": 0.9667938508485493, "total_bits": 797818880, "q_proj": { "group_size": { @@ -47839,7 +47839,7 @@ } }, { - "accuracy": 0.9749589264392853, + "accuracy": 0.9745075718352669, "total_bits": 911749120, "q_proj": { "group_size": { @@ -47891,7 +47891,7 @@ } }, { - "accuracy": 0.9833155760639593, + "accuracy": 0.9831167302633587, "total_bits": 942718976, "q_proj": { "group_size": { @@ -47943,7 +47943,7 @@ } }, { - "accuracy": 0.9935242012143135, + "accuracy": 0.9934400229861862, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -47997,7 +47997,7 @@ ], "mlp": [ { - "accuracy": 0.6670395951522023, + "accuracy": 0.6660980174415989, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -48049,7 +48049,7 @@ } }, { - "accuracy": 0.6741236636513157, + "accuracy": 0.6731529235839844, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -48101,7 +48101,7 @@ } }, { - "accuracy": 0.7325061245968467, + "accuracy": 0.7316503273813348, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -48150,7 +48150,7 @@ } }, { - "accuracy": 0.7506153709010074, + "accuracy": 0.7497990005894711, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -48199,7 +48199,7 @@ } }, { - "accuracy": 0.8308624844802053, + "accuracy": 0.8303038948460629, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -48251,7 +48251,7 @@ } }, { - "accuracy": 0.8442502021789551, + "accuracy": 0.8437121667360005, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -48303,7 +48303,7 @@ } }, { - "accuracy": 0.8691708414178145, + "accuracy": 0.8687071800231934, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -48352,7 +48352,7 @@ } }, { - "accuracy": 0.9119347459391544, + "accuracy": 0.9116703585574502, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -48395,7 +48395,7 @@ } }, { - "accuracy": 0.9199414253234863, + "accuracy": 0.9196870076028925, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -48438,7 +48438,7 @@ } }, { - "accuracy": 0.914037779757851, + "accuracy": 0.913770173725329, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -48490,7 +48490,7 @@ } }, { - "accuracy": 0.9246194613607306, + "accuracy": 0.9243823666321604, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -48542,7 +48542,7 @@ } }, { - "accuracy": 0.9562668235678422, + "accuracy": 0.9561387708312586, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -48594,7 +48594,7 @@ } }, { - "accuracy": 0.9624289650666087, + "accuracy": 0.9623054015009027, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -48646,7 +48646,7 @@ } }, { - "accuracy": 0.9763069592024151, + "accuracy": 0.9762442409992218, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -48689,7 +48689,7 @@ } }, { - "accuracy": 0.977776111740815, + "accuracy": 0.977702763519789, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -48738,7 +48738,7 @@ } }, { - "accuracy": 0.982900614801206, + "accuracy": 0.9828402949006934, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -48784,7 +48784,7 @@ } }, { - "accuracy": 0.9936416090319031, + "accuracy": 0.9936227508281407, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -48828,7 +48828,7 @@ "model.layers.25.parallel_decoder": { "attn": [ { - "accuracy": 0.7188073208457546, + "accuracy": 0.7202883770591335, "total_bits": 320757760, "q_proj": { "group_size": { @@ -48892,7 +48892,7 @@ } }, { - "accuracy": 0.729920462558144, + "accuracy": 0.7311338625456157, "total_bits": 329080832, "q_proj": { "group_size": { @@ -48956,7 +48956,7 @@ } }, { - "accuracy": 0.763148307800293, + "accuracy": 0.7639772766514829, "total_bits": 336024576, "q_proj": { "group_size": { @@ -49020,7 +49020,7 @@ } }, { - "accuracy": 0.8246749074835527, + "accuracy": 0.8259798225603605, "total_bits": 401557504, "q_proj": { "group_size": { @@ -49084,7 +49084,7 @@ } }, { - "accuracy": 0.8608155501516241, + "accuracy": 0.8611088175522654, "total_bits": 475279360, "q_proj": { "group_size": { @@ -49148,7 +49148,7 @@ } }, { - "accuracy": 0.862704640940616, + "accuracy": 0.8628057806115401, "total_bits": 475479040, "q_proj": { "group_size": { @@ -49212,7 +49212,7 @@ } }, { - "accuracy": 0.9183235984099538, + "accuracy": 0.9185932874679565, "total_bits": 609759232, "q_proj": { "group_size": { @@ -49264,7 +49264,7 @@ } }, { - "accuracy": 0.9203015754097387, + "accuracy": 0.9204568298239457, "total_bits": 610024448, "q_proj": { "group_size": { @@ -49316,7 +49316,7 @@ } }, { - "accuracy": 0.9260579535835668, + "accuracy": 0.9257026471589741, "total_bits": 615020544, "q_proj": { "group_size": { @@ -49368,7 +49368,7 @@ } }, { - "accuracy": 0.9301058116712069, + "accuracy": 0.929545659767954, "total_bits": 623951872, "q_proj": { "group_size": { @@ -49420,7 +49420,7 @@ } }, { - "accuracy": 0.9305812120437622, + "accuracy": 0.9306496883693495, "total_bits": 626473984, "q_proj": { "group_size": { @@ -49484,7 +49484,7 @@ } }, { - "accuracy": 0.9347925499865883, + "accuracy": 0.9349077312569869, "total_bits": 630355968, "q_proj": { "group_size": { @@ -49548,7 +49548,7 @@ } }, { - "accuracy": 0.9405943531739085, + "accuracy": 0.9405922826967741, "total_bits": 637362176, "q_proj": { "group_size": { @@ -49609,7 +49609,7 @@ } }, { - "accuracy": 0.9449223719145122, + "accuracy": 0.9449449463894493, "total_bits": 646823936, "q_proj": { "group_size": { @@ -49670,7 +49670,7 @@ } }, { - "accuracy": 0.9677811014024835, + "accuracy": 0.9677283857998095, "total_bits": 784740352, "q_proj": { "group_size": { @@ -49731,7 +49731,7 @@ } }, { - "accuracy": 0.972515837142342, + "accuracy": 0.9724783709174708, "total_bits": 797818880, "q_proj": { "group_size": { @@ -49792,7 +49792,7 @@ } }, { - "accuracy": 0.9790367562519876, + "accuracy": 0.9789432553868544, "total_bits": 911749120, "q_proj": { "group_size": { @@ -49844,7 +49844,7 @@ } }, { - "accuracy": 0.9856799025284616, + "accuracy": 0.9856460047395605, "total_bits": 942718976, "q_proj": { "group_size": { @@ -49896,7 +49896,7 @@ } }, { - "accuracy": 0.9945377080064071, + "accuracy": 0.9945548153237292, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -49950,7 +49950,7 @@ ], "mlp": [ { - "accuracy": 0.6653775415922466, + "accuracy": 0.6642366208528218, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -50002,7 +50002,7 @@ } }, { - "accuracy": 0.6724101618716591, + "accuracy": 0.6714018771522923, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -50054,7 +50054,7 @@ } }, { - "accuracy": 0.730204557117663, + "accuracy": 0.7292933715017218, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -50103,7 +50103,7 @@ } }, { - "accuracy": 0.7484110531054045, + "accuracy": 0.7474829523186934, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -50152,7 +50152,7 @@ } }, { - "accuracy": 0.8301747472662675, + "accuracy": 0.8296228459006861, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -50204,7 +50204,7 @@ } }, { - "accuracy": 0.8433965130856163, + "accuracy": 0.8429108042466014, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -50256,7 +50256,7 @@ } }, { - "accuracy": 0.8683759162300512, + "accuracy": 0.8679318929973402, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -50305,7 +50305,7 @@ } }, { - "accuracy": 0.9116203784942627, + "accuracy": 0.9113526281557585, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -50348,7 +50348,7 @@ } }, { - "accuracy": 0.9196387403889706, + "accuracy": 0.9193949322951467, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -50391,7 +50391,7 @@ } }, { - "accuracy": 0.9138070156699732, + "accuracy": 0.9135380544160542, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -50443,7 +50443,7 @@ } }, { - "accuracy": 0.9243520121825368, + "accuracy": 0.9241060834181936, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -50495,7 +50495,7 @@ } }, { - "accuracy": 0.9561965653770849, + "accuracy": 0.956073566486961, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -50547,7 +50547,7 @@ } }, { - "accuracy": 0.9623143955280906, + "accuracy": 0.962197764923698, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -50599,7 +50599,7 @@ } }, { - "accuracy": 0.9762812554836273, + "accuracy": 0.976223715041813, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -50642,7 +50642,7 @@ } }, { - "accuracy": 0.9777301220517409, + "accuracy": 0.97767100679247, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -50691,7 +50691,7 @@ } }, { - "accuracy": 0.9827683536629928, + "accuracy": 0.9827085623615667, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -50737,7 +50737,7 @@ } }, { - "accuracy": 0.9934779927134514, + "accuracy": 0.9934801035805753, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -50781,7 +50781,7 @@ "model.layers.26.parallel_decoder": { "attn": [ { - "accuracy": 0.6371532992312783, + "accuracy": 0.6347944109063399, "total_bits": 320757760, "q_proj": { "group_size": { @@ -50845,7 +50845,7 @@ } }, { - "accuracy": 0.6515008524844521, + "accuracy": 0.6500517945540578, "total_bits": 329080832, "q_proj": { "group_size": { @@ -50909,7 +50909,7 @@ } }, { - "accuracy": 0.7050113176044666, + "accuracy": 0.704709529876709, "total_bits": 336024576, "q_proj": { "group_size": { @@ -50973,7 +50973,7 @@ } }, { - "accuracy": 0.7692124467147023, + "accuracy": 0.7686654141074732, "total_bits": 401557504, "q_proj": { "group_size": { @@ -51037,7 +51037,7 @@ } }, { - "accuracy": 0.8170124480598852, + "accuracy": 0.8169618029343455, "total_bits": 475279360, "q_proj": { "group_size": { @@ -51101,7 +51101,7 @@ } }, { - "accuracy": 0.8222160966772782, + "accuracy": 0.8216424741243061, "total_bits": 475479040, "q_proj": { "group_size": { @@ -51165,7 +51165,7 @@ } }, { - "accuracy": 0.8936389370968467, + "accuracy": 0.894514472861039, "total_bits": 609759232, "q_proj": { "group_size": { @@ -51217,7 +51217,7 @@ } }, { - "accuracy": 0.8988595197075292, + "accuracy": 0.898798333971124, "total_bits": 610024448, "q_proj": { "group_size": { @@ -51269,7 +51269,7 @@ } }, { - "accuracy": 0.9078388214111328, + "accuracy": 0.9074359379316631, "total_bits": 615020544, "q_proj": { "group_size": { @@ -51321,7 +51321,7 @@ } }, { - "accuracy": 0.9120512824309499, + "accuracy": 0.9116082693401136, "total_bits": 623951872, "q_proj": { "group_size": { @@ -51373,7 +51373,7 @@ } }, { - "accuracy": 0.9093265282480341, + "accuracy": 0.9094420671463013, "total_bits": 626473984, "q_proj": { "group_size": { @@ -51437,7 +51437,7 @@ } }, { - "accuracy": 0.9153970291740016, + "accuracy": 0.9162389479185405, "total_bits": 630355968, "q_proj": { "group_size": { @@ -51501,7 +51501,7 @@ } }, { - "accuracy": 0.9261010132337871, + "accuracy": 0.9249093658045718, "total_bits": 637362176, "q_proj": { "group_size": { @@ -51562,7 +51562,7 @@ } }, { - "accuracy": 0.9317168624777543, + "accuracy": 0.931663713957134, "total_bits": 646823936, "q_proj": { "group_size": { @@ -51623,7 +51623,7 @@ } }, { - "accuracy": 0.9585882048857839, + "accuracy": 0.9589851122153432, "total_bits": 784740352, "q_proj": { "group_size": { @@ -51684,7 +51684,7 @@ } }, { - "accuracy": 0.9661586880683899, + "accuracy": 0.9659008979797363, "total_bits": 797818880, "q_proj": { "group_size": { @@ -51745,7 +51745,7 @@ } }, { - "accuracy": 0.9710800365397805, + "accuracy": 0.9717463129445126, "total_bits": 911749120, "q_proj": { "group_size": { @@ -51797,7 +51797,7 @@ } }, { - "accuracy": 0.9833521513562453, + "accuracy": 0.9832418262958527, "total_bits": 942718976, "q_proj": { "group_size": { @@ -51849,7 +51849,7 @@ } }, { - "accuracy": 0.9926430528101168, + "accuracy": 0.9927679323836377, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -51903,7 +51903,7 @@ ], "mlp": [ { - "accuracy": 0.6714831904361123, + "accuracy": 0.6705644005223325, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -51955,7 +51955,7 @@ } }, { - "accuracy": 0.678408522354929, + "accuracy": 0.6774697303771973, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -52007,7 +52007,7 @@ } }, { - "accuracy": 0.7341908404701634, + "accuracy": 0.7333925648739463, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -52056,7 +52056,7 @@ } }, { - "accuracy": 0.7517376447978773, + "accuracy": 0.7509671010469136, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -52105,7 +52105,7 @@ } }, { - "accuracy": 0.8328161239624023, + "accuracy": 0.8323439798857036, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -52157,7 +52157,7 @@ } }, { - "accuracy": 0.8459191573293585, + "accuracy": 0.8454833407151072, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -52209,7 +52209,7 @@ } }, { - "accuracy": 0.8701647080873188, + "accuracy": 0.869764767195049, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -52258,7 +52258,7 @@ } }, { - "accuracy": 0.9130347465213976, + "accuracy": 0.9128176413084331, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -52301,7 +52301,7 @@ } }, { - "accuracy": 0.9209600122351396, + "accuracy": 0.9207385464718467, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -52344,7 +52344,7 @@ } }, { - "accuracy": 0.9151261354747572, + "accuracy": 0.914918767778497, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -52396,7 +52396,7 @@ } }, { - "accuracy": 0.9255442242873342, + "accuracy": 0.9253294091475637, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -52448,7 +52448,7 @@ } }, { - "accuracy": 0.9569093773239538, + "accuracy": 0.9567918965691015, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -52500,7 +52500,7 @@ } }, { - "accuracy": 0.9629432119821247, + "accuracy": 0.9628408143394872, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -52552,7 +52552,7 @@ } }, { - "accuracy": 0.9767332751499979, + "accuracy": 0.9766785565175509, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -52595,7 +52595,7 @@ } }, { - "accuracy": 0.9781247989127511, + "accuracy": 0.9780704881015577, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -52644,7 +52644,7 @@ } }, { - "accuracy": 0.9830416064513358, + "accuracy": 0.9829898915792766, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -52690,7 +52690,7 @@ } }, { - "accuracy": 0.9937670885732299, + "accuracy": 0.9937534183263779, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -52734,7 +52734,7 @@ "model.layers.27.parallel_decoder": { "attn": [ { - "accuracy": 0.6876310549284282, + "accuracy": 0.6865877101295872, "total_bits": 320757760, "q_proj": { "group_size": { @@ -52798,7 +52798,7 @@ } }, { - "accuracy": 0.6983653871636641, + "accuracy": 0.7008949831912392, "total_bits": 329080832, "q_proj": { "group_size": { @@ -52862,7 +52862,7 @@ } }, { - "accuracy": 0.7394941229569285, + "accuracy": 0.7399462900663677, "total_bits": 336024576, "q_proj": { "group_size": { @@ -52926,7 +52926,7 @@ } }, { - "accuracy": 0.8039014213963559, + "accuracy": 0.8036598029889559, "total_bits": 401557504, "q_proj": { "group_size": { @@ -52990,7 +52990,7 @@ } }, { - "accuracy": 0.8438912692822909, + "accuracy": 0.8438819709577059, "total_bits": 475279360, "q_proj": { "group_size": { @@ -53054,7 +53054,7 @@ } }, { - "accuracy": 0.8470626002863834, + "accuracy": 0.8468201034947446, "total_bits": 475479040, "q_proj": { "group_size": { @@ -53118,7 +53118,7 @@ } }, { - "accuracy": 0.9120555739653737, + "accuracy": 0.9119049436167667, "total_bits": 609759232, "q_proj": { "group_size": { @@ -53170,7 +53170,7 @@ } }, { - "accuracy": 0.9151835755298012, + "accuracy": 0.9148113790311312, "total_bits": 610024448, "q_proj": { "group_size": { @@ -53222,7 +53222,7 @@ } }, { - "accuracy": 0.9213047529521742, + "accuracy": 0.9208133722606459, "total_bits": 615020544, "q_proj": { "group_size": { @@ -53274,7 +53274,7 @@ } }, { - "accuracy": 0.9253500198063097, + "accuracy": 0.9248107232545552, "total_bits": 623951872, "q_proj": { "group_size": { @@ -53326,7 +53326,7 @@ } }, { - "accuracy": 0.9224377180400648, + "accuracy": 0.9225639983227378, "total_bits": 626473984, "q_proj": { "group_size": { @@ -53390,7 +53390,7 @@ } }, { - "accuracy": 0.9273133528859991, + "accuracy": 0.9271176175067299, "total_bits": 630355968, "q_proj": { "group_size": { @@ -53454,7 +53454,7 @@ } }, { - "accuracy": 0.935041672305057, + "accuracy": 0.934918202851948, "total_bits": 637362176, "q_proj": { "group_size": { @@ -53515,7 +53515,7 @@ } }, { - "accuracy": 0.9397689355047125, + "accuracy": 0.9398823411841142, "total_bits": 646823936, "q_proj": { "group_size": { @@ -53576,7 +53576,7 @@ } }, { - "accuracy": 0.9644302317970678, + "accuracy": 0.9643984625214025, "total_bits": 784740352, "q_proj": { "group_size": { @@ -53637,7 +53637,7 @@ } }, { - "accuracy": 0.9700730443000793, + "accuracy": 0.9701465368270874, "total_bits": 797818880, "q_proj": { "group_size": { @@ -53698,7 +53698,7 @@ } }, { - "accuracy": 0.9769047906524256, + "accuracy": 0.9767278119137413, "total_bits": 911749120, "q_proj": { "group_size": { @@ -53750,7 +53750,7 @@ } }, { - "accuracy": 0.9849967485979984, + "accuracy": 0.9850050446234251, "total_bits": 942718976, "q_proj": { "group_size": { @@ -53802,7 +53802,7 @@ } }, { - "accuracy": 0.993953648758562, + "accuracy": 0.9939225966993132, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -53856,7 +53856,7 @@ ], "mlp": [ { - "accuracy": 0.6739994852166427, + "accuracy": 0.6728754043579102, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -53908,7 +53908,7 @@ } }, { - "accuracy": 0.6807177694220292, + "accuracy": 0.6797199751201428, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -53960,7 +53960,7 @@ } }, { - "accuracy": 0.7360566540768272, + "accuracy": 0.7352751179745323, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -54009,7 +54009,7 @@ } }, { - "accuracy": 0.7538644891036184, + "accuracy": 0.7531285034982782, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -54058,7 +54058,7 @@ } }, { - "accuracy": 0.8337045845232511, + "accuracy": 0.8332138312490363, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -54110,7 +54110,7 @@ } }, { - "accuracy": 0.846812649777061, + "accuracy": 0.8463182825791209, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -54162,7 +54162,7 @@ } }, { - "accuracy": 0.871094026063618, + "accuracy": 0.8706376427098325, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -54211,7 +54211,7 @@ } }, { - "accuracy": 0.9131500532752589, + "accuracy": 0.9128691899149042, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -54254,7 +54254,7 @@ } }, { - "accuracy": 0.9212563226097509, + "accuracy": 0.921006441116333, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -54297,7 +54297,7 @@ } }, { - "accuracy": 0.9155390199862028, + "accuracy": 0.9152700461839375, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -54349,7 +54349,7 @@ } }, { - "accuracy": 0.925956600590756, + "accuracy": 0.9257274991587588, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -54401,7 +54401,7 @@ } }, { - "accuracy": 0.9570910522812291, + "accuracy": 0.9569581464717263, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -54453,7 +54453,7 @@ } }, { - "accuracy": 0.963138316806994, + "accuracy": 0.9630308151245117, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -54505,7 +54505,7 @@ } }, { - "accuracy": 0.97675873731312, + "accuracy": 0.9766938074638969, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -54548,7 +54548,7 @@ } }, { - "accuracy": 0.9782578129517404, + "accuracy": 0.9781981769360995, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -54597,7 +54597,7 @@ } }, { - "accuracy": 0.9832149273470828, + "accuracy": 0.983159087206188, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -54643,7 +54643,7 @@ } }, { - "accuracy": 0.9938218519091606, + "accuracy": 0.9938095004150742, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -54687,7 +54687,7 @@ "model.layers.28.parallel_decoder": { "attn": [ { - "accuracy": 0.5957828320954975, + "accuracy": 0.5890737332795796, "total_bits": 320757760, "q_proj": { "group_size": { @@ -54751,7 +54751,7 @@ } }, { - "accuracy": 0.6124690708361173, + "accuracy": 0.6114240194621838, "total_bits": 329080832, "q_proj": { "group_size": { @@ -54815,7 +54815,7 @@ } }, { - "accuracy": 0.6845614533675344, + "accuracy": 0.6843720486289577, "total_bits": 336024576, "q_proj": { "group_size": { @@ -54879,7 +54879,7 @@ } }, { - "accuracy": 0.7553241127415707, + "accuracy": 0.7549653304250616, "total_bits": 401557504, "q_proj": { "group_size": { @@ -54943,7 +54943,7 @@ } }, { - "accuracy": 0.8002860295145136, + "accuracy": 0.8026244514866879, "total_bits": 475279360, "q_proj": { "group_size": { @@ -55007,7 +55007,7 @@ } }, { - "accuracy": 0.8093710196645636, + "accuracy": 0.8086798567520945, "total_bits": 475479040, "q_proj": { "group_size": { @@ -55071,7 +55071,7 @@ } }, { - "accuracy": 0.8770204092326918, + "accuracy": 0.8782029779333818, "total_bits": 609759232, "q_proj": { "group_size": { @@ -55123,7 +55123,7 @@ } }, { - "accuracy": 0.8885441453833329, + "accuracy": 0.8867920448905543, "total_bits": 610024448, "q_proj": { "group_size": { @@ -55175,7 +55175,7 @@ } }, { - "accuracy": 0.8995257616043091, + "accuracy": 0.8967921733856201, "total_bits": 615020544, "q_proj": { "group_size": { @@ -55227,7 +55227,7 @@ } }, { - "accuracy": 0.9038807341926977, + "accuracy": 0.9010616729134008, "total_bits": 623951872, "q_proj": { "group_size": { @@ -55279,7 +55279,7 @@ } }, { - "accuracy": 0.904286779855427, + "accuracy": 0.9021672637839067, "total_bits": 626473984, "q_proj": { "group_size": { @@ -55343,7 +55343,7 @@ } }, { - "accuracy": 0.9099326698403609, + "accuracy": 0.9102130940085963, "total_bits": 630355968, "q_proj": { "group_size": { @@ -55407,7 +55407,7 @@ } }, { - "accuracy": 0.9205502773586073, + "accuracy": 0.9204482153842324, "total_bits": 637362176, "q_proj": { "group_size": { @@ -55468,7 +55468,7 @@ } }, { - "accuracy": 0.927564865664432, + "accuracy": 0.9268271421131334, "total_bits": 646823936, "q_proj": { "group_size": { @@ -55529,7 +55529,7 @@ } }, { - "accuracy": 0.95609008952191, + "accuracy": 0.9557342497926009, "total_bits": 784740352, "q_proj": { "group_size": { @@ -55590,7 +55590,7 @@ } }, { - "accuracy": 0.9635162541740819, + "accuracy": 0.9632243827769631, "total_bits": 797818880, "q_proj": { "group_size": { @@ -55651,7 +55651,7 @@ } }, { - "accuracy": 0.9689839137227911, + "accuracy": 0.9685006455371254, "total_bits": 911749120, "q_proj": { "group_size": { @@ -55703,7 +55703,7 @@ } }, { - "accuracy": 0.9816232166792217, + "accuracy": 0.981605506257007, "total_bits": 942718976, "q_proj": { "group_size": { @@ -55755,7 +55755,7 @@ } }, { - "accuracy": 0.9915682555813539, + "accuracy": 0.9915106822001306, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -55809,7 +55809,7 @@ ], "mlp": [ { - "accuracy": 0.6807494665447034, + "accuracy": 0.679599686672813, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -55861,7 +55861,7 @@ } }, { - "accuracy": 0.6875252974660773, + "accuracy": 0.6863229651200144, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -55913,7 +55913,7 @@ } }, { - "accuracy": 0.7393286855597245, + "accuracy": 0.7383975982666016, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -55962,7 +55962,7 @@ } }, { - "accuracy": 0.7557300768400493, + "accuracy": 0.7548128429212069, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -56011,7 +56011,7 @@ } }, { - "accuracy": 0.8376202708796451, + "accuracy": 0.8370792489302785, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -56063,7 +56063,7 @@ } }, { - "accuracy": 0.8501967630888286, + "accuracy": 0.849679382223832, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -56115,7 +56115,7 @@ } }, { - "accuracy": 0.872776847136648, + "accuracy": 0.8722899838497764, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -56164,7 +56164,7 @@ } }, { - "accuracy": 0.9156048423365543, + "accuracy": 0.9153180875276264, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -56207,7 +56207,7 @@ } }, { - "accuracy": 0.9231915285712794, + "accuracy": 0.9229291677474976, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -56250,7 +56250,7 @@ } }, { - "accuracy": 0.9176044401369596, + "accuracy": 0.9173182688261333, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -56302,7 +56302,7 @@ } }, { - "accuracy": 0.9276604087729203, + "accuracy": 0.9273916608408878, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -56354,7 +56354,7 @@ } }, { - "accuracy": 0.9581753109630785, + "accuracy": 0.9580357231591877, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -56406,7 +56406,7 @@ } }, { - "accuracy": 0.9639913408379805, + "accuracy": 0.9638661648097792, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -56458,7 +56458,7 @@ } }, { - "accuracy": 0.977405984150736, + "accuracy": 0.9773367329647666, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -56501,7 +56501,7 @@ } }, { - "accuracy": 0.9787833769070474, + "accuracy": 0.9787171483039856, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -56550,7 +56550,7 @@ } }, { - "accuracy": 0.9833170363777562, + "accuracy": 0.9832578913161629, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -56596,7 +56596,7 @@ } }, { - "accuracy": 0.9939462828793024, + "accuracy": 0.9939357959910443, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -56640,7 +56640,7 @@ "model.layers.29.parallel_decoder": { "attn": [ { - "accuracy": 0.6083787867897434, + "accuracy": 0.6093172274137798, "total_bits": 320757760, "q_proj": { "group_size": { @@ -56704,7 +56704,7 @@ } }, { - "accuracy": 0.6284829189902859, + "accuracy": 0.6304681426600406, "total_bits": 329080832, "q_proj": { "group_size": { @@ -56768,7 +56768,7 @@ } }, { - "accuracy": 0.6885251246000591, + "accuracy": 0.6913789447985197, "total_bits": 336024576, "q_proj": { "group_size": { @@ -56832,7 +56832,7 @@ } }, { - "accuracy": 0.7568104141636899, + "accuracy": 0.7594637368854723, "total_bits": 401557504, "q_proj": { "group_size": { @@ -56896,7 +56896,7 @@ } }, { - "accuracy": 0.8064394875576621, + "accuracy": 0.8107257893210963, "total_bits": 475279360, "q_proj": { "group_size": { @@ -56960,7 +56960,7 @@ } }, { - "accuracy": 0.8120060845425254, + "accuracy": 0.8159098625183105, "total_bits": 475479040, "q_proj": { "group_size": { @@ -57024,7 +57024,7 @@ } }, { - "accuracy": 0.8866181248112729, + "accuracy": 0.8882084018305728, "total_bits": 609759232, "q_proj": { "group_size": { @@ -57076,7 +57076,7 @@ } }, { - "accuracy": 0.8948353466234709, + "accuracy": 0.8953099313535189, "total_bits": 610024448, "q_proj": { "group_size": { @@ -57128,7 +57128,7 @@ } }, { - "accuracy": 0.9032774849941856, + "accuracy": 0.9038325548171997, "total_bits": 615020544, "q_proj": { "group_size": { @@ -57180,7 +57180,7 @@ } }, { - "accuracy": 0.9075262922989695, + "accuracy": 0.9077924929167095, "total_bits": 623951872, "q_proj": { "group_size": { @@ -57232,7 +57232,7 @@ } }, { - "accuracy": 0.9066394128297505, + "accuracy": 0.9075991354490581, "total_bits": 626473984, "q_proj": { "group_size": { @@ -57296,7 +57296,7 @@ } }, { - "accuracy": 0.912817446809066, + "accuracy": 0.9124389824114347, "total_bits": 630355968, "q_proj": { "group_size": { @@ -57360,7 +57360,7 @@ } }, { - "accuracy": 0.9227360612467715, + "accuracy": 0.9235230119604814, "total_bits": 637362176, "q_proj": { "group_size": { @@ -57421,7 +57421,7 @@ } }, { - "accuracy": 0.9289020927328813, + "accuracy": 0.9289305523822182, "total_bits": 646823936, "q_proj": { "group_size": { @@ -57482,7 +57482,7 @@ } }, { - "accuracy": 0.9572328736907557, + "accuracy": 0.9572695838777643, "total_bits": 784740352, "q_proj": { "group_size": { @@ -57543,7 +57543,7 @@ } }, { - "accuracy": 0.9643964171409607, + "accuracy": 0.9645301793750963, "total_bits": 797818880, "q_proj": { "group_size": { @@ -57604,7 +57604,7 @@ } }, { - "accuracy": 0.9710068545843425, + "accuracy": 0.9709844181412145, "total_bits": 911749120, "q_proj": { "group_size": { @@ -57656,7 +57656,7 @@ } }, { - "accuracy": 0.982342635330401, + "accuracy": 0.9823691641029558, "total_bits": 942718976, "q_proj": { "group_size": { @@ -57708,7 +57708,7 @@ } }, { - "accuracy": 0.99238478823712, + "accuracy": 0.9924311237899881, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -57762,7 +57762,7 @@ ], "mlp": [ { - "accuracy": 0.6665726711875515, + "accuracy": 0.6653244620875308, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -57814,7 +57814,7 @@ } }, { - "accuracy": 0.6735293488753469, + "accuracy": 0.6724932821173417, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -57866,7 +57866,7 @@ } }, { - "accuracy": 0.7257549888209293, + "accuracy": 0.7248385830929405, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -57915,7 +57915,7 @@ } }, { - "accuracy": 0.7421153972023411, + "accuracy": 0.7412113892404657, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -57964,7 +57964,7 @@ } }, { - "accuracy": 0.8300229122764186, + "accuracy": 0.8294781885649029, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -58016,7 +58016,7 @@ } }, { - "accuracy": 0.8432933782276354, + "accuracy": 0.8427816190217671, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -58068,7 +58068,7 @@ } }, { - "accuracy": 0.865992684113352, + "accuracy": 0.8655490624277216, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -58117,7 +58117,7 @@ } }, { - "accuracy": 0.9118945096668444, + "accuracy": 0.9116196255934865, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -58160,7 +58160,7 @@ } }, { - "accuracy": 0.919784847058748, + "accuracy": 0.9195174417997661, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -58203,7 +58203,7 @@ } }, { - "accuracy": 0.9137809778514662, + "accuracy": 0.9135108245046515, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -58255,7 +58255,7 @@ } }, { - "accuracy": 0.9243505816710622, + "accuracy": 0.9241060269506354, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -58307,7 +58307,7 @@ } }, { - "accuracy": 0.9562689003191496, + "accuracy": 0.956141195799175, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -58359,7 +58359,7 @@ } }, { - "accuracy": 0.9623833330054032, + "accuracy": 0.9622540379825392, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -58411,7 +58411,7 @@ } }, { - "accuracy": 0.9764601478451177, + "accuracy": 0.9763891147939783, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -58454,7 +58454,7 @@ } }, { - "accuracy": 0.9778224998398831, + "accuracy": 0.977740300329108, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -58503,7 +58503,7 @@ } }, { - "accuracy": 0.9823531163366217, + "accuracy": 0.9822763800621033, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -58549,7 +58549,7 @@ } }, { - "accuracy": 0.9937507416072645, + "accuracy": 0.9937348632436049, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -58593,7 +58593,7 @@ "model.layers.30.parallel_decoder": { "attn": [ { - "accuracy": 0.6259599233928479, + "accuracy": 0.624770766810367, "total_bits": 320757760, "q_proj": { "group_size": { @@ -58657,7 +58657,7 @@ } }, { - "accuracy": 0.6422422308670848, + "accuracy": 0.6380404673124614, "total_bits": 329080832, "q_proj": { "group_size": { @@ -58721,7 +58721,7 @@ } }, { - "accuracy": 0.6972393989562988, + "accuracy": 0.6967056926928068, "total_bits": 336024576, "q_proj": { "group_size": { @@ -58785,7 +58785,7 @@ } }, { - "accuracy": 0.7676543185585424, + "accuracy": 0.7671158188267758, "total_bits": 401557504, "q_proj": { "group_size": { @@ -58849,7 +58849,7 @@ } }, { - "accuracy": 0.8127887625443309, + "accuracy": 0.813088944083766, "total_bits": 475279360, "q_proj": { "group_size": { @@ -58913,7 +58913,7 @@ } }, { - "accuracy": 0.8144950866699219, + "accuracy": 0.8144933800948293, "total_bits": 475479040, "q_proj": { "group_size": { @@ -58977,7 +58977,7 @@ } }, { - "accuracy": 0.895874349694503, + "accuracy": 0.8945301206488359, "total_bits": 609759232, "q_proj": { "group_size": { @@ -59029,7 +59029,7 @@ } }, { - "accuracy": 0.8996490742030897, + "accuracy": 0.8979673197394923, "total_bits": 610024448, "q_proj": { "group_size": { @@ -59081,7 +59081,7 @@ } }, { - "accuracy": 0.9057846759494982, + "accuracy": 0.9067898612273366, "total_bits": 615020544, "q_proj": { "group_size": { @@ -59133,7 +59133,7 @@ } }, { - "accuracy": 0.9100261738425807, + "accuracy": 0.911182767466495, "total_bits": 623951872, "q_proj": { "group_size": { @@ -59185,7 +59185,7 @@ } }, { - "accuracy": 0.9076555026204962, + "accuracy": 0.9074176361686305, "total_bits": 626473984, "q_proj": { "group_size": { @@ -59249,7 +59249,7 @@ } }, { - "accuracy": 0.9131737633755332, + "accuracy": 0.91344658324593, "total_bits": 630355968, "q_proj": { "group_size": { @@ -59313,7 +59313,7 @@ } }, { - "accuracy": 0.9250707312634117, + "accuracy": 0.9248035957938746, "total_bits": 637362176, "q_proj": { "group_size": { @@ -59374,7 +59374,7 @@ } }, { - "accuracy": 0.9312782413081119, + "accuracy": 0.9311538119065135, "total_bits": 646823936, "q_proj": { "group_size": { @@ -59435,7 +59435,7 @@ } }, { - "accuracy": 0.959219123187818, + "accuracy": 0.9591606980875919, "total_bits": 784740352, "q_proj": { "group_size": { @@ -59496,7 +59496,7 @@ } }, { - "accuracy": 0.9656542351371363, + "accuracy": 0.9654391878529599, "total_bits": 797818880, "q_proj": { "group_size": { @@ -59557,7 +59557,7 @@ } }, { - "accuracy": 0.9726732686946267, + "accuracy": 0.9727924842583505, "total_bits": 911749120, "q_proj": { "group_size": { @@ -59609,7 +59609,7 @@ } }, { - "accuracy": 0.9827145557654532, + "accuracy": 0.9826842969969699, "total_bits": 942718976, "q_proj": { "group_size": { @@ -59661,7 +59661,7 @@ } }, { - "accuracy": 0.9930528033720819, + "accuracy": 0.9929354520220506, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -59715,7 +59715,7 @@ ], "mlp": [ { - "accuracy": 0.6645467908758866, + "accuracy": 0.6631467969794023, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -59767,7 +59767,7 @@ } }, { - "accuracy": 0.6717994589554637, + "accuracy": 0.6703783587405556, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -59819,7 +59819,7 @@ } }, { - "accuracy": 0.7221655092741314, + "accuracy": 0.7209378041719136, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -59868,7 +59868,7 @@ } }, { - "accuracy": 0.7380497330113461, + "accuracy": 0.7368455937034206, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -59917,7 +59917,7 @@ } }, { - "accuracy": 0.8288128752457469, + "accuracy": 0.8281140829387464, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -59969,7 +59969,7 @@ } }, { - "accuracy": 0.8421891363043534, + "accuracy": 0.8414942339846962, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -60021,7 +60021,7 @@ } }, { - "accuracy": 0.8641240094837389, + "accuracy": 0.8634593235818964, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -60070,7 +60070,7 @@ } }, { - "accuracy": 0.9112744394101595, + "accuracy": 0.9108790472934121, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -60113,7 +60113,7 @@ } }, { - "accuracy": 0.919187633614791, + "accuracy": 0.9188225206575895, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -60156,7 +60156,7 @@ } }, { - "accuracy": 0.9132199977573595, + "accuracy": 0.9128118753433228, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -60208,7 +60208,7 @@ } }, { - "accuracy": 0.92377129353975, + "accuracy": 0.9234395968286615, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -60260,7 +60260,7 @@ } }, { - "accuracy": 0.9559953589188426, + "accuracy": 0.9557921980556688, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -60312,7 +60312,7 @@ } }, { - "accuracy": 0.9621017575263977, + "accuracy": 0.9619346417878804, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -60364,7 +60364,7 @@ } }, { - "accuracy": 0.9763038174102181, + "accuracy": 0.9761993712500522, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -60407,7 +60407,7 @@ } }, { - "accuracy": 0.97767321373287, + "accuracy": 0.9775832060136294, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -60456,7 +60456,7 @@ } }, { - "accuracy": 0.9820256593980288, + "accuracy": 0.9819498360157013, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -60502,7 +60502,7 @@ } }, { - "accuracy": 0.993745041520972, + "accuracy": 0.9937177782780245, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -60546,7 +60546,7 @@ "model.layers.31.parallel_decoder": { "attn": [ { - "accuracy": 0.5904956114919562, + "accuracy": 0.5826703874688399, "total_bits": 320757760, "q_proj": { "group_size": { @@ -60610,7 +60610,7 @@ } }, { - "accuracy": 0.6113428065651341, + "accuracy": 0.6067323935659308, "total_bits": 329080832, "q_proj": { "group_size": { @@ -60674,7 +60674,7 @@ } }, { - "accuracy": 0.6682754566794948, + "accuracy": 0.66664289173327, "total_bits": 336024576, "q_proj": { "group_size": { @@ -60738,7 +60738,7 @@ } }, { - "accuracy": 0.7438325129057232, + "accuracy": 0.7415217098436857, "total_bits": 401557504, "q_proj": { "group_size": { @@ -60802,7 +60802,7 @@ } }, { - "accuracy": 0.7989620911447626, + "accuracy": 0.795212281377692, "total_bits": 475279360, "q_proj": { "group_size": { @@ -60866,7 +60866,7 @@ } }, { - "accuracy": 0.8017839883503161, + "accuracy": 0.7990801710831492, "total_bits": 475479040, "q_proj": { "group_size": { @@ -60930,7 +60930,7 @@ } }, { - "accuracy": 0.8871467364461798, + "accuracy": 0.8866545903055292, "total_bits": 609759232, "q_proj": { "group_size": { @@ -60982,7 +60982,7 @@ } }, { - "accuracy": 0.892099656556782, + "accuracy": 0.8903883507377223, "total_bits": 610024448, "q_proj": { "group_size": { @@ -61034,7 +61034,7 @@ } }, { - "accuracy": 0.8995643540432579, + "accuracy": 0.8984406245382208, "total_bits": 615020544, "q_proj": { "group_size": { @@ -61086,7 +61086,7 @@ } }, { - "accuracy": 0.9039143700348704, + "accuracy": 0.9026776614942049, "total_bits": 623951872, "q_proj": { "group_size": { @@ -61138,7 +61138,7 @@ } }, { - "accuracy": 0.9000862083936992, + "accuracy": 0.8990844801852578, "total_bits": 626473984, "q_proj": { "group_size": { @@ -61202,7 +61202,7 @@ } }, { - "accuracy": 0.9073542795683208, + "accuracy": 0.9067166290785137, "total_bits": 630355968, "q_proj": { "group_size": { @@ -61266,7 +61266,7 @@ } }, { - "accuracy": 0.9189116013677496, + "accuracy": 0.9191232166792217, "total_bits": 637362176, "q_proj": { "group_size": { @@ -61327,7 +61327,7 @@ } }, { - "accuracy": 0.9252575510426572, + "accuracy": 0.9253438773908114, "total_bits": 646823936, "q_proj": { "group_size": { @@ -61388,7 +61388,7 @@ } }, { - "accuracy": 0.9562188481029711, + "accuracy": 0.9557603942720514, "total_bits": 784740352, "q_proj": { "group_size": { @@ -61449,7 +61449,7 @@ } }, { - "accuracy": 0.9628692739888242, + "accuracy": 0.9626511241260328, "total_bits": 797818880, "q_proj": { "group_size": { @@ -61510,7 +61510,7 @@ } }, { - "accuracy": 0.971124507878956, + "accuracy": 0.9702649273370442, "total_bits": 911749120, "q_proj": { "group_size": { @@ -61562,7 +61562,7 @@ } }, { - "accuracy": 0.9814147902162451, + "accuracy": 0.9813818837466993, "total_bits": 942718976, "q_proj": { "group_size": { @@ -61614,7 +61614,7 @@ } }, { - "accuracy": 0.9924523093198475, + "accuracy": 0.9923648732273203, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -61668,7 +61668,7 @@ ], "mlp": [ { - "accuracy": 0.6477695766248202, + "accuracy": 0.64626061288934, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -61720,7 +61720,7 @@ } }, { - "accuracy": 0.655418872833252, + "accuracy": 0.654052784568385, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -61772,7 +61772,7 @@ } }, { - "accuracy": 0.705962105801231, + "accuracy": 0.7047306361951327, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -61821,7 +61821,7 @@ } }, { - "accuracy": 0.7217687305651213, + "accuracy": 0.7205654194480495, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -61870,7 +61870,7 @@ } }, { - "accuracy": 0.820084672225149, + "accuracy": 0.8193211304514032, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -61922,7 +61922,7 @@ } }, { - "accuracy": 0.8341414677469354, + "accuracy": 0.8334364389118395, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -61974,7 +61974,7 @@ } }, { - "accuracy": 0.8560449324156109, + "accuracy": 0.8553996086120605, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -62023,7 +62023,7 @@ } }, { - "accuracy": 0.9068963904129832, + "accuracy": 0.9064949688158537, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -62066,7 +62066,7 @@ } }, { - "accuracy": 0.9151185060802259, + "accuracy": 0.9147582556072035, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -62109,7 +62109,7 @@ } }, { - "accuracy": 0.9087353882036711, + "accuracy": 0.9083156585693359, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -62161,7 +62161,7 @@ } }, { - "accuracy": 0.9198729113528603, + "accuracy": 0.9195228940562198, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -62213,7 +62213,7 @@ } }, { - "accuracy": 0.9537028670310974, + "accuracy": 0.9534846763861806, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -62265,7 +62265,7 @@ } }, { - "accuracy": 0.9601298852970725, + "accuracy": 0.9599594913030925, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -62317,7 +62317,7 @@ } }, { - "accuracy": 0.9750456464918036, + "accuracy": 0.9749389133955303, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -62360,7 +62360,7 @@ } }, { - "accuracy": 0.9764354448569449, + "accuracy": 0.9763244139520746, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -62409,7 +62409,7 @@ } }, { - "accuracy": 0.9806822883455377, + "accuracy": 0.9805841163585061, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -62455,7 +62455,7 @@ } }, { - "accuracy": 0.993097838602568, + "accuracy": 0.9930926184905203, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -62499,7 +62499,7 @@ "model.layers.32.parallel_decoder": { "attn": [ { - "accuracy": 0.629795977943822, + "accuracy": 0.6314227455540707, "total_bits": 320757760, "q_proj": { "group_size": { @@ -62563,7 +62563,7 @@ } }, { - "accuracy": 0.6484688959623638, + "accuracy": 0.6411813434801603, "total_bits": 329080832, "q_proj": { "group_size": { @@ -62627,7 +62627,7 @@ } }, { - "accuracy": 0.6988850392793354, + "accuracy": 0.6984822373641164, "total_bits": 336024576, "q_proj": { "group_size": { @@ -62691,7 +62691,7 @@ } }, { - "accuracy": 0.7679750040957802, + "accuracy": 0.7666117266604775, "total_bits": 401557504, "q_proj": { "group_size": { @@ -62755,7 +62755,7 @@ } }, { - "accuracy": 0.8181548369558234, + "accuracy": 0.8184554953324168, "total_bits": 475279360, "q_proj": { "group_size": { @@ -62819,7 +62819,7 @@ } }, { - "accuracy": 0.8210365772247314, + "accuracy": 0.8221613858875475, "total_bits": 475479040, "q_proj": { "group_size": { @@ -62883,7 +62883,7 @@ } }, { - "accuracy": 0.8970079233771876, + "accuracy": 0.8972459843284205, "total_bits": 609759232, "q_proj": { "group_size": { @@ -62935,7 +62935,7 @@ } }, { - "accuracy": 0.9020615941599796, + "accuracy": 0.900819458459553, "total_bits": 610024448, "q_proj": { "group_size": { @@ -62987,7 +62987,7 @@ } }, { - "accuracy": 0.9103090386641652, + "accuracy": 0.9102019949963218, "total_bits": 615020544, "q_proj": { "group_size": { @@ -63039,7 +63039,7 @@ } }, { - "accuracy": 0.9145241724817377, + "accuracy": 0.9143048022922716, "total_bits": 623951872, "q_proj": { "group_size": { @@ -63091,7 +63091,7 @@ } }, { - "accuracy": 0.9099615561334711, + "accuracy": 0.9096145441657618, "total_bits": 626473984, "q_proj": { "group_size": { @@ -63155,7 +63155,7 @@ } }, { - "accuracy": 0.9154264550460012, + "accuracy": 0.9169587085121557, "total_bits": 630355968, "q_proj": { "group_size": { @@ -63219,7 +63219,7 @@ } }, { - "accuracy": 0.9261006179608797, + "accuracy": 0.9259929468757228, "total_bits": 637362176, "q_proj": { "group_size": { @@ -63280,7 +63280,7 @@ } }, { - "accuracy": 0.9317886076475445, + "accuracy": 0.9318766970383494, "total_bits": 646823936, "q_proj": { "group_size": { @@ -63341,7 +63341,7 @@ } }, { - "accuracy": 0.9593017352254767, + "accuracy": 0.9594828298217372, "total_bits": 784740352, "q_proj": { "group_size": { @@ -63402,7 +63402,7 @@ } }, { - "accuracy": 0.9659130385047511, + "accuracy": 0.9660888659326654, "total_bits": 797818880, "q_proj": { "group_size": { @@ -63463,7 +63463,7 @@ } }, { - "accuracy": 0.9735381195419713, + "accuracy": 0.9734599841268439, "total_bits": 911749120, "q_proj": { "group_size": { @@ -63515,7 +63515,7 @@ } }, { - "accuracy": 0.983134189718648, + "accuracy": 0.9831868049345518, "total_bits": 942718976, "q_proj": { "group_size": { @@ -63567,7 +63567,7 @@ } }, { - "accuracy": 0.9931638625107313, + "accuracy": 0.9932185348711515, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -63621,7 +63621,7 @@ ], "mlp": [ { - "accuracy": 0.6473975934480366, + "accuracy": 0.6457244471499795, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -63673,7 +63673,7 @@ } }, { - "accuracy": 0.6550811466417814, + "accuracy": 0.6534031315853721, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -63725,7 +63725,7 @@ } }, { - "accuracy": 0.7050713488930149, + "accuracy": 0.7036054510819285, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -63774,7 +63774,7 @@ } }, { - "accuracy": 0.7208580217863384, + "accuracy": 0.7194147862886128, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -63823,7 +63823,7 @@ } }, { - "accuracy": 0.8196859610708136, + "accuracy": 0.8187807359193501, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -63875,7 +63875,7 @@ } }, { - "accuracy": 0.8337357546153822, + "accuracy": 0.8329804947501734, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -63927,7 +63927,7 @@ } }, { - "accuracy": 0.8555573036796168, + "accuracy": 0.854846904152318, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -63976,7 +63976,7 @@ } }, { - "accuracy": 0.9066041394283897, + "accuracy": 0.9061329176551417, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -64019,7 +64019,7 @@ } }, { - "accuracy": 0.914906194335536, + "accuracy": 0.9145238901439466, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -64062,7 +64062,7 @@ } }, { - "accuracy": 0.9085694927918284, + "accuracy": 0.9081132286473325, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -64114,7 +64114,7 @@ } }, { - "accuracy": 0.9197032326146176, + "accuracy": 0.9193144095571417, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -64166,7 +64166,7 @@ } }, { - "accuracy": 0.953656394230692, + "accuracy": 0.9534246701943248, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -64218,7 +64218,7 @@ } }, { - "accuracy": 0.9600877071681776, + "accuracy": 0.9598904628502696, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -64270,7 +64270,7 @@ } }, { - "accuracy": 0.975082063361218, + "accuracy": 0.9749586848836196, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -64313,7 +64313,7 @@ } }, { - "accuracy": 0.9764837261877561, + "accuracy": 0.9763666124720323, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -64362,7 +64362,7 @@ } }, { - "accuracy": 0.9807487271333996, + "accuracy": 0.9806470949398843, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -64408,7 +64408,7 @@ } }, { - "accuracy": 0.9933875038435585, + "accuracy": 0.9933569023483678, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -64452,7 +64452,7 @@ "model.layers.33.parallel_decoder": { "attn": [ { - "accuracy": 0.6279209036576121, + "accuracy": 0.6282115986472683, "total_bits": 320757760, "q_proj": { "group_size": { @@ -64516,7 +64516,7 @@ } }, { - "accuracy": 0.6443516580682052, + "accuracy": 0.6446107563219572, "total_bits": 329080832, "q_proj": { "group_size": { @@ -64580,7 +64580,7 @@ } }, { - "accuracy": 0.692165123788934, + "accuracy": 0.69255741018998, "total_bits": 336024576, "q_proj": { "group_size": { @@ -64644,7 +64644,7 @@ } }, { - "accuracy": 0.762463419060958, + "accuracy": 0.7620014391447368, "total_bits": 401557504, "q_proj": { "group_size": { @@ -64708,7 +64708,7 @@ } }, { - "accuracy": 0.8129260163558156, + "accuracy": 0.8125455003035695, "total_bits": 475279360, "q_proj": { "group_size": { @@ -64772,7 +64772,7 @@ } }, { - "accuracy": 0.8168183878848427, + "accuracy": 0.8162648928792853, "total_bits": 475479040, "q_proj": { "group_size": { @@ -64836,7 +64836,7 @@ } }, { - "accuracy": 0.8950637265255577, + "accuracy": 0.8947292880008095, "total_bits": 609759232, "q_proj": { "group_size": { @@ -64888,7 +64888,7 @@ } }, { - "accuracy": 0.8993674516677856, + "accuracy": 0.8993922221033197, "total_bits": 610024448, "q_proj": { "group_size": { @@ -64940,7 +64940,7 @@ } }, { - "accuracy": 0.9067812969810084, + "accuracy": 0.9066955917759946, "total_bits": 615020544, "q_proj": { "group_size": { @@ -64992,7 +64992,7 @@ } }, { - "accuracy": 0.9113574404465525, + "accuracy": 0.9112766416449296, "total_bits": 623951872, "q_proj": { "group_size": { @@ -65044,7 +65044,7 @@ } }, { - "accuracy": 0.9073784790540996, + "accuracy": 0.9070402509287784, "total_bits": 626473984, "q_proj": { "group_size": { @@ -65108,7 +65108,7 @@ } }, { - "accuracy": 0.913601749821713, + "accuracy": 0.9137222578651026, "total_bits": 630355968, "q_proj": { "group_size": { @@ -65172,7 +65172,7 @@ } }, { - "accuracy": 0.9241195540679128, + "accuracy": 0.9240405434056332, "total_bits": 637362176, "q_proj": { "group_size": { @@ -65233,7 +65233,7 @@ } }, { - "accuracy": 0.9300136440678647, + "accuracy": 0.9301602526714927, "total_bits": 646823936, "q_proj": { "group_size": { @@ -65294,7 +65294,7 @@ } }, { - "accuracy": 0.9587480959139372, + "accuracy": 0.958749322514785, "total_bits": 784740352, "q_proj": { "group_size": { @@ -65355,7 +65355,7 @@ } }, { - "accuracy": 0.9651598930358887, + "accuracy": 0.9651957938545629, "total_bits": 797818880, "q_proj": { "group_size": { @@ -65416,7 +65416,7 @@ } }, { - "accuracy": 0.9731079308610213, + "accuracy": 0.9729349079884981, "total_bits": 911749120, "q_proj": { "group_size": { @@ -65468,7 +65468,7 @@ } }, { - "accuracy": 0.9825017342441961, + "accuracy": 0.9825191137037779, "total_bits": 942718976, "q_proj": { "group_size": { @@ -65520,7 +65520,7 @@ } }, { - "accuracy": 0.9930030962354258, + "accuracy": 0.9929701623163725, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -65574,7 +65574,7 @@ ], "mlp": [ { - "accuracy": 0.6503290879098993, + "accuracy": 0.6482865433943898, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -65626,7 +65626,7 @@ } }, { - "accuracy": 0.6580842670641447, + "accuracy": 0.6560340931541042, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -65678,7 +65678,7 @@ } }, { - "accuracy": 0.7063677687393992, + "accuracy": 0.7044493273684853, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -65727,7 +65727,7 @@ } }, { - "accuracy": 0.7215262463218288, + "accuracy": 0.7196461025037264, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -65776,7 +65776,7 @@ } }, { - "accuracy": 0.8211258712567782, + "accuracy": 0.8200483824077406, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -65828,7 +65828,7 @@ } }, { - "accuracy": 0.8351726030048571, + "accuracy": 0.8342344007993999, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -65880,7 +65880,7 @@ } }, { - "accuracy": 0.8562395572662354, + "accuracy": 0.8553903479325144, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -65929,7 +65929,7 @@ } }, { - "accuracy": 0.9072542567002146, + "accuracy": 0.9067489536185014, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -65972,7 +65972,7 @@ } }, { - "accuracy": 0.9154961611095228, + "accuracy": 0.9150279007459942, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -66015,7 +66015,7 @@ } }, { - "accuracy": 0.9091977382961073, + "accuracy": 0.9087123180690565, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -66067,7 +66067,7 @@ } }, { - "accuracy": 0.9202819058769628, + "accuracy": 0.9197991458993209, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -66119,7 +66119,7 @@ } }, { - "accuracy": 0.9539370254466408, + "accuracy": 0.9536874513877065, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -66171,7 +66171,7 @@ } }, { - "accuracy": 0.9603359573765805, + "accuracy": 0.960118293762207, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -66223,7 +66223,7 @@ } }, { - "accuracy": 0.975194010295366, + "accuracy": 0.9750696483411287, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -66266,7 +66266,7 @@ } }, { - "accuracy": 0.9766052067279816, + "accuracy": 0.9764717566339594, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -66315,7 +66315,7 @@ } }, { - "accuracy": 0.9807161657433761, + "accuracy": 0.9805986269524223, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -66361,7 +66361,7 @@ } }, { - "accuracy": 0.993429004361755, + "accuracy": 0.9934035774908567, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -66405,7 +66405,7 @@ "model.layers.34.parallel_decoder": { "attn": [ { - "accuracy": 0.6424451125295538, + "accuracy": 0.6460426481146562, "total_bits": 320757760, "q_proj": { "group_size": { @@ -66469,7 +66469,7 @@ } }, { - "accuracy": 0.6586596338372481, + "accuracy": 0.6651631656445955, "total_bits": 329080832, "q_proj": { "group_size": { @@ -66533,7 +66533,7 @@ } }, { - "accuracy": 0.6992039429514032, + "accuracy": 0.7044269913121274, "total_bits": 336024576, "q_proj": { "group_size": { @@ -66597,7 +66597,7 @@ } }, { - "accuracy": 0.7684934515702098, + "accuracy": 0.7724471594157972, "total_bits": 401557504, "q_proj": { "group_size": { @@ -66661,7 +66661,7 @@ } }, { - "accuracy": 0.8215607718417519, + "accuracy": 0.822956875750893, "total_bits": 475279360, "q_proj": { "group_size": { @@ -66725,7 +66725,7 @@ } }, { - "accuracy": 0.8255545089119359, + "accuracy": 0.8257402871784411, "total_bits": 475479040, "q_proj": { "group_size": { @@ -66789,7 +66789,7 @@ } }, { - "accuracy": 0.8962994688435605, + "accuracy": 0.8957673562200446, "total_bits": 609759232, "q_proj": { "group_size": { @@ -66841,7 +66841,7 @@ } }, { - "accuracy": 0.9015377383483083, + "accuracy": 0.901236565489518, "total_bits": 610024448, "q_proj": { "group_size": { @@ -66893,7 +66893,7 @@ } }, { - "accuracy": 0.9084869811409398, + "accuracy": 0.9089270014511912, "total_bits": 615020544, "q_proj": { "group_size": { @@ -66945,7 +66945,7 @@ } }, { - "accuracy": 0.9133854539770829, + "accuracy": 0.9138647882561934, "total_bits": 623951872, "q_proj": { "group_size": { @@ -66997,7 +66997,7 @@ } }, { - "accuracy": 0.9121412979929071, + "accuracy": 0.912263506337216, "total_bits": 626473984, "q_proj": { "group_size": { @@ -67061,7 +67061,7 @@ } }, { - "accuracy": 0.9178872610393324, + "accuracy": 0.9185702110591688, "total_bits": 630355968, "q_proj": { "group_size": { @@ -67125,7 +67125,7 @@ } }, { - "accuracy": 0.9250044885434603, + "accuracy": 0.9259170231066252, "total_bits": 637362176, "q_proj": { "group_size": { @@ -67186,7 +67186,7 @@ } }, { - "accuracy": 0.9313257117020457, + "accuracy": 0.932315330756338, "total_bits": 646823936, "q_proj": { "group_size": { @@ -67247,7 +67247,7 @@ } }, { - "accuracy": 0.9589409451735647, + "accuracy": 0.9595490631304289, "total_bits": 784740352, "q_proj": { "group_size": { @@ -67308,7 +67308,7 @@ } }, { - "accuracy": 0.9657665647958454, + "accuracy": 0.9660093188285828, "total_bits": 797818880, "q_proj": { "group_size": { @@ -67369,7 +67369,7 @@ } }, { - "accuracy": 0.9723878377362302, + "accuracy": 0.9728262142131203, "total_bits": 911749120, "q_proj": { "group_size": { @@ -67421,7 +67421,7 @@ } }, { - "accuracy": 0.9828568195041857, + "accuracy": 0.9830105790966436, "total_bits": 942718976, "q_proj": { "group_size": { @@ -67473,7 +67473,7 @@ } }, { - "accuracy": 0.9928183704614639, + "accuracy": 0.9928979787387346, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -67527,7 +67527,7 @@ ], "mlp": [ { - "accuracy": 0.6637116482383327, + "accuracy": 0.6616372811166864, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -67579,7 +67579,7 @@ } }, { - "accuracy": 0.6711180586563914, + "accuracy": 0.66909795058401, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -67631,7 +67631,7 @@ } }, { - "accuracy": 0.7168567557083934, + "accuracy": 0.7151819028352436, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -67680,7 +67680,7 @@ } }, { - "accuracy": 0.731956582320364, + "accuracy": 0.7303380715219598, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -67729,7 +67729,7 @@ } }, { - "accuracy": 0.8276517265721371, + "accuracy": 0.8266221975025377, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -67781,7 +67781,7 @@ } }, { - "accuracy": 0.8412390257182875, + "accuracy": 0.840211316158897, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -67833,7 +67833,7 @@ } }, { - "accuracy": 0.8614632204959267, + "accuracy": 0.8605538543901945, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -67882,7 +67882,7 @@ } }, { - "accuracy": 0.9101262782749376, + "accuracy": 0.909626377256293, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -67925,7 +67925,7 @@ } }, { - "accuracy": 0.9182490361364264, + "accuracy": 0.917769193649292, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -67968,7 +67968,7 @@ } }, { - "accuracy": 0.9125268647545263, + "accuracy": 0.9120207209336131, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -68020,7 +68020,7 @@ } }, { - "accuracy": 0.9232120764882941, + "accuracy": 0.922742065630461, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -68072,7 +68072,7 @@ } }, { - "accuracy": 0.9555844099898088, + "accuracy": 0.9553341896910417, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -68124,7 +68124,7 @@ } }, { - "accuracy": 0.96176888127076, + "accuracy": 0.9615242951794675, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -68176,7 +68176,7 @@ } }, { - "accuracy": 0.975914401443381, + "accuracy": 0.9757787349976992, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -68219,7 +68219,7 @@ } }, { - "accuracy": 0.9774474642778698, + "accuracy": 0.977317003827346, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -68268,7 +68268,7 @@ } }, { - "accuracy": 0.9814368831483942, + "accuracy": 0.9813217316803179, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -68314,7 +68314,7 @@ } }, { - "accuracy": 0.993411439029794, + "accuracy": 0.9933913467746032, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -68358,7 +68358,7 @@ "model.layers.35.parallel_decoder": { "attn": [ { - "accuracy": 0.6465980630171926, + "accuracy": 0.6493247935646458, "total_bits": 320757760, "q_proj": { "group_size": { @@ -68422,7 +68422,7 @@ } }, { - "accuracy": 0.662907600402832, + "accuracy": 0.6656991055137234, "total_bits": 329080832, "q_proj": { "group_size": { @@ -68486,7 +68486,7 @@ } }, { - "accuracy": 0.7109487433182566, + "accuracy": 0.7149473240501003, "total_bits": 336024576, "q_proj": { "group_size": { @@ -68550,7 +68550,7 @@ } }, { - "accuracy": 0.7713603722421747, + "accuracy": 0.773094001569246, "total_bits": 401557504, "q_proj": { "group_size": { @@ -68614,7 +68614,7 @@ } }, { - "accuracy": 0.8236599344956248, + "accuracy": 0.8255921037573564, "total_bits": 475279360, "q_proj": { "group_size": { @@ -68678,7 +68678,7 @@ } }, { - "accuracy": 0.8286597101311934, + "accuracy": 0.8282992212395919, "total_bits": 475479040, "q_proj": { "group_size": { @@ -68742,7 +68742,7 @@ } }, { - "accuracy": 0.8959627465197915, + "accuracy": 0.8964072654121801, "total_bits": 609759232, "q_proj": { "group_size": { @@ -68794,7 +68794,7 @@ } }, { - "accuracy": 0.9023268662000957, + "accuracy": 0.9032096172633924, "total_bits": 610024448, "q_proj": { "group_size": { @@ -68846,7 +68846,7 @@ } }, { - "accuracy": 0.9101066589355469, + "accuracy": 0.9112207889556885, "total_bits": 615020544, "q_proj": { "group_size": { @@ -68898,7 +68898,7 @@ } }, { - "accuracy": 0.9148847303892437, + "accuracy": 0.9153132250434474, "total_bits": 623951872, "q_proj": { "group_size": { @@ -68950,7 +68950,7 @@ } }, { - "accuracy": 0.9134670809695595, + "accuracy": 0.9141908570339805, "total_bits": 626473984, "q_proj": { "group_size": { @@ -69014,7 +69014,7 @@ } }, { - "accuracy": 0.9199141891379106, + "accuracy": 0.9200823683487742, "total_bits": 630355968, "q_proj": { "group_size": { @@ -69078,7 +69078,7 @@ } }, { - "accuracy": 0.927891775181419, + "accuracy": 0.9278101419147692, "total_bits": 637362176, "q_proj": { "group_size": { @@ -69139,7 +69139,7 @@ } }, { - "accuracy": 0.933735119669061, + "accuracy": 0.9349461919382999, "total_bits": 646823936, "q_proj": { "group_size": { @@ -69200,7 +69200,7 @@ } }, { - "accuracy": 0.9602641400537992, + "accuracy": 0.9608807093218753, "total_bits": 784740352, "q_proj": { "group_size": { @@ -69261,7 +69261,7 @@ } }, { - "accuracy": 0.9671593120223597, + "accuracy": 0.9674560490407442, "total_bits": 797818880, "q_proj": { "group_size": { @@ -69322,7 +69322,7 @@ } }, { - "accuracy": 0.9733771022997404, + "accuracy": 0.9735774993896484, "total_bits": 911749120, "q_proj": { "group_size": { @@ -69374,7 +69374,7 @@ } }, { - "accuracy": 0.9832662438091478, + "accuracy": 0.9835290877442611, "total_bits": 942718976, "q_proj": { "group_size": { @@ -69426,7 +69426,7 @@ } }, { - "accuracy": 0.9930619095501146, + "accuracy": 0.993023430830554, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -69480,7 +69480,7 @@ ], "mlp": [ { - "accuracy": 0.6830350223340487, + "accuracy": 0.6810321054960552, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -69532,7 +69532,7 @@ } }, { - "accuracy": 0.6900827508223684, + "accuracy": 0.6882693140130294, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -69584,7 +69584,7 @@ } }, { - "accuracy": 0.7316516073126542, + "accuracy": 0.7300675291764109, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -69633,7 +69633,7 @@ } }, { - "accuracy": 0.7462367258573833, + "accuracy": 0.7446803293730083, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -69682,7 +69682,7 @@ } }, { - "accuracy": 0.8374404907226562, + "accuracy": 0.8364784592076352, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -69734,7 +69734,7 @@ } }, { - "accuracy": 0.8501967505404824, + "accuracy": 0.8493359716315019, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -69786,7 +69786,7 @@ } }, { - "accuracy": 0.8689852137314646, + "accuracy": 0.8682069778442383, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -69835,7 +69835,7 @@ } }, { - "accuracy": 0.9146829592554193, + "accuracy": 0.9142303341313412, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -69878,7 +69878,7 @@ } }, { - "accuracy": 0.9224717366067987, + "accuracy": 0.9220570012142784, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -69921,7 +69921,7 @@ } }, { - "accuracy": 0.9175294198487934, + "accuracy": 0.9170662980330617, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -69973,7 +69973,7 @@ } }, { - "accuracy": 0.9276124176226164, + "accuracy": 0.9271852781898097, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -70025,7 +70025,7 @@ } }, { - "accuracy": 0.9581781437522486, + "accuracy": 0.9579229292116667, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -70077,7 +70077,7 @@ } }, { - "accuracy": 0.9639747801579928, + "accuracy": 0.9637717165445027, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -70129,7 +70129,7 @@ } }, { - "accuracy": 0.9772295591078306, + "accuracy": 0.9770949906424472, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -70172,7 +70172,7 @@ } }, { - "accuracy": 0.9788052235779009, + "accuracy": 0.9786854480442247, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -70221,7 +70221,7 @@ } }, { - "accuracy": 0.9825390373405657, + "accuracy": 0.9824329896977073, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -70267,7 +70267,7 @@ } }, { - "accuracy": 0.993855987724505, + "accuracy": 0.9938307514316157, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -70311,7 +70311,7 @@ "model.layers.36.parallel_decoder": { "attn": [ { - "accuracy": 0.6407439332259328, + "accuracy": 0.6434955847890753, "total_bits": 320757760, "q_proj": { "group_size": { @@ -70375,7 +70375,7 @@ } }, { - "accuracy": 0.6557123786524722, + "accuracy": 0.6615089617277446, "total_bits": 329080832, "q_proj": { "group_size": { @@ -70439,7 +70439,7 @@ } }, { - "accuracy": 0.714650505467465, + "accuracy": 0.7182879698903937, "total_bits": 336024576, "q_proj": { "group_size": { @@ -70503,7 +70503,7 @@ } }, { - "accuracy": 0.7758938387820595, + "accuracy": 0.7790922867624384, "total_bits": 401557504, "q_proj": { "group_size": { @@ -70567,7 +70567,7 @@ } }, { - "accuracy": 0.8254091739654541, + "accuracy": 0.8269630482322291, "total_bits": 475279360, "q_proj": { "group_size": { @@ -70631,7 +70631,7 @@ } }, { - "accuracy": 0.8297742040533769, + "accuracy": 0.8294641218687359, "total_bits": 475479040, "q_proj": { "group_size": { @@ -70695,7 +70695,7 @@ } }, { - "accuracy": 0.9035371102784809, + "accuracy": 0.9041589373036435, "total_bits": 609759232, "q_proj": { "group_size": { @@ -70747,7 +70747,7 @@ } }, { - "accuracy": 0.905676753897416, + "accuracy": 0.9075973975031, "total_bits": 610024448, "q_proj": { "group_size": { @@ -70799,7 +70799,7 @@ } }, { - "accuracy": 0.9136265139830739, + "accuracy": 0.9143112960614657, "total_bits": 615020544, "q_proj": { "group_size": { @@ -70851,7 +70851,7 @@ } }, { - "accuracy": 0.9173579404228612, + "accuracy": 0.9180888000287508, "total_bits": 623951872, "q_proj": { "group_size": { @@ -70903,7 +70903,7 @@ } }, { - "accuracy": 0.9131597907919633, + "accuracy": 0.9151945239619205, "total_bits": 626473984, "q_proj": { "group_size": { @@ -70967,7 +70967,7 @@ } }, { - "accuracy": 0.9203361210070158, + "accuracy": 0.9200234538630435, "total_bits": 630355968, "q_proj": { "group_size": { @@ -71031,7 +71031,7 @@ } }, { - "accuracy": 0.9306798922388178, + "accuracy": 0.9311187330045199, "total_bits": 637362176, "q_proj": { "group_size": { @@ -71092,7 +71092,7 @@ } }, { - "accuracy": 0.9357094827451204, + "accuracy": 0.9365291219008596, "total_bits": 646823936, "q_proj": { "group_size": { @@ -71153,7 +71153,7 @@ } }, { - "accuracy": 0.9620099569621839, + "accuracy": 0.9621840997746116, "total_bits": 784740352, "q_proj": { "group_size": { @@ -71214,7 +71214,7 @@ } }, { - "accuracy": 0.9680520923514115, + "accuracy": 0.9685845971107483, "total_bits": 797818880, "q_proj": { "group_size": { @@ -71275,7 +71275,7 @@ } }, { - "accuracy": 0.9749602957775718, + "accuracy": 0.9747740689076876, "total_bits": 911749120, "q_proj": { "group_size": { @@ -71327,7 +71327,7 @@ } }, { - "accuracy": 0.9838960296229312, + "accuracy": 0.9841628231500325, "total_bits": 942718976, "q_proj": { "group_size": { @@ -71379,7 +71379,7 @@ } }, { - "accuracy": 0.9935200339869449, + "accuracy": 0.9934934969795378, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -71433,7 +71433,7 @@ ], "mlp": [ { - "accuracy": 0.7079954900239643, + "accuracy": 0.7063711818895841, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -71485,7 +71485,7 @@ } }, { - "accuracy": 0.7144645891691509, + "accuracy": 0.7129006636770148, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -71537,7 +71537,7 @@ } }, { - "accuracy": 0.7526771896763852, + "accuracy": 0.7514076483877081, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -71586,7 +71586,7 @@ } }, { - "accuracy": 0.7665339771069979, + "accuracy": 0.7653010769894248, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -71635,7 +71635,7 @@ } }, { - "accuracy": 0.849220966037951, + "accuracy": 0.8484787439045153, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -71687,7 +71687,7 @@ } }, { - "accuracy": 0.8612394583852667, + "accuracy": 0.8605065471247623, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -71739,7 +71739,7 @@ } }, { - "accuracy": 0.8789251980028654, + "accuracy": 0.8782966513382762, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -71788,7 +71788,7 @@ } }, { - "accuracy": 0.9205503212778192, + "accuracy": 0.9201615546879015, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -71831,7 +71831,7 @@ } }, { - "accuracy": 0.9279217029872694, + "accuracy": 0.9275566025784141, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -71874,7 +71874,7 @@ } }, { - "accuracy": 0.9233758637779638, + "accuracy": 0.9229700251629478, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -71926,7 +71926,7 @@ } }, { - "accuracy": 0.9328537551980269, + "accuracy": 0.9325125593888133, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -71978,7 +71978,7 @@ } }, { - "accuracy": 0.9611187420393291, + "accuracy": 0.9609211463677256, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -72030,7 +72030,7 @@ } }, { - "accuracy": 0.9665771597310117, + "accuracy": 0.9663972603647333, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -72082,7 +72082,7 @@ } }, { - "accuracy": 0.9787642814611134, + "accuracy": 0.9786501338607386, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -72125,7 +72125,7 @@ } }, { - "accuracy": 0.9802991951766767, + "accuracy": 0.9801945215777347, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -72174,7 +72174,7 @@ } }, { - "accuracy": 0.9838502956064124, + "accuracy": 0.9837598737917448, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -72220,7 +72220,7 @@ } }, { - "accuracy": 0.9942556529452926, + "accuracy": 0.9942339172488764, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -72264,7 +72264,7 @@ "model.layers.37.parallel_decoder": { "attn": [ { - "accuracy": 0.7244028041237278, + "accuracy": 0.7251401449504651, "total_bits": 320757760, "q_proj": { "group_size": { @@ -72328,7 +72328,7 @@ } }, { - "accuracy": 0.7353480991564298, + "accuracy": 0.7351364838449579, "total_bits": 329080832, "q_proj": { "group_size": { @@ -72392,7 +72392,7 @@ } }, { - "accuracy": 0.7765583490070543, + "accuracy": 0.778467855955425, "total_bits": 336024576, "q_proj": { "group_size": { @@ -72456,7 +72456,7 @@ } }, { - "accuracy": 0.8254814524399607, + "accuracy": 0.826851581272326, "total_bits": 401557504, "q_proj": { "group_size": { @@ -72520,7 +72520,7 @@ } }, { - "accuracy": 0.865220960817839, + "accuracy": 0.8661547334570634, "total_bits": 475279360, "q_proj": { "group_size": { @@ -72584,7 +72584,7 @@ } }, { - "accuracy": 0.8673955766778243, + "accuracy": 0.8682361025559275, "total_bits": 475479040, "q_proj": { "group_size": { @@ -72648,7 +72648,7 @@ } }, { - "accuracy": 0.9254547984976518, + "accuracy": 0.9255397508018895, "total_bits": 609759232, "q_proj": { "group_size": { @@ -72700,7 +72700,7 @@ } }, { - "accuracy": 0.9279975012729043, + "accuracy": 0.9278140444504588, "total_bits": 610024448, "q_proj": { "group_size": { @@ -72752,7 +72752,7 @@ } }, { - "accuracy": 0.9330483424036127, + "accuracy": 0.9334863236075953, "total_bits": 615020544, "q_proj": { "group_size": { @@ -72804,7 +72804,7 @@ } }, { - "accuracy": 0.9362571678663555, + "accuracy": 0.9367028663032934, "total_bits": 623951872, "q_proj": { "group_size": { @@ -72856,7 +72856,7 @@ } }, { - "accuracy": 0.9334207647725156, + "accuracy": 0.9334219882362768, "total_bits": 626473984, "q_proj": { "group_size": { @@ -72920,7 +72920,7 @@ } }, { - "accuracy": 0.9379308098240903, + "accuracy": 0.9380430798781545, "total_bits": 630355968, "q_proj": { "group_size": { @@ -72984,7 +72984,7 @@ } }, { - "accuracy": 0.9450287567941766, + "accuracy": 0.9452606502332186, "total_bits": 637362176, "q_proj": { "group_size": { @@ -73045,7 +73045,7 @@ } }, { - "accuracy": 0.9495111986210472, + "accuracy": 0.9497248436275282, "total_bits": 646823936, "q_proj": { "group_size": { @@ -73106,7 +73106,7 @@ } }, { - "accuracy": 0.970076510780736, + "accuracy": 0.9702935187440169, "total_bits": 784740352, "q_proj": { "group_size": { @@ -73167,7 +73167,7 @@ } }, { - "accuracy": 0.9748381360581047, + "accuracy": 0.9750342478877619, "total_bits": 797818880, "q_proj": { "group_size": { @@ -73228,7 +73228,7 @@ } }, { - "accuracy": 0.9807194643899014, + "accuracy": 0.9808537364006042, "total_bits": 911749120, "q_proj": { "group_size": { @@ -73280,7 +73280,7 @@ } }, { - "accuracy": 0.9872332466276068, + "accuracy": 0.9873170484053461, "total_bits": 942718976, "q_proj": { "group_size": { @@ -73332,7 +73332,7 @@ } }, { - "accuracy": 0.9950202472115818, + "accuracy": 0.995018891598049, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -73386,7 +73386,7 @@ ], "mlp": [ { - "accuracy": 0.7667725211695621, + "accuracy": 0.765455773002223, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -73438,7 +73438,7 @@ } }, { - "accuracy": 0.7721489605150724, + "accuracy": 0.7709403038024902, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -73490,7 +73490,7 @@ } }, { - "accuracy": 0.8030493510396857, + "accuracy": 0.8020108624508506, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -73539,7 +73539,7 @@ } }, { - "accuracy": 0.8142537819711786, + "accuracy": 0.8132211283633584, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -73588,7 +73588,7 @@ } }, { - "accuracy": 0.8791496628209164, + "accuracy": 0.8785799302552876, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -73640,7 +73640,7 @@ } }, { - "accuracy": 0.88903210037633, + "accuracy": 0.8885431164189389, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -73692,7 +73692,7 @@ } }, { - "accuracy": 0.9035413453453466, + "accuracy": 0.9030971652583072, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -73741,7 +73741,7 @@ } }, { - "accuracy": 0.9363332108447426, + "accuracy": 0.9360679011595876, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -73784,7 +73784,7 @@ } }, { - "accuracy": 0.9423104399128964, + "accuracy": 0.942075208613747, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -73827,7 +73827,7 @@ } }, { - "accuracy": 0.9384875360288119, + "accuracy": 0.938214898109436, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -73879,7 +73879,7 @@ } }, { - "accuracy": 0.9462612302679765, + "accuracy": 0.9459976459804335, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -73931,7 +73931,7 @@ } }, { - "accuracy": 0.96875598556117, + "accuracy": 0.9686232522914284, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -73983,7 +73983,7 @@ } }, { - "accuracy": 0.973231177580984, + "accuracy": 0.9730978231681021, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -74035,7 +74035,7 @@ } }, { - "accuracy": 0.9829269976992356, + "accuracy": 0.9828629572140543, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -74078,7 +74078,7 @@ } }, { - "accuracy": 0.9841671256642592, + "accuracy": 0.9840919516588512, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -74127,7 +74127,7 @@ } }, { - "accuracy": 0.987087430138337, + "accuracy": 0.9870178660279826, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -74173,7 +74173,7 @@ } }, { - "accuracy": 0.9954305161771021, + "accuracy": 0.9954205523980292, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -74217,7 +74217,7 @@ "model.layers.38.parallel_decoder": { "attn": [ { - "accuracy": 0.9299139411825883, + "accuracy": 0.9306988904350683, "total_bits": 320757760, "q_proj": { "group_size": { @@ -74281,7 +74281,7 @@ } }, { - "accuracy": 0.9339893679869802, + "accuracy": 0.9347448788191143, "total_bits": 329080832, "q_proj": { "group_size": { @@ -74345,7 +74345,7 @@ } }, { - "accuracy": 0.9445653400923076, + "accuracy": 0.9452660083770752, "total_bits": 336024576, "q_proj": { "group_size": { @@ -74409,7 +74409,7 @@ } }, { - "accuracy": 0.9545748233795166, + "accuracy": 0.9553516632632205, "total_bits": 401557504, "q_proj": { "group_size": { @@ -74473,7 +74473,7 @@ } }, { - "accuracy": 0.9652095907612851, + "accuracy": 0.9652297057603535, "total_bits": 475279360, "q_proj": { "group_size": { @@ -74537,7 +74537,7 @@ } }, { - "accuracy": 0.9656619743296975, + "accuracy": 0.9657016208297328, "total_bits": 475479040, "q_proj": { "group_size": { @@ -74601,7 +74601,7 @@ } }, { - "accuracy": 0.9813487106247952, + "accuracy": 0.9813028605360734, "total_bits": 609759232, "q_proj": { "group_size": { @@ -74653,7 +74653,7 @@ } }, { - "accuracy": 0.9819033396871466, + "accuracy": 0.9818047128225628, "total_bits": 610024448, "q_proj": { "group_size": { @@ -74705,7 +74705,7 @@ } }, { - "accuracy": 0.9834265740294206, + "accuracy": 0.9835476796878012, "total_bits": 615020544, "q_proj": { "group_size": { @@ -74757,7 +74757,7 @@ } }, { - "accuracy": 0.9843374867188303, + "accuracy": 0.9844483905716946, "total_bits": 623951872, "q_proj": { "group_size": { @@ -74809,7 +74809,7 @@ } }, { - "accuracy": 0.9828256650974876, + "accuracy": 0.9827951130114103, "total_bits": 626473984, "q_proj": { "group_size": { @@ -74873,7 +74873,7 @@ } }, { - "accuracy": 0.9843038696991769, + "accuracy": 0.9841892264391247, "total_bits": 630355968, "q_proj": { "group_size": { @@ -74937,7 +74937,7 @@ } }, { - "accuracy": 0.9867989452261674, + "accuracy": 0.9867906131242451, "total_bits": 637362176, "q_proj": { "group_size": { @@ -74998,7 +74998,7 @@ } }, { - "accuracy": 0.9880912092171217, + "accuracy": 0.9881284817268974, "total_bits": 646823936, "q_proj": { "group_size": { @@ -75059,7 +75059,7 @@ } }, { - "accuracy": 0.9927338251942083, + "accuracy": 0.9926980490747251, "total_bits": 784740352, "q_proj": { "group_size": { @@ -75120,7 +75120,7 @@ } }, { - "accuracy": 0.9940380238388714, + "accuracy": 0.9940555734853995, "total_bits": 797818880, "q_proj": { "group_size": { @@ -75181,7 +75181,7 @@ } }, { - "accuracy": 0.9951839576426306, + "accuracy": 0.9952051282713288, "total_bits": 911749120, "q_proj": { "group_size": { @@ -75233,7 +75233,7 @@ } }, { - "accuracy": 0.9970956131031639, + "accuracy": 0.9970944927711236, "total_bits": 942718976, "q_proj": { "group_size": { @@ -75285,7 +75285,7 @@ } }, { - "accuracy": 0.9986868586979414, + "accuracy": 0.9987017416248196, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -75339,7 +75339,7 @@ ], "mlp": [ { - "accuracy": 0.7906235895658794, + "accuracy": 0.7905755921414024, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -75391,7 +75391,7 @@ } }, { - "accuracy": 0.7957301265315005, + "accuracy": 0.7956169906415438, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -75443,7 +75443,7 @@ } }, { - "accuracy": 0.8290606297944721, + "accuracy": 0.8291943449723094, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -75492,7 +75492,7 @@ } }, { - "accuracy": 0.8401337673789576, + "accuracy": 0.8402749613711709, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -75541,7 +75541,7 @@ } }, { - "accuracy": 0.8902415350863808, + "accuracy": 0.890121761121248, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -75593,7 +75593,7 @@ } }, { - "accuracy": 0.9023244569176122, + "accuracy": 0.9022378921508789, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -75645,7 +75645,7 @@ } }, { - "accuracy": 0.9159346003281443, + "accuracy": 0.9158611109382228, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -75694,7 +75694,7 @@ } }, { - "accuracy": 0.9425918365779676, + "accuracy": 0.9425125812229357, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -75737,7 +75737,7 @@ } }, { - "accuracy": 0.9481671640747472, + "accuracy": 0.9481104831946523, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -75780,7 +75780,7 @@ } }, { - "accuracy": 0.943213513022975, + "accuracy": 0.9431991765373632, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -75832,7 +75832,7 @@ } }, { - "accuracy": 0.9510493654953807, + "accuracy": 0.9509898612373754, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -75884,7 +75884,7 @@ } }, { - "accuracy": 0.970435967570857, + "accuracy": 0.9704155388631319, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -75936,7 +75936,7 @@ } }, { - "accuracy": 0.9748415225430539, + "accuracy": 0.9748073502590782, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -75988,7 +75988,7 @@ } }, { - "accuracy": 0.9836706377957997, + "accuracy": 0.9836494122680864, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -76031,7 +76031,7 @@ } }, { - "accuracy": 0.984560848850953, + "accuracy": 0.9845251284147564, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -76080,7 +76080,7 @@ } }, { - "accuracy": 0.9876177169774708, + "accuracy": 0.9875867813825607, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -76126,7 +76126,7 @@ } }, { - "accuracy": 0.9950860581899944, + "accuracy": 0.9950819184121332, "total_bits": 4852038656, "gate_proj": { "group_size": { @@ -76170,7 +76170,7 @@ "model.layers.39.parallel_decoder": { "attn": [ { - "accuracy": 0.9525483815293563, + "accuracy": 0.952865437457436, "total_bits": 320757760, "q_proj": { "group_size": { @@ -76234,7 +76234,7 @@ } }, { - "accuracy": 0.9572794029587194, + "accuracy": 0.9574657992312783, "total_bits": 329080832, "q_proj": { "group_size": { @@ -76298,7 +76298,7 @@ } }, { - "accuracy": 0.9645042388062728, + "accuracy": 0.9646425780497099, "total_bits": 336024576, "q_proj": { "group_size": { @@ -76362,7 +76362,7 @@ } }, { - "accuracy": 0.9745496574201082, + "accuracy": 0.9746346159985191, "total_bits": 401557504, "q_proj": { "group_size": { @@ -76426,7 +76426,7 @@ } }, { - "accuracy": 0.9781708105614311, + "accuracy": 0.9782494118339137, "total_bits": 475279360, "q_proj": { "group_size": { @@ -76490,7 +76490,7 @@ } }, { - "accuracy": 0.9786491143076044, + "accuracy": 0.9786814043396398, "total_bits": 475479040, "q_proj": { "group_size": { @@ -76554,7 +76554,7 @@ } }, { - "accuracy": 0.9868288604836715, + "accuracy": 0.9868921261084708, "total_bits": 609759232, "q_proj": { "group_size": { @@ -76606,7 +76606,7 @@ } }, { - "accuracy": 0.9874026602820346, + "accuracy": 0.9874283843918851, "total_bits": 610024448, "q_proj": { "group_size": { @@ -76658,7 +76658,7 @@ } }, { - "accuracy": 0.9884626410509411, + "accuracy": 0.9885324812249133, "total_bits": 615020544, "q_proj": { "group_size": { @@ -76710,7 +76710,7 @@ } }, { - "accuracy": 0.9889502893937262, + "accuracy": 0.9890143886992806, "total_bits": 623951872, "q_proj": { "group_size": { @@ -76762,7 +76762,7 @@ } }, { - "accuracy": 0.9892521461373881, + "accuracy": 0.9892967390386682, "total_bits": 626473984, "q_proj": { "group_size": { @@ -76826,7 +76826,7 @@ } }, { - "accuracy": 0.9900991751959449, + "accuracy": 0.9901401408408818, "total_bits": 630355968, "q_proj": { "group_size": { @@ -76890,7 +76890,7 @@ } }, { - "accuracy": 0.9913232781385121, + "accuracy": 0.9913640908504787, "total_bits": 637362176, "q_proj": { "group_size": { @@ -76951,7 +76951,7 @@ } }, { - "accuracy": 0.9921078830957413, + "accuracy": 0.9921370054546156, "total_bits": 646823936, "q_proj": { "group_size": { @@ -77012,7 +77012,7 @@ } }, { - "accuracy": 0.995225683638924, + "accuracy": 0.995233513022724, "total_bits": 784740352, "q_proj": { "group_size": { @@ -77073,7 +77073,7 @@ } }, { - "accuracy": 0.9960473446469558, + "accuracy": 0.9960600098496989, "total_bits": 797818880, "q_proj": { "group_size": { @@ -77134,7 +77134,7 @@ } }, { - "accuracy": 0.9965417722338125, + "accuracy": 0.9965638714401346, "total_bits": 911749120, "q_proj": { "group_size": { @@ -77186,7 +77186,7 @@ } }, { - "accuracy": 0.9980570715116827, + "accuracy": 0.998064035255658, "total_bits": 942718976, "q_proj": { "group_size": { @@ -77238,7 +77238,7 @@ } }, { - "accuracy": 0.9990102603639427, + "accuracy": 0.9990143782802319, "total_bits": 1213739008, "q_proj": { "group_size": { @@ -77292,7 +77292,7 @@ ], "mlp": [ { - "accuracy": 0.9018635247883043, + "accuracy": 0.9023951292037964, "total_bits": 1350990784, "gate_proj": { "group_size": { @@ -77344,7 +77344,7 @@ } }, { - "accuracy": 0.9045764772515548, + "accuracy": 0.9050821567836561, "total_bits": 1399749568, "gate_proj": { "group_size": { @@ -77396,7 +77396,7 @@ } }, { - "accuracy": 0.9270597570820859, + "accuracy": 0.9275048343758834, "total_bits": 1563213824, "gate_proj": { "group_size": { @@ -77445,7 +77445,7 @@ } }, { - "accuracy": 0.9350585498307881, + "accuracy": 0.9354739440114874, "total_bits": 1754316800, "gate_proj": { "group_size": { @@ -77494,7 +77494,7 @@ } }, { - "accuracy": 0.9487703536686144, + "accuracy": 0.9490110874176025, "total_bits": 1977565600, "gate_proj": { "group_size": { @@ -77546,7 +77546,7 @@ } }, { - "accuracy": 0.9537994736119321, + "accuracy": 0.9540607490037617, "total_bits": 2030891008, "gate_proj": { "group_size": { @@ -77598,7 +77598,7 @@ } }, { - "accuracy": 0.9632092996647483, + "accuracy": 0.9634318383116471, "total_bits": 2183891360, "gate_proj": { "group_size": { @@ -77647,7 +77647,7 @@ } }, { - "accuracy": 0.9718156676543387, + "accuracy": 0.9719637851966055, "total_bits": 2496882080, "gate_proj": { "group_size": { @@ -77690,7 +77690,7 @@ } }, { - "accuracy": 0.9753017002030423, + "accuracy": 0.9754333982342168, "total_bits": 2533683200, "gate_proj": { "group_size": { @@ -77733,7 +77733,7 @@ } }, { - "accuracy": 0.9733954573932447, + "accuracy": 0.9735525846481323, "total_bits": 2571321760, "gate_proj": { "group_size": { @@ -77785,7 +77785,7 @@ } }, { - "accuracy": 0.9769529966931594, + "accuracy": 0.9770862287596652, "total_bits": 2624647168, "gate_proj": { "group_size": { @@ -77837,7 +77837,7 @@ } }, { - "accuracy": 0.9861231571749637, + "accuracy": 0.98619985894153, "total_bits": 3165077920, "gate_proj": { "group_size": { @@ -77889,7 +77889,7 @@ } }, { - "accuracy": 0.98814683603613, + "accuracy": 0.9882074892520905, "total_bits": 3218403328, "gate_proj": { "group_size": { @@ -77941,7 +77941,7 @@ } }, { - "accuracy": 0.992190608852788, + "accuracy": 0.9922330222631756, "total_bits": 3665510816, "gate_proj": { "group_size": { @@ -77984,7 +77984,7 @@ } }, { - "accuracy": 0.9927252852603009, + "accuracy": 0.992766764603163, "total_bits": 3792976896, "gate_proj": { "group_size": { @@ -78033,7 +78033,7 @@ } }, { - "accuracy": 0.9948799421912745, + "accuracy": 0.9949128694440189, "total_bits": 4134812672, "gate_proj": { "group_size": { @@ -78079,7 +78079,7 @@ } }, { - "accuracy": 0.997058662929033, + "accuracy": 0.997077870133676, "total_bits": 4852038656, "gate_proj": { "group_size": {