{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.025843940675258636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.022233786061406136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.015540667809545994, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.013083028607070446, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.011291236616671085, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.007538632024079561, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.021149978041648865, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.018448594957590103, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.011836325749754906, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.009741468355059624, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.009769486263394356, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.010239193215966225, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.008880102075636387, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.006015786901116371, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.005143091082572937, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.005478122737258673, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.004028458148241043, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0036687508691102266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0038256137631833553, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0034089582040905952, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.003627719124779105, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.003721820656210184, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.003007530700415373, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.003191932337358594, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.025843940675258636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.025843940675258636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.02000662125647068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.018395278602838516, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.009860841557383537, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.00915547925978899, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.008662768639624119, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.003919897601008415, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.01892559602856636, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.017116842791438103, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.008916200138628483, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.008192046545445919, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.00859655812382698, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.009084168821573257, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.008003586903214455, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0045045847073197365, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0031780735589563847, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.004681113641709089, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0028246708679944277, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.002225262112915516, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.002761220093816519, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.002125183120369911, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.002788171637803316, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0027371675241738558, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0019207752775400877, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0020614448003470898, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.02000662125647068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.02000662125647068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.104000985622406, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0892106145620346, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.08318392932415009, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0673719272017479, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04655004292726517, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04103343188762665, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.057017982006073, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.051884692162275314, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04844582453370094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.037283506244421005, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03244951367378235, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.028931589797139168, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.024717997759580612, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.022287389263510704, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.021681513637304306, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.014404124580323696, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.011450008489191532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01123921386897564, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.00960567407310009, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.009076293557882309, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007423306815326214, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007257247809320688, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006493628025054932, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004603424109518528, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.051884692162275314, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.051884692162275314, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.0026081122923642397, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.0022594553884118795, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.0020108427852392197, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.001850552624091506, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0011672974796965718, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0009944535559043288, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0016130015719681978, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0014779421035200357, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.001234553405083716, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.001063139527104795, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.001026332494802773, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.0008505434379912913, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.000792766222730279, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0006609209813177586, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0006174158188514411, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0004917846526950598, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.000468731508590281, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0004511422594077885, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.00045675618457607925, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.00042930373456329107, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.000346072280080989, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0004276791005395353, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.00030322340899147093, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0003830332134384662, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.0026081122923642397, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.0026081122923642397, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.0659717544913292, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.06216574087738991, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.06082967668771744, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.05548328161239624, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.02899600751698017, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.027884546667337418, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.032442715018987656, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.03005068376660347, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.029372457414865494, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.026698485016822815, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.02543824166059494, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.01638084463775158, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.014283494092524052, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.013828003779053688, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.013720802031457424, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.008207634091377258, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.007336140610277653, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.007297173608094454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.006912090815603733, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.0068178786896169186, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.004487617872655392, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.00477009080350399, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.004348795861005783, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.003545341780409217, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.032442715018987656, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.032442715018987656, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.06877529621124268, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.06485509872436523, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.0634889230132103, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.057928331196308136, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.03020455129444599, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0290645994246006, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.033704545348882675, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0312662310898304, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.030586544424295425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.027823975309729576, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.026482081040740013, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.016938596963882446, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.01477598026394844, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.014315049163997173, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.014207475818693638, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.00845425296574831, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.007434955332428217, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.0073951794765889645, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.006979979109019041, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.006880046799778938, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.004495304077863693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.0046202149242162704, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.004350102506577969, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.0032066309358924627, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.033704545348882675, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.033704545348882675, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.014120152220129967, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.013297990895807743, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.01292544323951006, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.011831159703433514, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.006288517732173204, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.005987393204122782, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.007216340862214565, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.006677363067865372, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.006362648215144873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.005822448991239071, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.005642315372824669, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0036949801724404097, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.0033483379520475864, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0031728886533528566, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.0031349693890661, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.0019438359886407852, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.0019271998899057508, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.0019119775388389826, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.0018561347387731075, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.0018353573977947235, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.0012162417406216264, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.001519791199825704, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.0011772828875109553, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.001343177049420774, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.014120152220129967, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.014120152220129967, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.008610150776803493, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.0085469800978899, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.004659760743379593, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.0046527571976184845, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.004646299872547388, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0036818410735577345, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.013567249290645123, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.008513258770108223, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.004651530645787716, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.004640970379114151, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.004636736586689949, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.0053974031470716, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0046392520889639854, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0038555427454411983, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0036785616539418697, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0037567622493952513, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.0036777914501726627, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0036090773064643145, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0036776200868189335, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.003608772763982415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.003536229021847248, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0036775697953999043, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0034552544821053743, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0036086775362491608, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.008610150776803493, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.008610150776803493, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.006585354451090097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.006529771722853184, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.003484063781797886, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0034793068189173937, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.003475113306194544, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.002515500644221902, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.011541284620761871, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.006507552228868008, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.003481614403426647, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.003469709074124694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0034912170376628637, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.004309406038373709, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0034683470148593187, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0026811319403350353, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.002512967912480235, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0025834578555077314, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0025117818731814623, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0024543728213757277, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0025115322787314653, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0024538070429116488, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0023715910501778126, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.002511517843231559, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0022995141334831715, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.002453722758218646, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.006585354451090097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.006585354451090097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.04319089651107788, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.03566965088248253, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.023981191217899323, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.02092968299984932, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.018634159117937088, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.011224995367228985, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04392813518643379, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.030927257612347603, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.02011062577366829, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.015817854553461075, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.01819688268005848, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.023548677563667297, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.014759991317987442, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.009421613067388535, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.007644884753972292, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01164019014686346, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.005581624805927277, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.004746539052575827, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.005133230704814196, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.004091234412044287, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.006061093416064978, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.004972049966454506, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0028402553871273994, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0036455693189054728, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.04319089651107788, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.04319089651107788, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.005400415044277906, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.0041708312928676605, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.0031807073391973972, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.0028211604803800583, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0023292896803468466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0017643215833231807, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.003656888147816062, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0033725746907293797, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.002575846156105399, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.00204369961284101, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0019755735993385315, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.0019275256199762225, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0018888971535488963, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.001513117109425366, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.001424497808329761, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0011832399759441614, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.0012829778715968132, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0012510071974247694, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0012600412592291832, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.001218239078298211, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.000947321648709476, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0012492359383031726, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0008740240009501576, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0011934074573218822, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.005400415044277906, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.005400415044277906, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.07924827933311462, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.07554766535758972, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.07442837953567505, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.06862374395132065, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.034971702843904495, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.03402518481016159, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.038482602685689926, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.03564052656292915, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.03530058637261391, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.03273997828364372, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.03150581941008568, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.019496772438287735, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.01712876744568348, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.01684875786304474, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.016785338521003723, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.009834729135036469, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.00921819731593132, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.009197061881422997, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.008823592215776443, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.008757720701396465, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.0055849882774055, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.006297114305198193, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.005503627005964518, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00503914337605238, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.038482602685689926, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.038482602685689926, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.08776680380105972, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.08371774852275848, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.08252225816249847, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.07609917968511581, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.0386366993188858, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.037602394819259644, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.042483456432819366, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.039346832782030106, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.038995299488306046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.036146894097328186, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.03473760187625885, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.021339135244488716, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.018576472997665405, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.018273932859301567, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.01820489577949047, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.010647858493030071, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.009449318051338196, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.009430397301912308, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.008977982215583324, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.008894989266991615, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.0056812576949596405, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.005774825345724821, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.005583176854997873, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.003982068970799446, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.042483456432819366, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.042483456432819366, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.02775115892291069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.0237689558416605, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.021388215944170952, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.019487733021378517, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.011903993785381317, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.01019152533262968, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.015955761075019836, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.01465627271682024, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.012689168564975262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.010574127547442913, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.010295987129211426, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.008145186118781567, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.007298193406313658, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0061173210851848125, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.005815968383103609, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.004277672618627548, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.003859354415908456, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.003749663010239601, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.003667720127850771, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.003511102870106697, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.0026613392401486635, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.0032450996804982424, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.0023461468517780304, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.0028461916372179985, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.02775115892291069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.02775115892291069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.007408258505165577, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.006991593632847071, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.003811595728620887, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.0036608148366212845, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0034906957298517227, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0018642995273694396, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.008079396560788155, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.006723076105117798, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0035699631553143263, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0033916260581463575, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0035622292198240757, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.00374901806935668, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.003339759772643447, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.002111678244546056, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0017084445571526885, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.002116854302585125, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.0016474730800837278, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0014990345807746053, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0016377829015254974, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0014849636936560273, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0015357763040810823, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.00163301732391119, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.001311675412580371, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0014747831737622619, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.007408258505165577, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.007408258505165577, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.005026649218052626, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0047342898324131966, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.0025972763542085886, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0024793257471174, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.002345347311347723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0012990126851946115, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0056839012540876865, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.004528711084276438, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.0023985051084309816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0022717220708727837, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0024227809626609087, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0026093062479048967, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.002231893129646778, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0014299197355285287, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.001183407031930983, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.001457106089219451, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0011375490576028824, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0010468271793797612, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.001130228745751083, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.00103615818079561, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0010902287904173136, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0011265757493674755, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.000950206012930721, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0010278496192768216, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.005026649218052626, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.005026649218052626, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.06888406723737717, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.059178758412599564, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.053311787545681, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04556599259376526, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.030244532972574234, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.02560463175177574, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0423421636223793, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.03700269013643265, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.03169163689017296, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.02532977983355522, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.024187026545405388, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.021893566474318504, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.017816511914134026, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.01472429744899273, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.01388526614755392, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01095952745527029, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.007969450205564499, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.007603440899401903, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.007059962954372168, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.006493458990007639, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.005741273518651724, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.005788190755993128, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.004400086589157581, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004176443908363581, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04556599259376526, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04556599259376526, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.030898552387952805, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.019963888451457024, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.013188035227358341, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.012999484315514565, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.011908523738384247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.007331755943596363, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.01985127292573452, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.017812779173254967, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.014289267361164093, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.009570017457008362, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.009767191484570503, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.01017251517623663, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.00923790130764246, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0069763134233653545, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.006333070807158947, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.005703113041818142, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.00527947535738349, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.00512341596186161, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.00504005653783679, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.004777785390615463, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0040353452786803246, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.005009779240936041, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.00354318437166512, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.004658181220293045, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.030898552387952805, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.030898552387952805, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.06970974057912827, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.0667479857802391, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.06591116636991501, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.06124626845121384, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.030803052708506584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0300827007740736, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.03380828723311424, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.031316179782152176, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.031056124716997147, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.029050784185528755, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.028194187209010124, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.017129145562648773, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.015071713365614414, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.014854403212666512, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.014808478765189648, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.00862695463001728, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.008152451366186142, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.008140381425619125, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.007853121496737003, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.007794983685016632, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.004886392038315535, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.005596502684056759, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.004822871182113886, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.004511299077421427, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.03380828723311424, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.03380828723311424, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.08780109882354736, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.08419986814260483, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.08317215740680695, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.07736268639564514, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.03895510733127594, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.03803742304444313, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.04274235665798187, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.039569005370140076, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.03925566375255585, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.03675905987620354, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.03572848439216614, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.02163494937121868, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.01891576498746872, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.018654417246580124, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.018597312271595, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.01085300650447607, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.010031034238636494, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.010008583776652813, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.009632961824536324, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.009564459323883057, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.006019542459398508, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.006639758124947548, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.005939398426562548, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.005135218612849712, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.04274235665798187, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.04274235665798187, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.08421576768159866, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.08390801399946213, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.036085717380046844, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.03602762892842293, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.03594524785876274, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.008399075828492641, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.10862499475479126, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.08369862288236618, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.03597969561815262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.03588265925645828, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.03583763912320137, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.03866152837872505, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.03588022664189339, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.017013320699334145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.008368405513465405, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.01950937509536743, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.008360183797776699, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.0022744538728147745, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.00835869275033474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.0022664344869554043, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.00837328378111124, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.008358162827789783, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.0024225148372352123, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.002262736205011606, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.036085717380046844, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.036085717380046844, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.012420151382684708, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.011108530685305595, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.00860197376459837, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.007519052363932133, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.005550490226596594, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0039626252837479115, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.009023968130350113, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.008576808497309685, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.005743222776800394, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.004916837438941002, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.004855210892856121, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.0045547098852694035, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.004113977774977684, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0027271730359643698, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.002291641663759947, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.002304255962371826, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.0015109502710402012, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.001288563129492104, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0013974410248920321, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0011321459896862507, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.001251706387847662, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0012619406916201115, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0008498755632899702, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0008530581835657358, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.012420151382684708, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.012420151382684708, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.009939822368323803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.008733638562262058, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.00717575429007411, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.006189893465489149, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.004359678365290165, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0033608293160796165, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.006652197800576687, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.006269910838454962, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.00452897883951664, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.003775375196710229, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.00363554828800261, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0033057520631700754, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0029896413907408714, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0021575195714831352, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0019149151630699635, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0016989920986816287, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0012298407964408398, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.001104935654439032, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0011314954608678818, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0009759064996615052, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0009759233798831701, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00100693479180336, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0007474397425539792, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0007382053299807012, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.009939822368323803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.009939822368323803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10575728118419647, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09445790201425552, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.08950971066951752, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.07783294469118118, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.048038557171821594, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04351942241191864, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.057978320866823196, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05306093394756317, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.049522656947374344, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04118700325489044, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.038194622844457626, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02983429841697216, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02549120783805847, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.023089764639735222, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.022451726719737053, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.015100822784006596, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.011919498443603516, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011652966029942036, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.01054359134286642, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010054015554487705, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.00776065606623888, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007610870059579611, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006718098651617765, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004930534865707159, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.049522656947374344, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.049522656947374344, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.12341587245464325, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.08051969856023788, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05102473497390747, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.052434030920267105, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.051336098462343216, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.02865811251103878, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08241984993219376, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07399637997150421, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.058608949184417725, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03839264437556267, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04128936678171158, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04301469027996063, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.037853121757507324, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.028519444167613983, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.025867491960525513, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.022764671593904495, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.020097747445106506, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01935688592493534, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.018578380346298218, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.017430448904633522, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014033257029950619, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018523860722780228, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.011175500229001045, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.016848010942339897, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05102473497390747, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05102473497390747, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.09199230372905731, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.08807350695133209, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.08691591024398804, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.08043616265058517, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.040313396602869034, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.039318881928920746, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.04430348426103592, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.040995050221681595, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0406489223241806, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.03789211064577103, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.03661875054240227, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0222573671489954, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.019343819469213486, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.019053416326642036, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.01898716390132904, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.011108756996691227, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.009872082620859146, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.009851941838860512, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.009419307112693787, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.009340737015008926, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.005960607435554266, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.006063229404389858, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.0058717685751616955, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.004221799783408642, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.04430348426103592, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.04430348426103592, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.11126136034727097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.10660670697689056, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.10521780699491501, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.09748604148626328, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.049085501581430435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.047877274453639984, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.053852684795856476, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.049895141273736954, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0494898185133934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.046163346618413925, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.04454568400979042, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.02700335532426834, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.023511214181780815, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.023158762603998184, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.02307385951280594, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.013446030206978321, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.011797936633229256, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.011775468476116657, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.011232121847569942, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.011132092215120792, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.007019624579697847, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.00695012416690588, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.006907671689987183, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.004471766296774149, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.049895141273736954, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.049895141273736954, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.07996979355812073, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.07501263916492462, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.07296552509069443, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.06759732961654663, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.03578392416238785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.03408770263195038, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.04102431237697601, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.03773834556341171, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.03634224086999893, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.033101800829172134, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.03231138736009598, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.020921703428030014, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.018303632736206055, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.017455510795116425, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.017256177961826324, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.010680051520466805, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.00975209940224886, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.009679985232651234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.00928886141628027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.009175127372145653, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.006237255875021219, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.006968967616558075, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.00599891971796751, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.005699501372873783, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.04102431237697601, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.04102431237697601, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.01119693648070097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.010393594391644001, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.007047922350466251, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.006327252835035324, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.005113367456942797, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0031372469384223223, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.009214596822857857, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.008933981880545616, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.005238302983343601, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.004727061837911606, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.004762172698974609, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.004615816753357649, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.004267201293259859, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.002565220231190324, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0019788891077041626, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.002343707950785756, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.001489334157668054, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0011926718289032578, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0014240287709981203, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0010975688928738236, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0013261991553008556, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.001350227277725935, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.000882867316249758, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0009418418048880994, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.01119693648070097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.01119693648070097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.009929757565259933, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.009029469452798367, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.006687307730317116, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.005889250431209803, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0044805146753787994, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.003088592318817973, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.007592733483761549, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.00726046459749341, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.004614925477653742, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.004040856845676899, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.003978516440838575, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0038018273189663887, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0034813075326383114, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0022675625514239073, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.00187193532474339, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0019455667352303863, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0013367942301556468, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0011387442937120795, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0012641607318073511, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.001040364382788539, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0011358173796907067, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0011781837092712522, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0008270266698673368, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0008704406209290028, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.009929757565259933, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.009929757565259933, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.12955443561077118, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.11520540714263916, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.10876218974590302, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.09436947107315063, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.05900483578443527, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.053180184215307236, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07473693788051605, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0657300055027008, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.060905370861291885, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.050223786383867264, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04723478481173515, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0386272557079792, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.031450919806957245, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02832101285457611, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.027556687593460083, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.019367076456546783, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.014555997215211391, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.014222720637917519, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.012784142978489399, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.01215216051787138, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.00987689383327961, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009193317964673042, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008189215324819088, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005774770863354206, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.050223786383867264, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.050223786383867264, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.07392568141222, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.05344756692647934, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.04306643083691597, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.03705855458974838, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.03142846003174782, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.02318122237920761, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.04519719257950783, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.04023343324661255, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0346250906586647, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.023066576570272446, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.022811146453022957, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.023470036685466766, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.020180586725473404, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.016463015228509903, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.015469123609364033, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.012884442694485188, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01060081459581852, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.010295289568603039, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.009453565813601017, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.00892107654362917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.008751006796956062, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009115111082792282, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.007891852408647537, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.008048877120018005, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.04306643083691597, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.04306643083691597, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.12525051832199097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1196160688996315, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.11792106926441193, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.10894551873207092, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.056167975068092346, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.054653722792863846, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.06187829747796059, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.057179272174835205, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.056690700352191925, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.052485086023807526, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0505569763481617, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.03131196275353432, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.02716907486319542, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.026736142113804817, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.026636015623807907, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.01567194238305092, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.01397704891860485, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.013948038220405579, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.013283614069223404, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.013161938637495041, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.008517672307789326, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.008751983754336834, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.008384796790778637, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.006278037093579769, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0505569763481617, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0505569763481617, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.14389167726039886, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.13750258088111877, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.13559284806251526, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.12529796361923218, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.06459489464759827, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.06285069137811661, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.07113651931285858, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.06574908643960953, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.06518126279115677, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.060370489954948425, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0580611526966095, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.035822197794914246, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.03109646402299404, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.030591903254389763, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.030478278174996376, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.01786530390381813, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.015634052455425262, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.015602782368659973, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.014806229621171951, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.014665178023278713, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.009411921724677086, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.009266100823879242, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.00925367046147585, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.006033315323293209, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.035822197794914246, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.035822197794914246, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.10278879851102829, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.09592702984809875, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.09288200736045837, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.08607310801744461, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.04622052237391472, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.043683767318725586, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.05344530567526817, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.049209702759981155, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.047112636268138885, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.042580634355545044, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.041579719632864, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0272271316498518, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.02380370907485485, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.022477218881249428, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.022168107330799103, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.01381848007440567, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.012444077990949154, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.012326089665293694, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.011794161051511765, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.011620703153312206, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.00788844469934702, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.008785356767475605, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.007490541320294142, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.0070558590814471245, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.049209702759981155, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.049209702759981155, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.014834117144346237, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.013437079265713692, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.010960960760712624, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.00962181854993105, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.006738596595823765, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0052258032374084, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.010417471639811993, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.009835830889642239, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.006939742248505354, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.005965628661215305, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0058096665889024734, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.005257274955511093, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.004697067197412252, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.003340600524097681, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.002936356933787465, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0026840257924050093, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.0018851622007787228, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0016830124659463763, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0017485584830865264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0015065046027302742, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0015220585046336055, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.001539986813440919, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0011562943691387773, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.001120404340326786, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.014834117144346237, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.014834117144346237, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.011628219857811928, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.010452786460518837, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.008823576383292675, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.007699696812778711, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.005261517129838467, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.004198253154754639, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.007813959382474422, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.007307611871510744, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.005426582880318165, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.004610482603311539, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.004432376008480787, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.003921371418982744, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0035081924870610237, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0025697103701531887, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.002306179841980338, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.001981007633730769, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0014044387498870492, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0012700960505753756, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0012845902238041162, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0011157618137076497, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0010950419818982482, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0010937340557575226, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0008362801745533943, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.000754521053750068, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.011628219857811928, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.011628219857811928, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.14276868104934692, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1291055977344513, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.12375989556312561, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.10863922536373138, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0658268928527832, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.06076117232441902, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07841485738754272, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.07083337008953094, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06765925139188766, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.056910961866378784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.05306608974933624, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04021231085062027, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.033907294273376465, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0315721295773983, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03100767731666565, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02006104402244091, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01614030823111534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.015919040888547897, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014312737621366978, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.013823430985212326, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.010311239399015903, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0098568769171834, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.009244364686310291, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.006148289889097214, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04021231085062027, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04021231085062027, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.13768139481544495, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.08660118281841278, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05967831239104271, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.05936720594763756, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0560309961438179, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.029961388558149338, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08242049813270569, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07512425631284714, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0649869367480278, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03971896320581436, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.040261562913656235, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04257185757160187, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.037238724529743195, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.02900276705622673, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.026757489889860153, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.022266484797000885, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.018072839826345444, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01748741790652275, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.015709025785326958, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014434640295803547, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.013760841451585293, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.015396286733448505, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.011658115312457085, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013136830180883408, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03971896320581436, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03971896320581436, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.13654206693172455, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1302049607038498, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1283496767282486, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.11839388310909271, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.06161745637655258, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.05987868830561638, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.06792002171278, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.06277064979076385, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.062205396592617035, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.05743391811847687, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0552445687353611, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.034267619252204895, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.029779240489006042, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.02927464246749878, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.02916068769991398, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.01712714694440365, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.01513020507991314, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.015097202733159065, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01431501004844904, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.014179997146129608, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.009134972468018532, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.009215866215527058, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.008973252959549427, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.0063050067983567715, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.034267619252204895, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.034267619252204895, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.15308767557144165, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.14607344567775726, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.14399363100528717, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.13292187452316284, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.06917644292116165, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.06722693890333176, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.07633078843355179, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.07046107202768326, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.06984409689903259, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.06449753791093826, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.061968788504600525, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.03844179958105087, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.03339109569787979, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03282254934310913, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.032693617045879364, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.019214220345020294, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.016785118728876114, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.016745567321777344, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01585947535932064, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01570439524948597, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.010144495405256748, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.009956524707376957, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.009963047690689564, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.006488547194749117, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.03844179958105087, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.03844179958105087, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.09039951115846634, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.08340651541948318, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.08047215640544891, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.07414006441831589, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.040664318948984146, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.03817104548215866, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.047239840030670166, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.04340602084994316, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.041552357375621796, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.03700484707951546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.03599017858505249, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.024218766018748283, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.02110530249774456, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.019901305437088013, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.01962221972644329, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.012422410771250725, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.011184820905327797, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.011084903962910175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.010541585274040699, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.010383201763033867, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.007405245676636696, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.008065490983426571, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.007069723680615425, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.0066213360987603664, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.047239840030670166, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.047239840030670166, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.026082539930939674, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.023311439901590347, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.021331200376152992, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.018673773854970932, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.01186483446508646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.010356283746659756, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.01537253800779581, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0142107168212533, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0122463908046484, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.010243336670100689, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.009625181555747986, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.007821823470294476, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.00685910927131772, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.005770368501543999, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.005490567535161972, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.003945660777390003, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.003117411397397518, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0029865121468901634, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0028127862606197596, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0026218683924525976, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.002139580436050892, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.002230287529528141, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0018073446117341518, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0016154665499925613, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.026082539930939674, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.026082539930939674, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.024301758036017418, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.021625589579343796, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.019936654716730118, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0174059197306633, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.010917278937995434, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.009612878784537315, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.013961930759251118, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.01284374576061964, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.011273181065917015, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.00937648769468069, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.008791853673756123, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0070759388618171215, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.006156004965305328, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0052773128263652325, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0050561013631522655, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0035503089893609285, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.002795721869915724, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.002692289650440216, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0025003021582961082, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0023375162854790688, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0018869508057832718, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0019222950795665383, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0016010630642995238, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0013280637795105577, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.024301758036017418, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.024301758036017418, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.13916632533073425, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.12748199701309204, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.1233283281326294, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.10942733287811279, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0640752986073494, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.05998694524168968, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07363865524530411, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06766519695520401, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06554705649614334, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05634668096899986, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.052326880395412445, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03741620108485222, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03228344768285751, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.030648579820990562, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03026818297803402, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.018653610721230507, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.015595341101288795, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01545724831521511, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.013994508422911167, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.013633624650537968, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009574627503752708, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009301858954131603, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008941133506596088, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0056995521299541, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.052326880395412445, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.052326880395412445, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.16603484749794006, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.11265256255865097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.08721882849931717, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.07217040657997131, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0683794841170311, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.048045359551906586, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10059858113527298, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08913315087556839, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.078766368329525, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04826291278004646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04854719340801239, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05254250764846802, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04502253979444504, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03603416308760643, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.033607859164476395, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028561154380440712, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.023151641711592674, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022605065256357193, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02035236544907093, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019056610763072968, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.019115984439849854, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01997089758515358, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0169872734695673, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.017596684396266937, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04826291278004646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04826291278004646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1457291841506958, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.13874079287052155, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.136605367064476, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.12556949257850647, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.06623972207307816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.06422437727451324, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.07309992611408234, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.06761032342910767, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.06694268435239792, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.06143079325556755, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.058782368898391724, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.03701658919453621, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0321493074297905, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.031556881964206696, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03141025826334953, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.018482236191630363, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.016359122470021248, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.01631772704422474, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.015425650402903557, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.015267189592123032, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.009872722439467907, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.010054058395326138, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.009681517258286476, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.006975352764129639, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.03701658919453621, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.03701658919453621, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.16565412282943726, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.15768951177597046, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.15530633926391602, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.14279869198799133, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.07525903731584549, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0729556530714035, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.08319869637489319, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.07680517435073853, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.07607019692659378, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.06982135772705078, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.06681700050830841, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04203501343727112, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.03644685819745064, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03577293083071709, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03561203554272652, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.021005738526582718, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.018291659653186798, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.018247047439217567, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01720774732530117, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.017025159671902657, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.011114311404526234, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.010862037539482117, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.010895169340074062, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.007060956209897995, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04203501343727112, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04203501343727112, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.12013043463230133, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.11011148244142532, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.10611344873905182, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.09684678912162781, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.054172806441783905, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.050651539117097855, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.06329703330993652, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.05776390805840492, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.055391255766153336, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.04884927347302437, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.04712269455194473, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.03231757879257202, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.028113659471273422, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.026493918150663376, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.0261037927120924, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.016563991084694862, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.014841358177363873, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.014711057767271996, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.0139154689386487, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.013706759549677372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.009907941333949566, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.010676726698875427, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.009471739642322063, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.008728936314582825, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.04884927347302437, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.04884927347302437, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.03258148953318596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.029655836522579193, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.027804255485534668, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.024603014811873436, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.014965142123401165, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.013524401001632214, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.01837221160531044, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.017153985798358917, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.015351060777902603, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.01311182975769043, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.012263044714927673, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.009316184557974339, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.008200970478355885, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.007221508305519819, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0069770291447639465, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.00467536598443985, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.0038026391994208097, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0036847670562565327, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0034385023172944784, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0032574955839663744, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0024913933593779802, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0025487798266112804, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0021915712859481573, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.001748626702465117, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.03258148953318596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.03258148953318596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.02377437986433506, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.02163076028227806, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.02010137215256691, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.017790500074625015, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.01083446480333805, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.009705196134746075, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.013788005337119102, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.012710070237517357, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.01110733300447464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.009510677307844162, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.00895420927554369, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0069371797144412994, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0060601672157645226, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.005228833761066198, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.005012826528400183, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.003479661885648966, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.002740467432886362, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.002636779798194766, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0024800822138786316, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0023297774605453014, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.001850947504863143, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0018471674993634224, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0015961831668391824, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.001238480443134904, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.02377437986433506, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.02377437986433506, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.16368931531906128, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1509920060634613, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.14677083492279053, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.13124123215675354, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.07579737901687622, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.07143781334161758, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.08653600513935089, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0793488621711731, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.07736392319202423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.06722193211317062, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.06278593093156815, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0439889132976532, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03785514086484909, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0362376794219017, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03583306074142456, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.021946417167782784, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.018361685797572136, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01823428086936474, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.016573233529925346, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.016198035329580307, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.011216825805604458, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.010780763812363148, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.010539840906858444, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.006458149291574955, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0439889132976532, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0439889132976532, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.16818299889564514, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13489452004432678, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11920493841171265, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.09408489614725113, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07433659583330154, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.060530614107847214, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.09969933331012726, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0894002690911293, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07894498854875565, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0566081739962101, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05153876543045044, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.051306385546922684, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04425298422574997, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.037700552493333817, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03602447360754013, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.027306903153657913, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02271858975291252, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022115381434559822, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01997474953532219, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.018879359588027, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.017387056723237038, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018312955275177956, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015756642445921898, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.015595789067447186, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05153876543045044, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05153876543045044, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.16463837027549744, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1561741977930069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.15361139178276062, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.14064587652683258, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.07506212592124939, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07257774472236633, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.08288591355085373, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.07672575116157532, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.07592247426509857, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.06920907646417618, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.06582032144069672, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04185529425740242, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.03641136735677719, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03567937761545181, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03549785912036896, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.020867647603154182, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.0182492695748806, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.018200477585196495, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01707996241748333, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.016883816570043564, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.010900311172008514, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.010853174142539501, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01066515501588583, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.007050867658108473, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04185529425740242, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04185529425740242, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1879492998123169, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.17832794785499573, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.17540408670902252, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1607491374015808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08591083437204361, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0830693170428276, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09485725313425064, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08779361099004745, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08689046651124954, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07921946048736572, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07538432627916336, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0479409322142601, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04166445881128311, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.040833793580532074, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.040630485862493515, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02389499358832836, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020779242739081383, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.020724579691886902, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01943201944231987, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01920248568058014, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012421028688549995, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01218933705240488, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012147344648838043, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.007697886321693659, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0479409322142601, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0479409322142601, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.15461257100105286, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.1444457769393921, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.14048877358436584, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.12900188565254211, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.07018941640853882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.06665374338626862, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.08026205748319626, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.07368392497301102, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.07135344296693802, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.06422559171915054, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.06208798289299011, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04082823172211647, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.035490963608026505, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.03396053984761238, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.0335954874753952, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02064395882189274, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.018428292125463486, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.01829107291996479, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.017336150631308556, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.017120754346251488, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.011700826697051525, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.012536427937448025, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.011252378113567829, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.009753216058015823, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04082823172211647, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04082823172211647, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.028695357963442802, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.02625611610710621, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.024232903495430946, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.021489208564162254, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.013280780985951424, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.011818953789770603, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.016854288056492805, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.015807734802365303, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.013606196269392967, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.011705652810633183, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.011009957641363144, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.008565999567508698, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.007596082054078579, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.006455098278820515, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.006159386597573757, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.004332114476710558, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.00346195581369102, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0033165537752211094, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0031616073101758957, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.002959707984700799, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.002382350154221058, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0024384211283177137, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.002065584994852543, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0017372056609019637, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.028695357963442802, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.028695357963442802, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.023249085992574692, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0212791059166193, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.019550172612071037, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.01731235906481743, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.010688897222280502, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.009474515914916992, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.013925609178841114, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.012876437976956367, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.01094835251569748, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.00943948794156313, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.008935919031500816, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.006990412250161171, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.006157930474728346, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.005183144938200712, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.004934169352054596, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0035216331016272306, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.00275665195658803, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.002624277723953128, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0025116768665611744, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.002333901822566986, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.00190944061614573, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0019234592327848077, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0016244349535554647, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0013197724474593997, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.023249085992574692, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.023249085992574692, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.17717182636260986, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1634749472141266, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.15869715809822083, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1415565311908722, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.08243823796510696, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.07758256793022156, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.09459762275218964, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.08638734370470047, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.0841558426618576, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.07289765775203705, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.06809672713279724, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04811403900384903, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04123430699110031, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.039414193481206894, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03898291662335396, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.024021213874220848, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.019965410232543945, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01981145143508911, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.017968997359275818, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.01754465512931347, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.012274385429918766, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.011722947470843792, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.011467767879366875, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.007001817692071199, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04811403900384903, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04811403900384903, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.13536378741264343, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.10142737627029419, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.08700256794691086, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.06925713270902634, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.057998813688755035, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.04449518769979477, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07867534458637238, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07087504863739014, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06365039944648743, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04307910054922104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04038940370082855, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.040947407484054565, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03580312430858612, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03018876165151596, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.028745872899889946, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.022227667272090912, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.019075660035014153, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0186677947640419, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016930177807807922, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.016037961468100548, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014542250894010067, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016043171286582947, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013213376514613628, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014124653302133083, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04307910054922104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04307910054922104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1774619221687317, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1681230068206787, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16537800431251526, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1513436883687973, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08160378038883209, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07880212366580963, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09040442854166031, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08347572386264801, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0825745165348053, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07506605982780457, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0715615525841713, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04592430591583252, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.039773862808942795, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03894473612308502, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.038750775158405304, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02298123762011528, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020116960629820824, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.020066814497113228, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.018823785707354546, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01860552839934826, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012289206497371197, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012255697511136532, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012025241740047932, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008330203592777252, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04592430591583252, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04592430591583252, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19968196749687195, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18926164507865906, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18613587319850922, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1704045683145523, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.0918768122792244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08873386681079865, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1016436442732811, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09399638324975967, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09296993166208267, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08453603088855743, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08047353476285934, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05145260691642761, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.044706474989652634, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04377658665180206, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04355136677622795, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025680799037218094, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022381700575351715, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022322053089737892, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020903874188661575, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020651694387197495, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013440033420920372, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013287413865327835, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013133336789906025, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008599461056292057, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05145260691642761, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05145260691642761, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.15673582255840302, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.14504225552082062, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.14056365191936493, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.12846073508262634, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.07115045934915543, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.06715381145477295, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.08176489174365997, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.07504624128341675, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.07255573570728302, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.06451771408319473, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.06213760748505592, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04165245220065117, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.036184098571538925, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.03447958081960678, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.03408384695649147, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.021141603589057922, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.018760837614536285, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.01862563192844391, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.017555557191371918, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.01731572113931179, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.012112731114029884, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01284792274236679, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.011637948453426361, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.010039018467068672, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04165245220065117, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04165245220065117, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.03900262713432312, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.035552334040403366, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.0334370918571949, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.02962639182806015, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.018025940284132957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.016340430825948715, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.021996285766363144, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.020470278337597847, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.01848152093589306, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.015799568966031075, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.014787459746003151, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.011181912384927273, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.009831099770963192, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.008711293339729309, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.008432266302406788, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.005615214351564646, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.004602720029652119, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.004474011715501547, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.004168267361819744, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.003966024145483971, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.003003643359988928, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.003095670137554407, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0026687190402299166, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.00216535571962595, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.03900262713432312, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.03900262713432312, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.033127471804618835, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.030142594128847122, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.028185725212097168, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.02491278573870659, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.015161869116127491, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.013663928024470806, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.018884165212512016, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.017471982166171074, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.015549080446362495, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.013285847380757332, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.012509926222264767, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.009552371688187122, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0083365673199296, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.007312617264688015, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.007044243160635233, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.004794432781636715, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0038294626865535975, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0037034268025308847, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0034560689236968756, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0032673694659024477, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.002526273485273123, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.002548335585743189, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.002201242372393608, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0017150028143078089, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.033127471804618835, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.033127471804618835, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.17294979095458984, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.15960095822811127, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.15505975484848022, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1384250670671463, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.08030673861503601, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.07565025240182877, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.09177481383085251, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0841262936592102, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.08193816989660263, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.07108189165592194, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.06644138693809509, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04661721736192703, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.040137484669685364, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03838692232966423, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03797631710767746, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.023244718089699745, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.019444484263658524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01930324174463749, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.017523150891065598, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.017113227397203445, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.011877329088747501, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.011418587528169155, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.011160749942064285, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.006830597296357155, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04661721736192703, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04661721736192703, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.16041111946105957, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.12666518986225128, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11491222679615021, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.09266161918640137, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07085587084293365, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.05895195156335831, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08776389807462692, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07956692576408386, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07555455714464188, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05222810432314873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04779225215315819, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.045332517474889755, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.039482101798057556, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.035616349428892136, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03466692566871643, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02389504760503769, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.020892543718218803, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.020648444071412086, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01785285398364067, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.017155809327960014, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0148643609136343, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016092853620648384, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013853012584149837, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013643648475408554, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05222810432314873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05222810432314873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.18052446842193604, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1708761751651764, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16794715821743011, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.153473898768425, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08328855782747269, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08032238483428955, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09225060045719147, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0852823406457901, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08433672785758972, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.0764046162366867, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07260239124298096, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.046753350645303726, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.040602996945381165, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03972065448760986, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.039505455642938614, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.0233476459980011, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020369287580251694, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.0203104205429554, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.018984338268637657, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018748875707387924, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012294384650886059, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012187870219349861, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012008624151349068, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00799817219376564, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.046753350645303726, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.046753350645303726, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20373325049877167, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.19289711117744446, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1895700842142105, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.17324548959732056, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09406565874814987, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09073910862207413, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10420256108045578, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09631112962961197, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09525132924318314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08631597459316254, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0819535106420517, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052791524678468704, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.045848384499549866, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.044854145497083664, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04461720585823059, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02640126831829548, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02289973571896553, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022833246737718582, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02132526785135269, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021055102348327637, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013883598148822784, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013546290807425976, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013559638522565365, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008683455176651478, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.045848384499549866, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.045848384499549866, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.16175037622451782, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.14851883053779602, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.14329031109809875, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1302400380373001, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.07325305044651031, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.0686274841427803, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.08450532704591751, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.0778525099158287, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.07488533854484558, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.06576549261808395, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.06309740990400314, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.043086037039756775, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.037401314824819565, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.03537176176905632, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.034895215183496475, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.021778056398034096, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.019078675657510757, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.018901996314525604, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.017700765281915665, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.01741253398358822, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.012268675491213799, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.012903153896331787, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.011671516112983227, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.009859936311841011, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.043086037039756775, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.043086037039756775, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.041702575981616974, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.038002971559762955, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.035849012434482574, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.0317336767911911, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.019282126799225807, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.017545001581311226, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.023369835689663887, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.021717362105846405, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.019772684201598167, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.016867971047759056, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0157412588596344, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.011871310882270336, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.010407580062747002, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.009312113747000694, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.009035935625433922, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.005960552021861076, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.004899062681943178, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.004773578140884638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.004424353130161762, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.004220594651997089, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.003181731328368187, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0032528694719076157, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.002847854048013687, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0022485656663775444, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.041702575981616974, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.041702575981616974, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.036851443350315094, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.03347857668995857, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.03142157196998596, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.02773895673453808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.01686706952750683, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.015247470699250698, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.020795218646526337, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.019276699051260948, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.01731281168758869, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.014758303761482239, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.013768388889729977, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.010496527887880802, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.009212610311806202, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.008127989247441292, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.007856746204197407, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0052764639258384705, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.004226772114634514, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.004096325486898422, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0038036173209547997, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.003600246272981167, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0027886817697435617, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00277266139164567, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0024587195366621017, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0018236064352095127, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.036851443350315094, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.036851443350315094, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.17271296679973602, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1594993770122528, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.1547936648130417, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.13831239938735962, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.08042549341917038, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.07575275748968124, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.09215301275253296, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.08444026112556458, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.08211218565702438, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.07115226238965988, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.06645187735557556, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.046878885477781296, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.040290869772434235, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03850831463932991, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03807622566819191, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02337411604821682, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01952269673347473, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01938626728951931, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.017604179680347443, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.01718912087380886, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.011958714574575424, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.011505928821861744, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.011204611510038376, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.006947917863726616, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.046878885477781296, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.046878885477781296, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.15388567745685577, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1216520443558693, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1106741651892662, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.08707655966281891, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06844531744718552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.05725203454494476, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08529834449291229, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07654840499162674, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07257044315338135, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0508783794939518, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04485522583127022, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04432819038629532, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0383901447057724, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03483309596776962, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.033927831798791885, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02380639687180519, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02109532803297043, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02085302770137787, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.018384015187621117, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.017743369564414024, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015490390360355377, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016752254217863083, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014557608403265476, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014594626612961292, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0508783794939518, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0508783794939518, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.17396563291549683, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.16461586952209473, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1617468148469925, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.14785976707935333, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08044295758008957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07751542329788208, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.08917225897312164, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08239221572875977, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08146776258945465, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07373891770839691, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07004114240407944, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04523726552724838, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.039258986711502075, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.038388848304748535, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03818303346633911, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02263668365776539, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.01973116211593151, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.019673097878694534, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01838628761470318, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018155977129936218, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012000100687146187, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01187367644160986, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.011719133704900742, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.007873055525124073, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04523726552724838, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04523726552724838, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1995519995689392, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18885767459869385, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18561658263206482, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16967305541038513, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.0923047587275505, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08897756040096283, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10217374563217163, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0945531353354454, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09347764402627945, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08463287353515625, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08031328022480011, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05178027227520943, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.045017439872026443, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.044015537947416306, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.043773964047431946, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025844359770417213, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022450383752584457, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02238583192229271, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020888838917016983, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020623475313186646, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013478190638124943, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013255921192467213, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013148737139999866, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008452467620372772, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05178027227520943, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05178027227520943, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.1517883986234665, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.1386314481496811, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1331115961074829, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.12064362317323685, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.06878096610307693, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.06402632594108582, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.08080285787582397, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.07371672242879868, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.0703841969370842, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.06155390664935112, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.05894001945853233, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0410621203482151, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.03579910472035408, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.03364777937531471, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.033123843371868134, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02091418020427227, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.01878919079899788, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.01858595199882984, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.017523424699902534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.017206210643053055, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01215820387005806, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01346709206700325, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.011533001437783241, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.010898214764893055, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0410621203482151, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0410621203482151, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.056681547313928604, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.05170358344912529, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.049494706094264984, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04384079575538635, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.026223760098218918, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.024243593215942383, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.030663039535284042, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.028430812060832977, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.026869291439652443, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.022930366918444633, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.021334664896130562, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.015598524361848831, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0136266415938735, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.012624509632587433, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.012385128065943718, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.007812277413904667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.006596043705940247, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.006496828980743885, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.005942599847912788, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.005742707755416632, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0041265422478318214, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0042390222661197186, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.003807500470429659, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0029153390787541866, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.05170358344912529, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.05170358344912529, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.052004266530275345, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.04729987308382988, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.04513633996248245, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.03982080519199371, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.023809127509593964, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.021924849599599838, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.028193769976496696, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.026056500151753426, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.02441409043967724, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.020796194672584534, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.01938990131020546, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.014269914478063583, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.012448425404727459, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.011428623460233212, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.011177131906151772, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.007144322618842125, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.005894925445318222, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.00579000823199749, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.00528436666354537, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0050855278968811035, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.003735960926860571, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0037090701516717672, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.003419205080717802, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.002407320309430361, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.052004266530275345, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.052004266530275345, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.18693780899047852, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.17295196652412415, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.16821612417697906, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.15059439837932587, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.08690600097179413, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.08212021738290787, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.09841024875640869, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.09074173122644424, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.0886630266904831, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0772235095500946, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.07208747416734695, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04996560513973236, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04332488775253296, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04157821461558342, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04116343706846237, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02492586150765419, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.021070215851068497, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02094302698969841, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.01905076764523983, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.018632560968399048, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.012760475277900696, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.012369467876851559, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.012104022316634655, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.007452487479895353, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04996560513973236, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.04996560513973236, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.16910743713378906, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13718053698539734, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1232210323214531, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10120707005262375, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07448739558458328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06243591010570526, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.09623224288225174, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08819518238306046, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08023613691329956, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05802426114678383, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05344638600945473, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.049881961196660995, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04326291009783745, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.037113260477781296, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03554641455411911, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.025513404980301857, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.021394476294517517, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02091842144727707, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.018657196313142776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01762767694890499, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014955062419176102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016490157693624496, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01316239032894373, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013499069958925247, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.049881961196660995, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.049881961196660995, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.18213479220867157, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.17206169664859772, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16895979642868042, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1542307287454605, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08425651490688324, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08114445209503174, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09339826554059982, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0863691121339798, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08537523448467255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07711832225322723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0730707123875618, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047396447509527206, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04116123542189598, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04022684693336487, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.040003832429647446, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.023671936243772507, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020669862627983093, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.020607545971870422, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.019230574369430542, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018986854702234268, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012490004301071167, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01243643183261156, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012187869288027287, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00823530275374651, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047396447509527206, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047396447509527206, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21291762590408325, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2012845277786255, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.19774708151817322, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1805347502231598, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09859072417020798, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09493859112262726, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10909774899482727, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10102236270904541, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09986549615859985, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09025952965021133, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08547703921794891, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.055262308567762375, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0480768047273159, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04698239266872406, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04672897234559059, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.027559418231248856, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023919064551591873, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.023846231400966644, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.022216765210032463, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021924704313278198, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01429959200322628, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014043601229786873, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013942701742053032, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00883317831903696, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0480768047273159, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0480768047273159, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.19056282937526703, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.17559224367141724, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.16961297392845154, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1541842818260193, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08657707273960114, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08129316568374634, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.0999782457947731, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09194237738847733, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.0884198471903801, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.07800169289112091, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07465098798274994, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.050848666578531265, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.044158950448036194, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04180925711989403, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04124514386057854, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.025750067085027695, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.022489480674266815, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.022283073514699936, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.020908473059535027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02057114988565445, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.014506795443594456, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01514759473502636, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.013810818083584309, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011512926779687405, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.050848666578531265, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.050848666578531265, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.061143334954977036, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.05647014081478119, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05430835112929344, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04855528473854065, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.02840159274637699, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.02649572491645813, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03298654779791832, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.030607381835579872, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.028978845104575157, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.025198187679052353, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.023567605763673782, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.016785940155386925, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.014689866453409195, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.01367208268493414, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.013425551354885101, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.008416704833507538, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.00714498246088624, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0070391492918133736, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.006509511265903711, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0063134473748505116, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.004468849860131741, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.004582726396620274, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.004154849331825972, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.00316405831836164, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04855528473854065, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04855528473854065, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.055702030658721924, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.05145837366580963, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.04932428523898125, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04398630931973457, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.02568388730287552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.023899707943201065, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.030225787311792374, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.02796848677098751, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.02621002495288849, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.022782564163208008, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.021309150382876396, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.015303242020308971, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.013353992253541946, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.012326311320066452, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.01207479927688837, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.00767078623175621, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.006370757706463337, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.006261874455958605, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.005788393784314394, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.005589035805314779, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.004045443143695593, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.004013093654066324, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.003737713908776641, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0026315487921237946, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.05145837366580963, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.05145837366580963, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.19700513780117035, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.18337231874465942, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.17894309759140015, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.16086916625499725, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.09166425466537476, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.08704385161399841, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.10361199826002121, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.09523321688175201, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.09335842728614807, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08200065791606903, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.07696114480495453, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.052575208246707916, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04540614038705826, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.043785110116004944, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04339229315519333, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.026202525943517685, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.022135140374302864, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.022021224722266197, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02010759525001049, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.019724395126104355, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.013382581062614918, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.012864974327385426, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.012718752957880497, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00763694616034627, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04540614038705826, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04540614038705826, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.17414237558841705, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13469764590263367, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12166987359523773, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10174425691366196, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07613503932952881, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06128949299454689, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.09333693236112595, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08544611930847168, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08225402981042862, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.057329267263412476, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05132590979337692, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04849720746278763, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.042295921593904495, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03820912167429924, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03721282258629799, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02541324682533741, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02242380939424038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022234363481402397, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01935999095439911, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.018667029216885567, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015769770368933678, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01728804223239422, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014725030399858952, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014677494764328003, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05132590979337692, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05132590979337692, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.18238472938537598, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.17212523519992828, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1689489483833313, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.15394817292690277, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08452748507261276, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0812879279255867, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09372346103191376, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08670313656330109, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08566690236330032, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07717344164848328, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07298313826322556, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047473423182964325, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.041287846863269806, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04031495004892349, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04008150100708008, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02368766814470291, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020579934120178223, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02051602117717266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.019086115062236786, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01882747933268547, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012338043190538883, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012189248576760292, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012017106637358665, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.0077957045286893845, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047473423182964325, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047473423182964325, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22406263649463654, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21161457896232605, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.207808718085289, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18948964774608612, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10404893755912781, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10010664165019989, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11524084210395813, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.106680728495121, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10542333126068115, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09505362063646317, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08991030603647232, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05845321714878082, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05081278085708618, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04963289573788643, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04935399815440178, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029138771817088127, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02527211233973503, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025188926607370377, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.023430682718753815, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023115145042538643, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015110904350876808, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014858424663543701, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014723168686032295, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009357141330838203, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05081278085708618, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05081278085708618, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.1924189329147339, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.17749075591564178, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.17165972292423248, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.15579469501972198, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08764158934354782, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08232081681489944, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.100788913667202, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09277293086051941, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.08945043385028839, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.0787653923034668, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07528906315565109, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.051259249448776245, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04449431225657463, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0422322079539299, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04169091209769249, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.025845883414149284, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.0225518885999918, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.022352291271090508, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.02089426852762699, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02057156339287758, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.014431205578148365, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.014960390515625477, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.013762429356575012, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011187227442860603, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.051259249448776245, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.051259249448776245, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.0555603988468647, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.051460154354572296, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.049358829855918884, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.044254593551158905, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.025936301797628403, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.024166293442249298, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03037223219871521, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.028214477002620697, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.026448849588632584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.02307923324406147, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.02161662094295025, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.015413005836308002, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.013490715064108372, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.01247197762131691, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.012221895158290863, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0077252634800970554, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.0064687361009418964, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.00635325675830245, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0058940923772752285, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.005692862905561924, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0040789102204144, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.004097596276551485, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0037623427342623472, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0027230812702327967, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.051460154354572296, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.051460154354572296, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.05007592588663101, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.04627576097846031, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.04414444789290428, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0395294651389122, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.023236097767949104, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.021524813026189804, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.02757970429956913, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.025568846613168716, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.02370273694396019, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.02063489519059658, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.019398119300603867, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.013962271623313427, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.01222320832312107, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.011160853318870068, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.01090369001030922, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.007009729277342558, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.005769779905676842, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.005653643049299717, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.00524557800963521, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.005047807935625315, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0037049823440611362, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0036498040426522493, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.003404316259548068, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.002382188104093075, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.05007592588663101, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.05007592588663101, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.21532762050628662, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.200391486287117, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.19550548493862152, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.17596763372421265, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.1005808636546135, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09547353535890579, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11381763219833374, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10441659390926361, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10240098834037781, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08988232165575027, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0843392089009285, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05777113512158394, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04985794052481651, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.048057250678539276, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04764439910650253, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.028842870146036148, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.024278422817587852, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02414107695221901, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.022040562704205513, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.021601999178528786, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.014721505343914032, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014076535589993, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.013972178101539612, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008307221345603466, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04985794052481651, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04985794052481651, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.16065599024295807, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13225609064102173, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1238216906785965, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10388337820768356, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07255329936742783, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06294717639684677, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08485756814479828, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07751574367284775, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07593531906604767, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05527588352560997, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05034570395946503, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.043668393045663834, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.038035281002521515, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03582344949245453, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.035297930240631104, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02252812497317791, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02016889490187168, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.020078036934137344, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.017164887860417366, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.016702713444828987, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.013344664126634598, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.014582514762878418, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.012735947035253048, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.011955833062529564, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05034570395946503, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05034570395946503, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.18723967671394348, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.17666581273078918, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.17341041564941406, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.15801368653774261, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08714068681001663, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08376935124397278, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09676992148160934, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08939487487077713, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0882982462644577, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07949058711528778, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0752495527267456, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.049215056002140045, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04269108921289444, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04167706519365311, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04143542796373367, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02460799552500248, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02148435078561306, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.021418992429971695, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01994987018406391, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.0196855366230011, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013062922284007072, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013033661060035229, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012734011746942997, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008750982582569122, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.049215056002140045, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.049215056002140045, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22989411652088165, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21708332002162933, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21312303841114044, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19434627890586853, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.1070922464132309, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10300149023532867, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11869661509990692, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10984276235103607, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10852976888418198, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09774802625179291, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09247811138629913, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06023694574832916, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05236715078353882, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05112379044294357, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05082783102989197, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030067075043916702, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026061423122882843, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025979651138186455, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02414989285171032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02381674386560917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01565513387322426, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015366881154477596, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015251959674060345, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009728088974952698, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05236715078353882, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05236715078353882, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.19047629833221436, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.17557217180728912, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.16979221999645233, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.15416951477527618, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08684848994016647, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08154897391796112, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.10005836933851242, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09191054850816727, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.08859498053789139, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.07804279774427414, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07461536675691605, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05091326683759689, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04413682967424393, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04189685359597206, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.041363369673490524, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.025715883821249008, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.022452671080827713, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.022255707532167435, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.020813539624214172, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.020487606525421143, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.014443637803196907, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.014983206056058407, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.013774051330983639, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011293056420981884, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05091326683759689, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05091326683759689, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.06412487477064133, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.059221427887678146, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05693836137652397, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.05099165439605713, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.02996010147035122, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.02792465128004551, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0348544716835022, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.032346393913030624, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.030588768422603607, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.026585672050714493, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.024865666404366493, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.0177458506077528, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.015513354912400246, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.014429538510739803, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.014163504354655743, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.008884940296411514, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.007542256265878677, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.007430851459503174, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0068715233355760574, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.00665998412296176, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.004700218327343464, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.004841322544962168, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.004356760066002607, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.003339253831654787, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.05099165439605713, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.05099165439605713, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.05825812742114067, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.05354756489396095, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.05122964084148407, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.045766983181238174, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.026965446770191193, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.024979909881949425, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.031891535967588425, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0295063816010952, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.027594560757279396, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.02387141063809395, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.022370480000972748, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.01615455187857151, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.014124667271971703, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.012958998791873455, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.012684441171586514, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0080980584025383, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.006688985507935286, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.00656155776232481, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.006057871505618095, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0058322204276919365, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.004259791225194931, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00420589093118906, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0039001742843538523, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0027307765558362007, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.05122964084148407, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.05122964084148407, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.20983783900737762, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1949019432067871, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.1898920238018036, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.17066118121147156, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.09796106815338135, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09277849644422531, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11089149862527847, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10201701521873474, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.09979504346847534, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08737685531377792, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08189275860786438, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.056350670754909515, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04870337247848511, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04683095961809158, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.046381086111068726, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.028126345947384834, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.023691251873970032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02354813739657402, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.021484695374965668, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.021029207855463028, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01437754649668932, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.013814471662044525, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01361851580440998, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008227436803281307, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04870337247848511, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04870337247848511, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.19182591140270233, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.16370908915996552, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.15251167118549347, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12391838431358337, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08567852526903152, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07524222880601883, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10602877289056778, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09612898528575897, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09001905471086502, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0680171549320221, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06156192719936371, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05467226356267929, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04724421352148056, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04264048486948013, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0414971262216568, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028472725301980972, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.024379543960094452, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0240064337849617, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.021372796967625618, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02049887925386429, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.017176125198602676, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018284490332007408, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015911737456917763, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01507693063467741, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04724421352148056, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04724421352148056, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19521839916706085, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18422487378120422, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1807577908039093, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16467474400997162, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09108026325702667, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0875370055437088, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10113102942705154, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09347578138113022, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09231282770633698, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08302921801805496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07864619046449661, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.051521651446819305, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0447208434343338, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04364623874425888, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04338966682553291, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02576340362429619, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02264482155442238, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022575149312615395, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.0210418663918972, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02076740376651287, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013725950382649899, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013950063847005367, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013376926071941853, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009621908888220787, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.051521651446819305, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.051521651446819305, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23573458194732666, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2225804477930069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21848835051059723, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19907912611961365, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.1099163293838501, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10568895190954208, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12229743599891663, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11280801147222519, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11141709238290787, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10025325417518616, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09507977962493896, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06215684115886688, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0538799948990345, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05258488282561302, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05227642506361008, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031131785362958908, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026986712589859962, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02690308168530464, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025028416886925697, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024687914177775383, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.0164779145270586, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016187751665711403, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016054421663284302, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010629190132021904, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05227642506361008, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05227642506361008, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.1947399377822876, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.17918363213539124, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.17303681373596191, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.15711349248886108, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.0888628140091896, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08328378945589066, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1027289554476738, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09435028582811356, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09077104926109314, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.07980479300022125, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07642978429794312, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.052506547421216965, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04548848420381546, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04307832941412926, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04249193146824837, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02666942961513996, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.023373087868094444, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.023169774562120438, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.02171301282942295, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.021371804177761078, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.015233893878757954, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.015964383259415627, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014520833268761635, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012365422211587429, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04548848420381546, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04548848420381546, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.061734046787023544, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.057227883487939835, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05490465834736824, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04929152503609657, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.02889421209692955, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.026897305622696877, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.033832304179668427, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.03145677223801613, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.029477806761860847, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.025752224028110504, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.024138735607266426, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.017218738794326782, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.015074806287884712, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.013891777023673058, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.013604379259049892, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.008614911697804928, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.007177371997386217, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.007047513499855995, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.006540138740092516, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.006311808247119188, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.004517900291830301, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0045197936706244946, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0041527338325977325, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0029556096997112036, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04929152503609657, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04929152503609657, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.0527782142162323, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0488334484398365, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.04657730832695961, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04170835018157959, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.024541618302464485, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.02272791415452957, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.029594136402010918, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.027191121131181717, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.025074295699596405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.021857451647520065, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.020647088065743446, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.014986755326390266, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.012997405603528023, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.011813485994935036, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.01151243131607771, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.007520394865423441, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.006100567523390055, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.00595969520509243, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.005553506780415773, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.005326348356902599, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.003982706926763058, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.003872244618833065, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0036055282689630985, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.002503063529729843, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0488334484398365, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0488334484398365, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2244928479194641, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.20919261872768402, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2038964331150055, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18330690264701843, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10522551089525223, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09980670362710953, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1194089949131012, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10940372198820114, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.1071552112698555, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09403370320796967, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08829683065414429, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06074796989560127, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.052287571132183075, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05036172270774841, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04988877475261688, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.030313676223158836, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.025451570749282837, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0253022201359272, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.023085912689566612, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.022612299770116806, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015471150167286396, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014787816442549229, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014631109312176704, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008752288296818733, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.052287571132183075, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.052287571132183075, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.18967153131961823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1561015248298645, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1455978900194168, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11732420325279236, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08454092592000961, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07301285862922668, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10180676728487015, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09270650148391724, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08886165171861649, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06589753180742264, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05733863636851311, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05232097581028938, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04583379253745079, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04220166802406311, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.041324324905872345, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.027135662734508514, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02455747500061989, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02432410791516304, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02148512750864029, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.020766712725162506, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01640467345714569, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018658634275197983, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01541043445467949, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.015767106786370277, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05232097581028938, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05232097581028938, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20058293640613556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1892382949590683, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1857042908668518, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16914457082748413, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09373395144939423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09003292769193649, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1041855737566948, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09624363481998444, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09500032663345337, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08540243655443192, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0808599442243576, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05307083949446678, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046046458184719086, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.044909000396728516, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.044638268649578094, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.026544125750660896, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023232867941260338, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.023151947185397148, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02156258188188076, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021268466487526894, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.014133671298623085, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014207961969077587, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.0137639744207263, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009677933529019356, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046046458184719086, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046046458184719086, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2407088726758957, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22719483077526093, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22293393313884735, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20306944847106934, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11235541105270386, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10801397264003754, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12470711022615433, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11536963284015656, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.1139025166630745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10242249071598053, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09687302261590958, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06341275572776794, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05505923554301262, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05370459705591202, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05339115113019943, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031649939715862274, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027426332235336304, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027332650497555733, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02538679912686348, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025031153112649918, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016538208350539207, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01624421402812004, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016092071309685707, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01037665642797947, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031649939715862274, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031649939715862274, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.18527022004127502, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.17041507363319397, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.16474686563014984, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.14979568123817444, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08443881571292877, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.07932058721780777, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.09780078381299973, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.08949261903762817, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.08620191365480423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.07604996114969254, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.0729459822177887, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04994046688079834, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04340744391083717, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04120172932744026, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04066477715969086, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.025563038885593414, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.022779833525419235, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02258886583149433, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.021276209503412247, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.020965810865163803, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.014928954653441906, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016011428087949753, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014291150495409966, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012826962396502495, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04994046688079834, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04994046688079834, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.06585045158863068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.061186086386442184, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05905156210064888, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.05306300148367882, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.030827032402157784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.02892928384244442, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03566586598753929, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.033061668276786804, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03142013028264046, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.027542049065232277, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.025839809328317642, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.018131565302610397, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.015817470848560333, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0148171940818429, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.014575532637536526, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.009079072624444962, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.007656259927898645, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.00755175668746233, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.006990129593759775, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0067878481931984425, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.00476704491302371, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0047758109867572784, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.004451217129826546, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0031547690741717815, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03566586598753929, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03566586598753929, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.06278941035270691, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.058254461735486984, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.056041110306978226, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.05031905695796013, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.02923649363219738, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.027333548292517662, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.034382108598947525, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.031568653881549835, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.029786646366119385, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.026077672839164734, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.024565668776631355, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.017415538430213928, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.015088335610926151, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.014019734226167202, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.013765417039394379, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.00873146578669548, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0071891858242452145, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.007075335830450058, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0065406630747020245, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.006327109411358833, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.004564518108963966, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00442995922639966, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.004208297003060579, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0028089338447898626, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.05031905695796013, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.05031905695796013, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.21628034114837646, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2015398144721985, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.19657470285892487, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.17711129784584045, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10126955807209015, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09606590867042542, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11436952650547028, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10516070574522018, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10294513404369354, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09061779081821442, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0849878266453743, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.058116890490055084, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0502416156232357, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04840225726366043, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04796198010444641, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02898913249373436, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.024507323279976845, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.024351663887500763, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.022273719310760498, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02183803915977478, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.014814491383731365, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014272435568273067, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0140874357894063, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008530467748641968, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0502416156232357, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0502416156232357, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1745101809501648, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.15034452080726624, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1423688679933548, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12059640884399414, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07969633489847183, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0714922845363617, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.09263383597135544, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08517385274171829, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08238144218921661, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.063808873295784, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.058594703674316406, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04776502400636673, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.041587766259908676, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03916631266474724, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03860079124569893, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.024255411699414253, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02180495485663414, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.021635552868247032, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.018960438668727875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.018465174362063408, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.013946245424449444, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.015519496984779835, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013235787861049175, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.012528770603239536, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04776502400636673, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04776502400636673, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20699353516101837, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.19535109400749207, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.19172026216983795, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.17471009492874146, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09665894508361816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.092894047498703, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10752718895673752, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09923521429300308, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09796933084726334, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08809038996696472, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08343548327684402, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.054668426513671875, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04742465168237686, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04626495763659477, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04598677530884743, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.027373945340514183, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023869305849075317, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02378850057721138, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.022150814533233643, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021846318617463112, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.014596095308661461, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014502836391329765, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014217064715921879, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009751679375767708, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04742465168237686, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04742465168237686, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24202072620391846, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22846749424934387, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22427867352962494, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.2044171541929245, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.113071508705616, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10868231952190399, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1256980299949646, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.1160455197095871, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11459749191999435, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10311590880155563, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0976230576634407, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06381439417600632, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05539826303720474, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.054055105894804, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05373997986316681, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031985871493816376, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027639465406537056, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027548344805836678, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025599315762519836, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025248536840081215, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016954578459262848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016412708908319473, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016524625942111015, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010558601468801498, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031985871493816376, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031985871493816376, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.19615334272384644, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.18178628385066986, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.17595236003398895, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1609894335269928, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08965840935707092, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08444412797689438, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.10333909839391708, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09511559456586838, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09141851961612701, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08131606876850128, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07820066809654236, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05270487815141678, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.045849576592445374, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.043449800461530685, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04287314787507057, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.026728682219982147, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.023573705926537514, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02336600050330162, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022047603502869606, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.021707532927393913, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01519160345196724, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016093404963612556, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01448561530560255, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012461286038160324, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.045849576592445374, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.045849576592445374, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.06519536674022675, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.06041038781404495, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05807559937238693, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.052083469927310944, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.030498087406158447, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.028471900150179863, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03550435230135918, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0329580083489418, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03110891953110695, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.02714572846889496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.025471840053796768, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.01808714121580124, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.015829307958483696, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.01469612680375576, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.014423200860619545, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.00906845461577177, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.007683653384447098, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.00756227457895875, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.007015960291028023, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.006798162125051022, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.004811236634850502, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0049380166456103325, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.004462455399334431, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.003405272029340267, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.052083469927310944, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.052083469927310944, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.06077399477362633, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.05627470090985298, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.053752314299345016, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04812675341963768, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.028205759823322296, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.02616858296096325, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.03332554176449776, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.03098367713391781, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.028772225603461266, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.02509467303752899, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.02358342334628105, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.016873087733983994, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.014791556634008884, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.013530846685171127, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.013233181089162827, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.008438028395175934, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0069455355405807495, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.006801999174058437, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.006315979175269604, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.006074689794331789, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.004408183973282576, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.004336116369813681, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.004034160636365414, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.002739453222602606, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04812675341963768, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04812675341963768, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.21738114953041077, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.20243209600448608, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.1973143368959427, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.17749814689159393, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10167894512414932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09643983095884323, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11521840840578079, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10577040165662766, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10347611457109451, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09086817502975464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0852874219417572, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.058560486882925034, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05044076591730118, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0485827662050724, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04811481758952141, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02919677458703518, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.024570757523179054, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02443082258105278, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02231406979262829, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.021873122081160545, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.014909074641764164, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014275053516030312, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014110672287642956, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00846733059734106, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05044076591730118, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05044076591730118, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1729223132133484, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14678144454956055, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.13544714450836182, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1093982458114624, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07959040254354477, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06916458159685135, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0971912071108818, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08936147391796112, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0821085199713707, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06181957945227623, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05562108755111694, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.050265196710824966, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.043890099972486496, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.039541568607091904, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.038450900465250015, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0257688257843256, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.022566523402929306, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022146426141262054, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.019533850252628326, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.018717512488365173, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015126854181289673, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01688672974705696, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013878817670047283, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013892577961087227, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.050265196710824966, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.050265196710824966, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19743390381336212, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18637651205062866, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18288055062294006, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1666066199541092, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.0923796072602272, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0887797549366951, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10270938277244568, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09487160295248032, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09362796694040298, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08421750366687775, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07976307719945908, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052325040102005005, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04541315883398056, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.044300589710474014, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04403158277273178, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.0261618010699749, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023009376600384712, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022928699851036072, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.0213874951004982, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02110157534480095, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013945507816970348, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014216755516827106, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013575224205851555, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009840183891355991, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052325040102005005, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052325040102005005, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24084508419036865, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22743524610996246, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22324055433273315, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20347967743873596, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.112550750374794, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10815808922052383, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12501446902751923, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11555437743663788, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11408881843090057, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10262788087129593, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09711287170648575, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0634530633687973, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0551290400326252, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.053782932460308075, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05346447974443436, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031738556921482086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027425937354564667, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02733454667031765, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025387398898601532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025033965706825256, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016592692583799362, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016185320913791656, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01614067703485489, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010253936052322388, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031738556921482086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031738556921482086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.19755491614341736, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.18188543617725372, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.17574676871299744, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16033804416656494, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.09013615548610687, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08448753505945206, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.10421469807624817, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09580022841691971, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09215199202299118, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08122382313013077, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07810333371162415, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05321395397186279, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04628561809659004, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0437927320599556, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04318903386592865, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.027024492621421814, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.023909607902169228, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.0236944742500782, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022283464670181274, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.021933719515800476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.015423828735947609, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016505302861332893, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014704101718962193, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012928519397974014, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04628561809659004, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04628561809659004, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.07128562033176422, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.06626249849796295, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.06395383924245834, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.05748949944972992, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.033332839608192444, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.03126192465424538, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.038517650216817856, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.03572024032473564, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03395146504044533, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.029789051041007042, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.02794964797794819, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.019598007202148438, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.01709485612809658, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.01600032113492489, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.01573936827480793, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.009797938168048859, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.008251369930803776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.008136135526001453, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.007529852911829948, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.00731070339679718, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.005130713805556297, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.005119269713759422, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.004784931894391775, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0033413914497941732, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.038517650216817856, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.038517650216817856, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.06172287464141846, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.057254258543252945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.05476902797818184, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04912678897380829, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.02862117812037468, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.026620037853717804, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.03381916880607605, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.03137751668691635, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.02917570434510708, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.025540318340063095, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.024003349244594574, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.01709607243537903, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.014988334849476814, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.013743751682341099, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.013433244079351425, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.008582268841564655, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.007066299673169851, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0069182091392576694, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.006438474170863628, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.006198415067046881, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.004503547213971615, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.004420860670506954, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.004127743188291788, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0028175166808068752, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04912678897380829, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.04912678897380829, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.22787317633628845, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.21254965662956238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.20751582086086273, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18698889017105103, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10667350888252258, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10133256018161774, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12030406296253204, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11067524552345276, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10845811665058136, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09559791535139084, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08979999274015427, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06114053353667259, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.052838973701000214, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05099313333630562, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.050545670092105865, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.030519848689436913, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.025825899094343185, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.025679728016257286, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.023499418050050735, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023051319643855095, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01565255969762802, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015048264525830746, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01490655355155468, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009017360396683216, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05099313333630562, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05099313333630562, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.16771870851516724, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14410163462162018, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.13661549985408783, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11344272643327713, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07634732872247696, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06838679313659668, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08914439380168915, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08154214918613434, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07927676290273666, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06176215037703514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05360700935125351, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04578527435660362, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03990652412176132, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03762498125433922, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.037067994475364685, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.023419082164764404, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.021043112501502037, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02090948075056076, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.018446914851665497, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.017954738810658455, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.013633090071380138, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.015092944726347923, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.012965243309736252, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.012284429743885994, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04578527435660362, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04578527435660362, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19524383544921875, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18412773311138153, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.180632546544075, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1645534336566925, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09134489297866821, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08774091303348541, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10169464349746704, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09387216717004776, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09262390434741974, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08321565389633179, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07871776074171066, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05172739177942276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.044899411499500275, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0437610000371933, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.043491121381521225, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025891520082950592, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022610444575548172, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022529274225234985, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020969338715076447, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020675400272011757, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013740952126681805, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013786944560706615, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013368029147386551, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00931523460894823, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05172739177942276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05172739177942276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24329136312007904, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22958961129188538, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22523407638072968, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20526868104934692, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11382760852575302, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10933252424001694, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12645745277404785, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11688807606697083, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11538000404834747, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10368087142705917, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0980411171913147, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06425628066062927, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05583295598626137, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.054440103471279144, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05411361902952194, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03214975818991661, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02784123457968235, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027745693922042847, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025767732411623, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025400476530194283, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01688537746667862, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01654316671192646, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016428979113698006, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010634856298565865, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03214975818991661, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03214975818991661, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.1955333650112152, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.1802728772163391, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.17414233088493347, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.15859432518482208, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08933210372924805, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.0838860422372818, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1033746674656868, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09494705498218536, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09118437021970749, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08052164316177368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07720930129289627, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05265434831380844, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04601970687508583, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04359785094857216, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04300275817513466, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02685445174574852, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.024083247408270836, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02386580966413021, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022488806396722794, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022154761478304863, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01542549580335617, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016916440799832344, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01472869049757719, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013534236699342728, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04601970687508583, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04601970687508583, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.0800568163394928, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.0746094286441803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.07203418761491776, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.06488577276468277, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.037548039108514786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.03527090698480606, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0433511808514595, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.04021549969911575, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03822748735547066, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03363696113228798, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.031609907746315, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.02206377312541008, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.019278695806860924, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.018035436049103737, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0177316777408123, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0110408253967762, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.009315603412687778, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.009183075278997421, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.008522991091012955, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.008276034146547318, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.005774776451289654, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.005811587441712618, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.005377736873924732, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0038303942419588566, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0433511808514595, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0433511808514595, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.06306583434343338, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.058671142905950546, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.05639512091875076, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.05076812207698822, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.02933424897491932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.027447599917650223, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.03437250852584839, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.03187015652656555, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.029856793582439423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.026255473494529724, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.024713914841413498, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.017338570207357407, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.015178076922893524, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.014086754992604256, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.013817167840898037, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.008692396804690361, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.007240192499011755, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.007116310764104128, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.006611323915421963, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.00639900891110301, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0045425728894770145, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.004485929850488901, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.004203700926154852, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0028645452111959457, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.05076812207698822, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.05076812207698822, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.22994928061962128, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.21472275257110596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.20985786616802216, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18926924467086792, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10775180160999298, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10250888019800186, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12130393832921982, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11166514456272125, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10956413298845291, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09665172547101974, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09082646667957306, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06167376786470413, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05329737812280655, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.051482319831848145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.051060985773801804, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.030799832195043564, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.026036689057946205, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.025907132774591446, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.023716820403933525, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023272084072232246, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015731342136859894, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015117645263671875, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014994552358984947, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008981726132333279, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.051482319831848145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.051482319831848145, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1490156054496765, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1282518208026886, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12107248604297638, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.09895551949739456, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06802333146333694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0609528012573719, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08046796172857285, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07321269810199738, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07057972997426987, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.053991448134183884, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04846474528312683, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.041756901890039444, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0362362414598465, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03392009809613228, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03336554765701294, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.021722877398133278, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.019544363021850586, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.019393669441342354, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01716786064207554, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.016706667840480804, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.013248293660581112, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.014601638540625572, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.012596477754414082, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.012300914153456688, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04846474528312683, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04846474528312683, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1766899973154068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.16666333377361298, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16347594559192657, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1488896906375885, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08281775563955307, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07949899882078171, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09216731786727905, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08513852953910828, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08396489918231964, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07540865242481232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07134358584880829, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04693777859210968, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.040789924561977386, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03973788022994995, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.039490677416324615, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.0235038623213768, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020665615797042847, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.020590128377079964, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.019185202196240425, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018921038135886192, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012559954077005386, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012803012505173683, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012224727310240269, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008889377117156982, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04693777859210968, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04693777859210968, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23912329971790314, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22561343014240265, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22140391170978546, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.2017674297094345, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11188194155693054, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10743915289640427, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12437017261981964, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11496586352586746, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11342711001634598, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10188955068588257, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09643707424402237, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0632074847817421, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05489196255803108, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.053492847830057144, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05317321792244911, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03158515319228172, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027319487184286118, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027222739532589912, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025270190089941025, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024906013160943985, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016480514779686928, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016181079670786858, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016026541590690613, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010323370806872845, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03158515319228172, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03158515319228172, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.18401025235652924, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.16872592270374298, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.16278427839279175, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.14790743589401245, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08388025313615799, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.07846924662590027, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.09744541347026825, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.08927325904369354, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.08569003641605377, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.0753122866153717, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07218066602945328, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04962795227766037, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.043480899184942245, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0411311611533165, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04055750370025635, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02559780329465866, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.023023009300231934, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.022802410647273064, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.021510256454348564, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.021170498803257942, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.015243354253470898, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016504427418112755, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014602444134652615, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013448568992316723, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04962795227766037, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.04962795227766037, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.0868181437253952, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.08106758445501328, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.07818922400474548, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.07051800191402435, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0406765453517437, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.038180574774742126, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.047133076936006546, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.043770402669906616, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.041375089436769485, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03654317557811737, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.03438291326165199, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.02400827780365944, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.02096044272184372, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.019511675462126732, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.019160358235239983, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.011997908353805542, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.010035118088126183, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.009873570874333382, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.009193915873765945, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.00890918355435133, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.006252564024180174, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.006218724884092808, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0057877409271895885, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.004002041183412075, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.047133076936006546, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.047133076936006546, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.07760342955589294, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.07226648181676865, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.06997619569301605, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.06294287741184235, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.03612152114510536, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.03402983397245407, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04146623611450195, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.038373950868844986, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.036771055310964584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.032358672469854355, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03037627786397934, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.020991096273064613, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.018332241103053093, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.017303837463259697, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.017055803909897804, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.010496069677174091, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0088436808437109, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.008739065378904343, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.008069249801337719, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.007857128977775574, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0054562813602387905, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.005356819834560156, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.005138726904988289, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0033657944295555353, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04146623611450195, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04146623611450195, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.22681768238544464, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2120383381843567, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.20716986060142517, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18703268468379974, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10637985169887543, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10124880075454712, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11959806084632874, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11024711281061172, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10810519754886627, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09557492285966873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08979835361242294, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06084553152322769, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.052711546421051025, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05086896941065788, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05043993890285492, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.030343426391482353, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.025807736441493034, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.025668693706393242, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02354195900261402, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023105306550860405, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015579993836581707, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015089381486177444, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014889953657984734, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00915494468063116, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05086896941065788, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05086896941065788, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.17408804595470428, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.15690268576145172, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.15056537091732025, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12260232865810394, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0806686133146286, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07466074079275131, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0936339944601059, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0860505998134613, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08259707689285278, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06756065785884857, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0575808621942997, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04821278899908066, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.042381029576063156, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03991680219769478, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03932316228747368, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.024673327803611755, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.022653933614492416, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022457638755440712, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.020389854907989502, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019868506118655205, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0144779272377491, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01658223196864128, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013805687427520752, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013763556256890297, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04821278899908066, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04821278899908066, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.16551180183887482, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.15616239607334137, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.15308181941509247, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.13954104483127594, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.07783328741788864, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07470647245645523, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.08653660118579865, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0801038071513176, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.07890196889638901, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07092125713825226, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.06719937175512314, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.044328659772872925, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.038688790053129196, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03766743838787079, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03742872178554535, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02222132310271263, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02012106031179428, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02004942297935486, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01880182884633541, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01855352334678173, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012136640027165413, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013199158944189548, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.011819258332252502, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009979363530874252, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.044328659772872925, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.044328659772872925, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23738962411880493, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22408273816108704, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21975329518318176, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20041613280773163, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11176801472902298, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10732418298721313, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1243581622838974, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11496185511350632, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11328023672103882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10187104344367981, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0964396595954895, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06358939409255981, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05536225438117981, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05392417311668396, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05359935015439987, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031872913241386414, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02853742241859436, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.028433991596102715, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.026613352820277214, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.026265636086463928, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.017306186258792877, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01837124116718769, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016848241910338402, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01354700792580843, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031872913241386414, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031872913241386414, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.07936953008174896, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.07217013835906982, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.06667489558458328, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.060130901634693146, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.035947296768426895, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.03283499553799629, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.04888635128736496, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.042569562792778015, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.0369107723236084, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.03162069991230965, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.030130349099636078, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.021977154538035393, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.019321050494909286, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.017725065350532532, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.017338313162326813, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.011364081874489784, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.00995555054396391, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.009801792912185192, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.009182876907289028, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.008956811390817165, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.006791994441300631, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.007237025070935488, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.006387912668287754, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.005849659908562899, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.04888635128736496, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.04888635128736496, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.07736147940158844, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.07228289544582367, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.0701390951871872, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.06338722258806229, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.03643886744976044, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.03445759788155556, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.042674023658037186, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.03853895887732506, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03706209361553192, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.032785579562187195, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.03130202740430832, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.021810611709952354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.018529290333390236, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.01757720299065113, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.017350099980831146, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.010976150631904602, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.009096160531044006, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.009001844562590122, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.008358129300177097, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.008163772523403168, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.005791950970888138, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.005624695681035519, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.005492608994245529, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0037367905024439096, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.042674023658037186, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.042674023658037186, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.07456576079130173, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.06943725794553757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.06703434139490128, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.06039735674858093, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.03475819528102875, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.03267570212483406, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04028717428445816, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.03723011165857315, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.035388872027397156, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.031161075457930565, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.02933064103126526, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.020393457263708115, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0177839957177639, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.01667236164212227, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.016403382644057274, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.010198215954005718, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.008530882187187672, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.00841585360467434, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.007793878670781851, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.0075702546164393425, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.005315812770277262, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.005208464339375496, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.00498307915404439, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0032851961441338062, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04028717428445816, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04028717428445816, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.21979451179504395, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.20560596883296967, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2007257044315338, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18144187331199646, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10322875529527664, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0982198640704155, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11621373891830444, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10717529058456421, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10492445528507233, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09282545745372772, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08729679137468338, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05918911471962929, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05119352415204048, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.049353163689374924, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04894980788230896, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.029553722590208054, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.024977203458547592, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.024815039709210396, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.022785108536481857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.022338291630148888, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015111664310097694, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014535638503730297, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014397570863366127, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008662242442369461, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05119352415204048, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05119352415204048, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.18888770043849945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.16375988721847534, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.15615646541118622, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12982606887817383, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08680988848209381, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07856693863868713, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0996672734618187, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09139733016490936, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08955152332782745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06957817822694778, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06224960461258888, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05140328034758568, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04523702710866928, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04320776090025902, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04273059219121933, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.026580285280942917, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02484245039522648, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024741191416978836, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02192007564008236, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.021477067843079567, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015935715287923813, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01840512454509735, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01535522285848856, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01557063590735197, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05140328034758568, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05140328034758568, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.16912095248699188, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.15971602499485016, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.15671269595623016, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1430199146270752, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.0796259418129921, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0764862447977066, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.08847519755363464, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08183795213699341, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08069343864917755, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07262907922267914, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.06886184215545654, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.045381542295217514, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.039543185383081436, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03855106979608536, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.038317468017339706, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02276146039366722, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020603928714990616, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02053341269493103, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01926981471478939, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.0190302524715662, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012489807792007923, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013522894121706486, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012172079645097256, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010250615887343884, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.045381542295217514, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.045381542295217514, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23174729943275452, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21889802813529968, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21484005451202393, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19603005051612854, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10853447020053864, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10427448153495789, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12056764215230942, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11150912195444107, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11001141369342804, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09897219389677048, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09365099668502808, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.061201322823762894, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05322229862213135, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05188097059726715, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.051553864032030106, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030573414638638496, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02645113877952099, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02635425701737404, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02448459528386593, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024135924875736237, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015912270173430443, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.0156114362180233, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015463316813111305, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00987598579376936, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05188097059726715, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05188097059726715, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.18121787905693054, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.16551673412322998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.15962710976600647, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1443566530942917, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.08262761682271957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.07694366574287415, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.09628846496343613, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.08787623047828674, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.08453547209501266, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.0735742524266243, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07060880213975906, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.049362894147634506, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04269792139530182, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0403447300195694, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.039754509925842285, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.025462213903665543, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.022298607975244522, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02210731990635395, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.02067730389535427, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.020345255732536316, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01513562723994255, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.015705259516835213, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014459040015935898, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012560575269162655, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.049362894147634506, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.049362894147634506, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.0730493888258934, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.06824427098035812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.0661693587899208, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.05982910469174385, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.03440222889184952, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.032497357577085495, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.040700726211071014, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.036441538482904434, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03499140217900276, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03096850775182247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.02987268753349781, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.020956726744771004, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.017562462016940117, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.016626674681901932, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.016399459913372993, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.010577707551419735, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.008619406260550022, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.008525380864739418, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.007929026149213314, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.007739011198282242, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.005599383730441332, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0053493427112698555, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.005314767360687256, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0035623747389763594, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.040700726211071014, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.040700726211071014, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.06985271722078323, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.06502795219421387, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.06257544457912445, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.056259915232658386, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.03250567987561226, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.030441008508205414, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0378047339618206, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.03502620756626129, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.033078696578741074, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0290919728577137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.02734050154685974, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.019136536866426468, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.016753923147916794, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.015587340109050274, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.015308771282434464, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.009575544856488705, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.008014258928596973, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.007895056158304214, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.00731926504522562, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.00709544075652957, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0050224680453538895, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00495951808989048, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.004677726421505213, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0031915027648210526, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0378047339618206, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0378047339618206, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2205304354429245, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2062806785106659, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.20150026679039001, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18197843432426453, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10349924862384796, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09846258908510208, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11669301241636276, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10745816677808762, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10523271560668945, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09298515319824219, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08762633055448532, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05942042917013168, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.051350634545087814, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04951327294111252, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04907132685184479, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02964363433420658, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.025080550462007523, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.024923987686634064, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.022862518206238747, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02242789790034294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015164578333497047, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014607482589781284, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014435033313930035, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008740785531699657, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.051350634545087814, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.051350634545087814, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1686016321182251, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1505778431892395, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14549462497234344, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1257058084011078, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07702910155057907, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07142207026481628, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08860407769680023, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08046114444732666, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07910647243261337, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06540298461914062, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06030876561999321, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04574282094836235, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.040587469935417175, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.039068497717380524, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03870577737689018, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02457093819975853, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02347657084465027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.023405153304338455, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.021551819518208504, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.021256733685731888, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01580512709915638, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018166767433285713, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015437483787536621, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.015998102724552155, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04574282094836235, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04574282094836235, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.16337180137634277, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.15433432161808014, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.15136387944221497, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.13822689652442932, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.07662015408277512, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07358574122190475, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.08526047319173813, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.07885917276144028, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0776582583785057, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.06991573423147202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.06627285480499268, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04348558187484741, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.037826258689165115, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03680993989109993, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03656603395938873, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02175399661064148, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.019206751137971878, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.01913030631840229, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01787959225475788, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01762867346405983, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.011600730009377003, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012012640945613384, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.011264780536293983, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00845206156373024, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04348558187484741, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04348558187484741, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22427964210510254, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2119484394788742, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.20798134803771973, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18995732069015503, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10521053522825241, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.1010938435792923, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11722857505083084, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10819002240896225, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10663371533155441, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09601138532161713, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09102340042591095, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05975573509931564, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051757778972387314, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.050427697598934174, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0500984862446785, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029891587793827057, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02606201171875, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025971123948693275, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024216417223215103, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023880550637841225, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01589857041835785, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015927283093333244, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01544796209782362, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01081026066094637, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051757778972387314, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051757778972387314, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.03767954930663109, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.03399723395705223, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.03256601840257645, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.029328422620892525, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.01731359027326107, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.015939917415380478, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.02055360935628414, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.018644655123353004, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.017770124599337578, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.01524955965578556, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.01457764208316803, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.01053939014673233, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.009215348400175571, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.008610926568508148, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.008461984805762768, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.005482505541294813, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.004966840613633394, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.004908916540443897, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.0046158600598573685, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.004521950613707304, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.0033373255282640457, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.00371616892516613, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.0031750944908708334, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.003112725680693984, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.03767954930663109, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.03767954930663109, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.06808502972126007, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.06344801187515259, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.06146909296512604, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.055413618683815, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.03188256919384003, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.03005298599600792, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03663479536771774, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.033823538571596146, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03244517371058464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.02855715900659561, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.02687845565378666, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.018631987273693085, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.01617281511425972, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.015293678268790245, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.015085053630173206, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.00931584183126688, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.007855704054236412, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.007768326438963413, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.007175179198384285, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.006991253234446049, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0048519643023610115, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.004802415147423744, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.004563535563647747, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0030932454392313957, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03663479536771774, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03663479536771774, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.07051093131303787, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0655023530125618, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.063033826649189, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.05657416954636574, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.03271663561463356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.030573274940252304, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.038005780428647995, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.03533671423792839, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.0333196297287941, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.02922886610031128, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.027426205575466156, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.01924103870987892, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.016874391585588455, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.01567293331027031, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.01538103073835373, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.009615283459424973, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.008018435910344124, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.007881782948970795, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.007300710771232843, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.007059480529278517, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.005003231577575207, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.004908574279397726, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.004644062370061874, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0030657490715384483, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.038005780428647995, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.038005780428647995, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.20556651055812836, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1920468658208847, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.18734754621982574, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.16907504200935364, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.09620031714439392, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09139373898506165, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.10877157002687454, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10010118782520294, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.09782416373491287, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0863763839006424, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08122952282428741, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05538318306207657, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04777536913752556, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04597495496273041, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.045553527772426605, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.027644017711281776, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.023258335888385773, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.023107115179300308, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.021197352558374405, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.020777126774191856, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.014135394245386124, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.013547012582421303, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.013412736356258392, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008057201281189919, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04777536913752556, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04777536913752556, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.19522260129451752, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17493963241577148, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16751360893249512, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.14883746206760406, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08923063427209854, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08216635137796402, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10429921001195908, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09592372179031372, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0922476202249527, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07785093039274216, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07195411622524261, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05389827862381935, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04687486216425896, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.043878354132175446, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.043158501386642456, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.027411848306655884, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02453608252108097, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024315090849995613, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.022509310394525528, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.021958325058221817, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015841729938983917, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.017626047134399414, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014959723688662052, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014309033751487732, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04687486216425896, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04687486216425896, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.17649777233600616, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1668255776166916, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16376793384552002, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1495250016450882, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08262359350919724, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07944205403327942, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09173844009637833, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08484145998954773, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08373597264289856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07543999701738358, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07153479009866714, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0467044971883297, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04056617245078087, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03957526013255119, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03934142738580704, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.023360196501016617, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020433930680155754, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02036970853805542, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01898704655468464, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01873188279569149, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012384146451950073, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012441113591194153, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012065611779689789, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008395014330744743, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0467044971883297, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0467044971883297, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2278326153755188, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2152920961380005, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21145786345005035, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1931130737066269, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10651854425668716, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.1024341732263565, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11843353509902954, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10932108014822006, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10796628147363663, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09726738184690475, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0923440158367157, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06015831604599953, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05219258368015289, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.050932541489601135, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05063420906662941, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03008430451154709, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026034507900476456, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025950273498892784, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024138523265719414, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.0238096471875906, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01577516458928585, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015449493192136288, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015362035483121872, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009913596324622631, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05219258368015289, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05219258368015289, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.20190350711345673, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.18523551523685455, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.17915409803390503, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16202622652053833, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.09236571937799454, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08645603060722351, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.10652428865432739, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09734007716178894, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09424959868192673, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08222030103206635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07851589471101761, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.054316695779561996, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04702722281217575, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.044788558036088943, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.044253360480070114, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02773422747850418, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02432604692876339, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.024145185947418213, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022465219721198082, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022152744233608246, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.015944959595799446, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01658668927848339, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015253936871886253, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012895106337964535, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04702722281217575, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04702722281217575, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.08473436534404755, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.07901640236377716, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.07679802179336548, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.06925468146800995, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.03963378816843033, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.037507131695747375, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.04500171169638634, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.04165557771921158, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.04032936319708824, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03553321585059166, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.03336026147007942, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.022885099053382874, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.01992916315793991, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.01901361532509327, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.018791504204273224, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.011437127366662025, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.009803112596273422, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.009721514768898487, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.008973921649158001, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.008776138536632061, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.005976726766675711, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.006025387905538082, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.005674517713487148, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.003987886011600494, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.04500171169638634, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.04500171169638634, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.08550874888896942, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0795091912150383, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.07688000053167343, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0691184401512146, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.03963477164506912, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0372827984392643, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0455814003944397, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.042207375168800354, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04034649580717087, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.035482391715049744, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03332408890128136, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.023122230544686317, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.020171605050563812, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.018969209864735603, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.018690815195441246, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.011549542658030987, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.009713378734886646, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.009595423936843872, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.008870420046150684, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.008630677126348019, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0060134995728731155, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.005921138916164637, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.005650635343044996, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0037640624213963747, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0455814003944397, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0455814003944397, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.19986361265182495, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.18681465089321136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.18239721655845642, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1646519899368286, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0932173952460289, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.08876791596412659, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.10490157455205917, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.09691172093153, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.09483586251735687, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08385612815618515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.07874137163162231, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05325530469417572, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04622172564268112, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04456412047147751, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04414327070116997, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.026571199297904968, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.022550532594323158, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02241242676973343, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.020568421110510826, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.020169463008642197, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.013598380610346794, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.013143608346581459, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.012975933961570263, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.007865643128752708, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04622172564268112, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04622172564268112, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.19814230501651764, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17633260786533356, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16918958723545074, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.14531050622463226, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.090785451233387, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08348099887371063, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10478843003511429, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09666559100151062, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09369037300348282, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07713142782449722, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06961502134799957, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.054076701402664185, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04737582802772522, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04476233944296837, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.044133950024843216, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.027670077979564667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02516249381005764, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.025005068629980087, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.022752780467271805, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.022231915965676308, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.016123570501804352, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018146943300962448, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015370385721325874, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014880665577948093, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04737582802772522, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04737582802772522, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.18105743825435638, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1709916740655899, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16782374680042267, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.15315788984298706, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08474097400903702, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08144045621156693, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09403704106807709, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08702270686626434, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08590507507324219, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07729079574346542, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07320819050073624, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047887180000543594, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04159995913505554, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.040582966059446335, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.040338922291994095, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.023921433836221695, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020949741825461388, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.020879492163658142, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.019450819119811058, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.019186314195394516, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012635006569325924, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012747634202241898, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012299769558012486, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008595293387770653, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047887180000543594, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.047887180000543594, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23192955553531647, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21912115812301636, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21511583030223846, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19639940559864044, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10835375636816025, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10412321984767914, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12078002095222473, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11119338124990463, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.1098063737154007, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09882794320583344, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09376131743192673, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06123778969049454, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05309275910258293, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05180717632174492, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05150234326720238, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03073778562247753, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026487575843930244, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026398098096251488, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024539008736610413, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024199899286031723, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016277190297842026, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015725206583738327, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01585971936583519, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010105148889124393, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05180717632174492, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05180717632174492, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.206402987241745, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.1898224651813507, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.18370096385478973, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16598351299762726, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.09446409344673157, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08860909938812256, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.10825538635253906, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09950840473175049, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09635820239782333, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08424834907054901, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08023836463689804, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05512252077460289, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.047915346920490265, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.045668914914131165, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.045139607042074203, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.027835329994559288, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02456429786980152, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02437233179807663, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022663362324237823, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02233966812491417, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.015547866933047771, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016486508771777153, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014879902824759483, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012560068629682064, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.047915346920490265, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.047915346920490265, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.08331633359193802, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.07784710824489594, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.0757342278957367, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.06835246086120605, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.03897469863295555, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.036956362426280975, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.044319022446870804, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.04090554267168045, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03963882848620415, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03502185270190239, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.03296492248773575, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.022540969774127007, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.019566811621189117, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.018702423200011253, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.018494872376322746, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.011296604759991169, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.009632019326090813, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.009552828036248684, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.00882552657276392, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.008635308593511581, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.005935321561992168, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.0058868275955319405, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.005649931263178587, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0038671570364385843, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.044319022446870804, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.044319022446870804, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.08378814160823822, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.07806634157896042, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.07558473944664001, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0680074691772461, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.03888250142335892, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.036652013659477234, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04455021023750305, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.04126398637890816, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.03953058645129204, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.03482286259531975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.032676368951797485, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02257278375327587, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0197160542011261, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.018619123846292496, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.018356911838054657, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0113036734983325, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.009550211951136589, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.009430354461073875, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.008724258281290531, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.008489411324262619, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0059069241397082806, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.005818104837089777, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.005561661906540394, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.003706955583766103, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04455021023750305, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04455021023750305, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.21838213503360748, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2043820172548294, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.1996987909078598, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18036292493343353, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.102029949426651, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09721678495407104, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11453379690647125, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10571780800819397, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10368844866752625, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09187747538089752, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08629391342401505, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.058183703571558, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05046914890408516, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04874751716852188, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.048338137567043304, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.029026396572589874, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.024633683264255524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.024515509605407715, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02250140905380249, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02208832837641239, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.014853246510028839, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0142990592867136, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014205207116901875, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008507267571985722, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05046914890408516, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05046914890408516, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20734629034996033, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.18374541401863098, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.17605504393577576, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1547529697418213, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09484105557203293, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08667175471782684, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10947262495756149, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10059376806020737, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09789136052131653, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08046677708625793, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07477200031280518, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05633653327822685, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0490366630256176, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04645663872361183, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04584552347660065, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02869858220219612, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.025671010836958885, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.025510963052511215, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.023060685023665428, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.022539805620908737, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.016521593555808067, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018051976338028908, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015733540058135986, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014452235773205757, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0490366630256176, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0490366630256176, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.17561277747154236, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.16572976112365723, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1626182198524475, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.14823174476623535, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08220837265253067, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07894649356603622, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09128788858652115, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08445088565349579, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0833434984087944, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07491283863782883, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07089689373970032, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04647861793637276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.040412019938230515, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.039397671818733215, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.03915077820420265, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.023223813623189926, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020401913672685623, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02033316344022751, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.01894293911755085, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018686257302761078, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012292744591832161, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012517609633505344, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.011958330869674683, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008562175557017326, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04647861793637276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04647861793637276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2271786779165268, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21441224217414856, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21041043102741241, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19190259277820587, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10625042766332626, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10205494612455368, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11787134408950806, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.1091248020529747, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.107711561024189, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09684890508651733, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0915980264544487, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05991550534963608, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.052062828093767166, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0507626086473465, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05045916140079498, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029914040118455887, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025876518338918686, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025790071114897728, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02394193783402443, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023602696135640144, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015578452497720718, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015250363387167454, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015148435719311237, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009631998836994171, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.052062828093767166, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.052062828093767166, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2055780440568924, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.18778850138187408, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.18146991729736328, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16311675310134888, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.09386870265007019, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08770596235990524, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.10808638483285904, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.09894703328609467, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09589794278144836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08308392018079758, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07899729907512665, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05508659780025482, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04776708409190178, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04551425576210022, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04498075321316719, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.028023889288306236, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02469281293451786, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.024508319795131683, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022709151729941368, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022385673597455025, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01604057475924492, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016818087548017502, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01538841426372528, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013045706786215305, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04776708409190178, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04776708409190178, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.08481285721063614, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.07920902967453003, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.07704499363899231, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.06953918933868408, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.039679840207099915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.03757915273308754, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.04523913934826851, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.04169332608580589, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.040350575000047684, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03563791885972023, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0335826650261879, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.02300543710589409, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.019980300217866898, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.019054099917411804, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.018831849098205566, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01153179258108139, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.009853354655206203, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.00976647064089775, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.009040259756147861, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.008842973969876766, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.006074382457882166, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.006088165566325188, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.005777781829237938, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.004075435921549797, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.04523913934826851, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.04523913934826851, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.08416741341352463, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0783933699131012, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.07597482204437256, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.06830689311027527, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.03900289908051491, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0367598682641983, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0446300134062767, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.041379306465387344, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.03968913108110428, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.034961771219968796, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03282078728079796, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.022601064294576645, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.019749615341424942, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0186682790517807, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.018401652574539185, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.011291214264929295, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.009563609026372433, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.009453491307795048, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.008728931657969952, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.008503034710884094, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.005862982012331486, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.005813729949295521, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.005519519560039043, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0036967990454286337, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0446300134062767, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0446300134062767, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.19953325390815735, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.18670068681240082, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.18220175802707672, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.16456905007362366, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.09318090975284576, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.08873981237411499, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.10468383878469467, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0967065840959549, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.09467824548482895, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08381021022796631, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.07871723175048828, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05316135287284851, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04615926370024681, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04452115297317505, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04413476213812828, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0265304297208786, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.022540444508194923, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.022403912618756294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.020588936284184456, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.020206691697239876, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.013586766086518764, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.013158720917999744, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.012984169647097588, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.007909758016467094, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04615926370024681, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04615926370024681, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1690014749765396, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14986585080623627, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14134033024311066, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12007459253072739, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07766378670930862, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07023008167743683, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.09454771131277084, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08596272021532059, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08014050871133804, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06524275243282318, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.059356871992349625, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04832230508327484, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04242303594946861, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03877357393503189, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03785792738199234, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.024994362145662308, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02243620529770851, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022090379148721695, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02032996341586113, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019657714292407036, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014992390759289265, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016989691182971, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013970698229968548, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014239408075809479, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04832230508327484, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04832230508327484, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1743377149105072, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.16449704766273499, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16140024363994598, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1472078114748001, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08150649815797806, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0782981738448143, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.0904175192117691, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08376255631446838, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08264310657978058, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07428736984729767, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07030045986175537, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04596881940960884, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.040028464049100876, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.03901715576648712, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.038776688277721405, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02295122668147087, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020117158070206642, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.020047234371304512, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.018662700429558754, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018400482833385468, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012049458920955658, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01221926137804985, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.011716247536242008, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00819613877683878, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04596881940960884, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04596881940960884, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22293958067893982, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21039603650569916, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.20647214353084564, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1883000284433365, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10415050387382507, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10002816468477249, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11546508222818375, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10698875784873962, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10558560490608215, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09491488337516785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08972864598035812, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05857216194272041, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051001206040382385, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0497291162610054, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04942064732313156, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029223889112472534, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025276469066739082, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025187956169247627, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.023367317393422127, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023032598197460175, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015078742988407612, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01479175966233015, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014646806754171848, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009186132811009884, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051001206040382385, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051001206040382385, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2123309224843979, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.1943424791097641, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1878432333469391, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16885897517204285, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.0971386730670929, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09075597673654556, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1110927164554596, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10225014388561249, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.09922675788402557, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08587586879730225, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08144419640302658, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0566122904419899, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04917202144861221, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04689215123653412, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04634373262524605, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02853572554886341, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02509935200214386, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.024916913360357285, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022990340366959572, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022659823298454285, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.015824751928448677, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01669210009276867, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015128704719245434, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012582916766405106, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04917202144861221, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04917202144861221, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.09237917512655258, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.08578693866729736, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.08325227349996567, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.07473486661911011, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0430760532617569, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.040636248886585236, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0489838570356369, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.045356251299381256, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.04389255866408348, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.03843610733747482, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.03599528223276138, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.024933146312832832, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0217321515083313, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.020679479464888573, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.020428122952580452, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01247275248169899, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.010686103254556656, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.010592170059680939, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0097524244338274, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.00952593982219696, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.00653552170842886, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.006607494782656431, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.006186345126479864, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.004409384913742542, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0489838570356369, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0489838570356369, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.09419047832489014, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0871843546628952, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.08440476655960083, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0755627378821373, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04345930740237236, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04090610891580582, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.049710988998413086, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.04602827876806259, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.044321686029434204, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.038710035383701324, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03622649982571602, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02520062029361725, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.022035904228687286, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02083921618759632, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02055564522743225, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.012622944079339504, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.010732009075582027, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01062107179313898, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.009768667630851269, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.009526047855615616, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.006589365191757679, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.006615266669541597, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006208476610481739, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004343921318650246, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.049710988998413086, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.049710988998413086, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.19669470191001892, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.18370282649993896, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.17927369475364685, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.16153185069561005, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0919017568230629, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.08739946037530899, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.10348668694496155, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.09556278586387634, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.09346817433834076, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08250901848077774, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.07741408795118332, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05258937180042267, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04563429579138756, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04396756738424301, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04356853663921356, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02624049037694931, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.022308534011244774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.022168930619955063, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02036380022764206, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.019951291382312775, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01347322203218937, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.013100228272378445, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.012859098613262177, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.007975694723427296, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04563429579138756, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04563429579138756, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20311471819877625, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1820465326309204, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.17429839074611664, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1497991681098938, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09352056682109833, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08619730174541473, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10923353582620621, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10012833774089813, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09605159610509872, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07952654361724854, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07208491861820221, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.056214284151792526, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048595309257507324, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04567249119281769, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04496786743402481, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028376078233122826, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.024967195466160774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024732457473874092, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.022385641932487488, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02179328352212906, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01586069166660309, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.017327930778265, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014945296570658684, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01357248891144991, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048595309257507324, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048595309257507324, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.17841559648513794, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.16822417080402374, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.16501371562480927, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.15028028190135956, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08345603942871094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08010122179985046, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.0926542654633522, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08579909801483154, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0846358984708786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07594876736402512, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07182136178016663, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04711848124861717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04103478416800499, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.039982106536626816, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0397346094250679, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.023543542250990868, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02066538669168949, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.020592521876096725, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.019152985885739326, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018886875361204147, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012398854829370975, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012624549679458141, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012053271755576134, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00854906439781189, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04711848124861717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04711848124861717, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.233567014336586, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2202499806880951, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2160787582397461, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19672852754592896, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.1091843694448471, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10479839891195297, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12117717415094376, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11219160258769989, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11071314662694931, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09933239966630936, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09386684000492096, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.061561428010463715, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.053528793156147, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0521741583943367, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05185000225901604, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030758755281567574, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026593882590532303, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026504071429371834, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024564623832702637, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02420830726623535, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015992240980267525, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015681281685829163, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015543242916464806, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009896733798086643, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0521741583943367, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05185000225901604, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.21954067051410675, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.20050205290317535, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19336573779582977, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1733929067850113, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10046852380037308, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09362263977527618, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11540723592042923, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10627633333206177, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10269146412611008, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08857528120279312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08384864777326584, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05891073867678642, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05111771449446678, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0485597662627697, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04795002192258835, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.029740549623966217, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.026087909936904907, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.025875737890601158, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.0238647423684597, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.023497046902775764, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016646333038806915, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01748613268136978, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01588911935687065, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013268064707517624, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05111771449446678, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05111771449446678, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.09798036515712738, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.09108809381723404, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.08850299566984177, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.07955233007669449, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.045634400099515915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.04314906895160675, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.051925867795944214, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.04792151600122452, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.04647686704993248, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04080204665660858, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.03826497495174408, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.026372026652097702, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0229277815669775, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.02189783938229084, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.02165335789322853, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.013202797621488571, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.011311359703540802, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.011218800209462643, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.010332254692912102, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.010106602683663368, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.006929093971848488, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.006974921096116304, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.006589018274098635, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.00465272506698966, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.051925867795944214, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.051925867795944214, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.09162269532680511, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.08483752608299255, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.08213455975055695, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.07361412048339844, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.042246561497449875, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.03979663923382759, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.048366013914346695, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.044785309582948685, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04310409724712372, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.03772572800517082, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0353095643222332, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02451223134994507, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02141985110938549, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.020293375477194786, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.020012065768241882, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.012272791005671024, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.010456995107233524, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.010351486504077911, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.009540834464132786, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.009303256869316101, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.006426466628909111, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.006468561943620443, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006062669213861227, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004278210457414389, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.048366013914346695, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.048366013914346695, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.21012760698795319, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.19627176225185394, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.19172696769237518, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1729508489370346, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.09806396812200546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09336294233798981, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11000406742095947, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.1015997901558876, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.09971065074205399, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08803950995206833, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08264639973640442, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.055847492069005966, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04847372695803642, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04684890806674957, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04644973948597908, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0278506837785244, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.023688070476055145, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0235732551664114, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02160949632525444, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.021207818761467934, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.014260580763220787, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.013777405954897404, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.013667836785316467, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008220160380005836, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04847372695803642, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04847372695803642, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20216131210327148, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17685158550739288, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16865165531635284, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.14641810953617096, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09201371669769287, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08340117335319519, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10736727714538574, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09833759814500809, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09542043507099152, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07730764895677567, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07063768804073334, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.055280931293964386, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04784870147705078, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04501350596547127, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04435228556394577, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028116585686802864, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.024777108803391457, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02460266835987568, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.022084182128310204, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02150733582675457, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.016026537865400314, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01734631508588791, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015155652537941933, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013729865662753582, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04784870147705078, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04784870147705078, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.17945724725723267, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1690315157175064, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1656953990459442, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1507824808359146, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08405905961990356, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08060579746961594, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09343290328979492, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08647023141384125, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08526725322008133, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07635879516601562, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07205689698457718, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04753289744257927, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04136279597878456, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04027482494711876, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.040013909339904785, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.023752668872475624, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.020787827670574188, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02071242779493332, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.019237367436289787, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.018958894535899162, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012499839067459106, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.012669563293457031, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012141804210841656, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008536403067409992, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04753289744257927, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04753289744257927, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23485402762889862, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22116558253765106, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21690072119235992, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19729231297969818, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10988222807645798, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10541213303804398, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12200648337602615, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11300314217805862, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11148358881473541, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09982287883758545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0941518172621727, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06196366995573044, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0539456382393837, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.052535418421030045, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05220498517155647, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03094877116382122, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026791062206029892, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02669636160135269, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024719128385186195, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02435356006026268, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016081076115369797, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015814853832125664, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015610504895448685, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00999405700713396, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05220498517155647, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05220498517155647, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22022707760334015, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.19977042078971863, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19222567975521088, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.17148928344249725, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10063120722770691, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.0932503417134285, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11583727598190308, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.1067076027393341, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10308923572301865, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08798650652170181, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08295052498579025, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.059081535786390305, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05134430155158043, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04857570305466652, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.0479382760822773, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.029720976948738098, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02603304013609886, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.025803284719586372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.023668916895985603, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02325635775923729, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016443440690636635, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01740454137325287, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015615562908351421, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013088974170386791, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05134430155158043, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05134430155158043, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1176077350974083, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.10967015475034714, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.10674187541007996, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.09618603438138962, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05490630120038986, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.052053503692150116, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.06213098019361496, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.05746975168585777, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.05585727095603943, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04926728457212448, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04621446132659912, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03160630539059639, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.027561558410525322, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.026405036449432373, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.02613152377307415, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.015831448137760162, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.013731628656387329, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.013633152469992638, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.012603689916431904, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.012352054938673973, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.008360340259969234, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.008586086332798004, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.007982457056641579, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005917116068303585, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04926728457212448, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04926728457212448, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10086064040660858, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09368714690208435, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09082962572574615, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08150357007980347, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.046622369438409805, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04399596527218819, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0532677136361599, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.04924473911523819, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.047480449080467224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.041691020131111145, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03908795863389969, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.026962580159306526, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.023535190150141716, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02234024740755558, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.022049717605113983, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.013492257334291935, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.011443696916103363, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011327403597533703, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010435565374791622, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010180020704865456, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007020277436822653, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.006960669998079538, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006638044491410255, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004452844150364399, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.04924473911523819, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.04924473911523819, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2147233486175537, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.20057588815689087, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.196017786860466, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1766044944524765, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10003584623336792, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0952121689915657, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11216872930526733, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10359864681959152, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10164020955562592, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.08988173305988312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08426062017679214, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.056940216571092606, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04948326200246811, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.04780101403594017, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0474068783223629, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02841082587838173, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.024287745356559753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02415028214454651, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.022193659096956253, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.021790441125631332, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01462071668356657, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014288206584751606, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014034167863428593, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008803347125649452, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04948326200246811, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.04948326200246811, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.2047433853149414, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1867513358592987, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.17798468470573425, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.15642771124839783, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0951361283659935, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08760096877813339, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11412739753723145, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10447289794683456, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09756170958280563, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08317098021507263, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07704663276672363, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05881809815764427, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05084927752614021, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.046647947281599045, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.045623231679201126, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.029898568987846375, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02573990263044834, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.025306541472673416, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02353718876838684, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.022771654650568962, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01708984375, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018271900713443756, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015885232016444206, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014365112408995628, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05084927752614021, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05084927752614021, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.18514209985733032, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.17426437139511108, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.17081694304943085, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.15528415143489838, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.08676548302173615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08317548781633377, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09642710536718369, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08929072320461273, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08804064989089966, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07873960584402084, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07427722215652466, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0490691140294075, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.042719461023807526, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.041581910103559494, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.041311588138341904, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.024500245228409767, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.021484404802322388, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02140437439084053, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.019868861883878708, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.019581399857997894, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.012888338416814804, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013123790733516216, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.012518852017819881, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00887410156428814, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0490691140294075, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0490691140294075, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23440217971801758, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22057923674583435, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21627682447433472, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19647245109081268, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10968402028083801, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10513650625944138, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12174199521541595, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11282603442668915, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11128973960876465, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.0995001494884491, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09373809397220612, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06180852651596069, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05382156744599342, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.052397217601537704, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0520569272339344, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030843161046504974, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026675432920455933, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02657761424779892, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024573002010583878, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024201249703764915, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015933742746710777, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015678860247135162, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015456093475222588, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009805256500840187, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0520569272339344, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0520569272339344, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22621802985668182, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.20538313686847687, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19754153490066528, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1758357286453247, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10351023077964783, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09596666693687439, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11935947835445404, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10980619490146637, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10595569759607315, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09036828577518463, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08506570011377335, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06093433499336243, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05282306671142578, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05003592371940613, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04937267303466797, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03066730499267578, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.026853056624531746, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.026637546718120575, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.024403061717748642, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.023995650932192802, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01697573997080326, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01800377480685711, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016104884445667267, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013621674850583076, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05003592371940613, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05003592371940613, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.10490702092647552, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.09795992076396942, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.09542513638734818, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.0860709473490715, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.049036551266908646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.04656573385000229, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.05550441890954971, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.05116671696305275, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.049867287278175354, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.044026199728250504, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04139722138643265, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.028203584253787994, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.024471895769238472, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.023517359048128128, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.02328699640929699, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.014108608476817608, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.012101318687200546, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.012021176517009735, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.011077675968408585, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.010863179340958595, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.00738673796877265, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.007365357130765915, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.007071526255458593, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.004846394062042236, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.05116671696305275, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.05116671696305275, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.09140804409980774, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.08522222936153412, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.08261723071336746, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.07438796758651733, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.042380206286907196, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.040028344839811325, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04834282398223877, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.044791948050260544, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04311435669660568, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.03801475837826729, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.035660699009895325, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02449326403439045, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.021402953192591667, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.020294424146413803, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.020022675395011902, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01224681455641985, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.010370347648859024, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.010259379632771015, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.009473553858697414, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.009236707352101803, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.006361728999763727, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.006264404859393835, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006008493714034557, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.003946427255868912, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04834282398223877, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.04834282398223877, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2218436449766159, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.20714664459228516, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2024804949760437, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18230420351028442, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10342913866043091, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.09850475192070007, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1160275936126709, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10707578808069229, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10517270863056183, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0927930548787117, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08706251531839371, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.05888520926237106, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05107852444052696, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.049422938376665115, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.04901793599128723, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02938166633248329, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.024997148662805557, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.024870436638593674, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.022784989327192307, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02236323058605194, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015026443637907505, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014541003853082657, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01440401654690504, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008708961308002472, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05107852444052696, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05107852444052696, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.22544217109680176, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.20520062744617462, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.19788579642772675, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.17504166066646576, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.10405069589614868, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.09682298451662064, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11966320127248764, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.11028382182121277, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10656096041202545, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.09055768698453903, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.08433669805526733, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06142258644104004, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.053239841014146805, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.05042875185608864, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04975055903196335, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03090118058025837, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02701207622885704, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0267898291349411, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02444591000676155, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.023869724944233894, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01694883592426777, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018043633550405502, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01607612520456314, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013567262329161167, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.05042875185608864, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.05042875185608864, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.18657280504703522, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.17541298270225525, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.17185235023498535, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.15597811341285706, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.087525375187397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08384088426828384, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.09734091907739639, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09010456502437592, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08881686627864838, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07928965985774994, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07476136088371277, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04960549250245094, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04317041486501694, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04200302064418793, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.041724152863025665, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.024789806455373764, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02179793268442154, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02171527035534382, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020152287557721138, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.019855761900544167, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013128941878676414, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013458764180541039, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01274739857763052, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009271875023841858, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04960549250245094, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04960549250245094, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23390576243400574, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21974217891693115, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21533727645874023, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19537007808685303, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10946647077798843, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10482928156852722, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12167081981897354, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11267772316932678, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11111108213663101, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09911880642175674, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09330841153860092, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06178564578294754, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05377361550927162, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0523313470184803, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05198143422603607, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.0308383796364069, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026677316054701805, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02657923847436905, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02455167844891548, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024174407124519348, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01598401740193367, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015749383717775345, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015497634187340736, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00994044914841652, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0523313470184803, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05198143422603607, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.23030729591846466, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.20909187197685242, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.20142629742622375, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1790357530117035, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.1054622083902359, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09793952107429504, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12144190073013306, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11148993670940399, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.1079174280166626, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09201827645301819, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08649349212646484, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.061961255967617035, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05363547429442406, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05099042132496834, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05034426599740982, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.031244665384292603, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.027350524440407753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027135170996189117, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.024835236370563507, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024439707398414612, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017416395246982574, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01829838566482067, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016604769974946976, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013843570835888386, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05099042132496834, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05099042132496834, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.11185185611248016, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.10439100116491318, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.10176344960927963, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.09175200760364532, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05234150588512421, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.049691177904605865, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.05910969525575638, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.05454249680042267, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.05321898311376572, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.04695139452815056, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.044075120240449905, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.030041679739952087, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.026112396270036697, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.02510446310043335, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.02486213482916355, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.015039488673210144, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.012946231290698051, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.012861154042184353, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.011855402030050755, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.011624162085354328, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.007888946682214737, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.007919145748019218, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.007548598572611809, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005262318998575211, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05234150588512421, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05234150588512421, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.09605665504932404, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.08949685841798782, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.08687879890203476, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.07818309217691422, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04452850669622421, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04206019267439842, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05077312886714935, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.04698745161294937, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04529723525047302, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.03993535786867142, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03746209666132927, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.025725113227963448, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.022455381229519844, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.021337086334824562, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.021060850471258163, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.012873083353042603, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.010912787169218063, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01080262754112482, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.009970951825380325, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.009734852239489555, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.006710548885166645, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.006606933660805225, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006358384620398283, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0042067645117640495, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05077312886714935, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05077312886714935, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2266242802143097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2114974558353424, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.20670852065086365, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18611760437488556, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10562588274478912, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10056015849113464, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11839108914136887, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.10935024172067642, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10740318149328232, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09477841109037399, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.08872857689857483, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06003979220986366, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05216643586754799, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05046471953392029, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.050058115273714066, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.02994375303387642, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.025528766214847565, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.025392655283212662, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.023252632468938828, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.022832045331597328, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015320875681936741, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.014835169538855553, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014692943543195724, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.008853540755808353, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05216643586754799, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05216643586754799, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.23016813397407532, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.2095518857240677, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.20252209901809692, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.178199902176857, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.10586585849523544, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.09891165792942047, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.12154889106750488, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.11194141209125519, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10881385952234268, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.09256300330162048, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0855647474527359, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06258860230445862, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05427876114845276, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0515492781996727, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.050896771252155304, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03153441846370697, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02791423723101616, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02773873135447502, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.025413990020751953, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.024835124611854553, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.017599793151021004, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01899576373398304, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.016778964549303055, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014665089547634125, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0515492781996727, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0515492781996727, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1929684281349182, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18128901720046997, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1775842308998108, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16107678413391113, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09051068127155304, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08664200454950333, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10071158409118652, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09323528409004211, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09185806661844254, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08192773163318634, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07713471353054047, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05126021057367325, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04465535283088684, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04343084245920181, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.043131303042173386, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025619298219680786, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022517843171954155, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022430991753935814, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020798876881599426, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020489243790507317, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013518543913960457, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013877065852284431, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013113209046423435, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009524752385914326, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05126021057367325, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05126021057367325, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23220649361610413, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21814236044883728, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2137109339237213, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1937863975763321, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10873054713010788, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10406433790922165, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12083059549331665, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11195091158151627, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11037520319223404, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09838961809873581, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09250947833061218, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06136412173509598, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05343430116772652, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05196770653128624, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05162116885185242, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030625157058238983, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02648507058620453, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026385143399238586, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024355493485927582, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023977508768439293, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015846025198698044, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01563173159956932, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015355757437646389, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009848516434431076, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05196770653128624, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05196770653128624, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.23175789415836334, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.21047565340995789, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.20274759829044342, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.17970015108585358, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10634402185678482, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09875762462615967, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12208323180675507, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.1122279167175293, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10872073471546173, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09253454953432083, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.086728535592556, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0623089037835598, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05395643785595894, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.051333412528038025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.050723180174827576, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03136412426829338, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.027453122660517693, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027236398309469223, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.024859240278601646, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024461613968014717, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017351360991597176, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.018236152827739716, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01654960587620735, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013667203485965729, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.051333412528038025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.051333412528038025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1264035552740097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1180279478430748, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.115077443420887, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1037343218922615, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.059133827686309814, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0562150739133358, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0665912926197052, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.061585113406181335, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06014053896069527, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05311021953821182, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04981348291039467, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.0338720940053463, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.02947041392326355, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.02836519107222557, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.028101138770580292, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.016934681683778763, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014619830995798111, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.014526823535561562, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.013395873829722404, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.013141453266143799, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.008859261870384216, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.008926788344979286, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.008484305813908577, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005926285404711962, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04981348291039467, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04981348291039467, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10356629639863968, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09649664163589478, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09373587369918823, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08431309461593628, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04801705479621887, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.045415446162223816, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0545649491250515, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05051480233669281, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04882342740893364, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04304352402687073, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.040341321378946304, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.027654875069856644, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02415645122528076, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.022994888946413994, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.022721517831087112, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.013827244751155376, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01176124531775713, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011649091728031635, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010752045549452305, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010507069528102875, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007177063263952732, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007108394987881184, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006808582227677107, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004530673380941153, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05051480233669281, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05051480233669281, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2297164499759674, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.21482202410697937, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2098911851644516, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18912731111049652, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10720860213041306, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10215196013450623, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.11996658146381378, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11083327978849411, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10896197706460953, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09631131589412689, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09018741548061371, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.060829438269138336, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.052875254303216934, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05118537321686745, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.050782591104507446, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.030336685478687286, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02586822584271431, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02574782818555832, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.023581262677907944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023162100464105606, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.015503283590078354, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.01497622299939394, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014889214187860489, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00891769491136074, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05118537321686745, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05118537321686745, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.23691017925739288, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.2142287939786911, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.20623870193958282, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.18025434017181396, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.10870692133903503, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.1010298803448677, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.12694871425628662, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.11612801253795624, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.11219241470098495, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.09437181055545807, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.08764258027076721, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06554760038852692, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.056549035012722015, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.05321492254734039, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.052419546991586685, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03321642428636551, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.029295869171619415, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.029058940708637238, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02665998786687851, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.026002176105976105, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.018902074545621872, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.020532071590423584, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.017901381477713585, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01628883183002472, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03321642428636551, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03321642428636551, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20138423144817352, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18918606638908386, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1852678805589676, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16799652576446533, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09459978342056274, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09050972014665604, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10533508658409119, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09742564707994461, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09601354598999023, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08555283397436142, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08062954246997833, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05370679125189781, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0467132031917572, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04543481767177582, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.045136384665966034, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02690793387591839, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023637555539608, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.023548074066638947, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021838190034031868, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02151963859796524, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01434389315545559, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014676627703011036, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01393558457493782, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01020722184330225, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0467132031917572, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0467132031917572, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23972438275814056, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22510521113872528, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22054032981395721, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19989724457263947, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11227564513683319, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10741870105266571, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12499278038740158, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11559652537107468, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11396121978759766, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10150819271802902, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09561389684677124, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06349330395460129, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05521465837955475, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05370648577809334, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0533507838845253, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03174418583512306, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02745489589869976, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027353860437870026, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025251781567931175, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02485976368188858, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016541894525289536, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016322242096066475, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01604592241346836, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01044289581477642, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03174418583512306, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03174418583512306, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.23796682059764862, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.21596944332122803, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.20804738998413086, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1844709813594818, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10911484062671661, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10130692273378372, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1253986358642578, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11522766202688217, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11163431406021118, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.0949825718998909, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08903609216213226, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06392231583595276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.055256281048059464, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.052567195147275925, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05191430449485779, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03209880366921425, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02786809392273426, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027654029428958893, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.02518005669116974, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024780578911304474, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01757792942225933, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01822987012565136, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016744127497076988, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013382919132709503, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05191430449485779, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05191430449485779, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.12191952019929886, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.11395987123250961, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11102679371833801, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1001310721039772, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.057173628360033035, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.054292384535074234, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.06456384062767029, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.059722039848566055, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.05813578516244888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.051359523087739944, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04821907356381416, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.032884061336517334, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.028607375919818878, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.02744062803685665, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.027159662917256355, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.016446810215711594, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014157131314277649, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.014055718667805195, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.012978541664779186, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.012719537131488323, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.008624332025647163, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.008680143393576145, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.008242185227572918, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005778057500720024, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.051359523087739944, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.051359523087739944, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10035642981529236, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09360421448945999, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09080930054187775, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08174729347229004, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.046723492443561554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04416974261403084, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05322019010782242, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.049334749579429626, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04755254089832306, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.041967712342739105, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03933987393975258, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.027003508061170578, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02359544113278389, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.022395387291908264, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02211693674325943, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.013507899828255177, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.011483828537166119, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011363651603460312, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010500210337340832, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010248351842164993, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007026432082056999, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.006984476465731859, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006659327540546656, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004492375534027815, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.049334749579429626, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.049334749579429626, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.23678655922412872, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22096781432628632, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21589288115501404, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1943434625864029, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11042864620685577, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10516874492168427, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12384800612926483, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11429000645875931, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11231384426355362, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09900444746017456, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09278061240911484, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06289482116699219, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05449667572975159, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05278044193983078, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.052369918674230576, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03137503191828728, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.026712754741311073, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026574723422527313, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024330513551831245, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023885289207100868, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01605958305299282, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015518167056143284, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015387339517474174, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009315124712884426, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.052369918674230576, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.052369918674230576, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.2258114218711853, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.20458903908729553, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.19651493430137634, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.16993951797485352, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.10381326079368591, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.09627152979373932, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.12144837528467178, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.11147648841142654, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10684888064861298, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08993532508611679, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.08180441707372665, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06246696785092354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05403686687350273, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.05053797736763954, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0496966652572155, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03156447038054466, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.027425266802310944, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.027133379131555557, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.024830691516399384, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02413364313542843, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01747606135904789, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018825776875019073, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01632160320878029, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014484098181128502, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.05053797736763954, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.05053797736763954, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20288680493831635, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.19060708582401276, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1866966336965561, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1692626178264618, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09532923251390457, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09122899174690247, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10619347542524338, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09822846204042435, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09677214920520782, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08625586330890656, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08126217871904373, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05412881076335907, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04708143696188927, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.045779578387737274, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04547109454870224, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.027084268629550934, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02378065511584282, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.023688899353146553, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021961316466331482, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02163678966462612, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.014380630105733871, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014716467820107937, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013957303948700428, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010168586857616901, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04708143696188927, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04708143696188927, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2387281358242035, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2242574393749237, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2196892350912094, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19910618662834167, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11194220930337906, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.1071448028087616, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12445402890443802, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11528672277927399, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11363495141267776, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10124807804822922, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09519536793231964, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06328175216913223, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05504344403743744, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05352754145860672, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05317305028438568, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03157028928399086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027286479249596596, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027181532233953476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025079622864723206, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024690311402082443, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016335967928171158, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01610192470252514, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015834558755159378, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01014113798737526, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03157028928399086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03157028928399086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2373579889535904, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.21497474610805511, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.2067292332649231, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1828385889530182, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.108889140188694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10081890225410461, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1256559044122696, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.1153959184885025, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11149094253778458, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09445127844810486, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.0884285643696785, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0641036406159401, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.055384162813425064, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05253015458583832, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.051832590252161026, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03220752626657486, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.027928000316023827, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027691571041941643, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.025191882625222206, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024766622111201286, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01767481304705143, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.018390290439128876, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016788454726338387, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013582616113126278, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.051832590252161026, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.051832590252161026, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.12728717923164368, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.11889682710170746, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11589276045560837, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10446009784936905, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05960492789745331, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.05663925036787987, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.06718525290489197, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06219885125756264, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06059814617037773, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0535370372235775, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05019444227218628, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03417225927114487, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.02974766492843628, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.028579888865351677, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.028304291889071465, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01708240993320942, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014701914973556995, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01459992490708828, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.013464763760566711, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01319985929876566, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.00891862716525793, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.008943215943872929, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.008532905019819736, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005876320414245129, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05019444227218628, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05019444227218628, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.1039757952094078, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09700198471546173, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09427684545516968, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08486111462116241, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.048313938081264496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04576671123504639, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05490562692284584, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.050766535103321075, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04917077347636223, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04333646595478058, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04063406586647034, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02780900150537491, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.024254485964775085, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02313927561044693, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02287469245493412, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.013903189450502396, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.011829594150185585, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011723512783646584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010801314376294613, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010559828020632267, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007210410200059414, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007123416289687157, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0068541020154953, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0045110126957297325, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.050766535103321075, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.050766535103321075, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.24064163863658905, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22460652887821198, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21942934393882751, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.19746918976306915, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11221565306186676, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10687928646802902, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12570621073246002, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11608926951885223, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11412303894758224, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10066988319158554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09424085915088654, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06381712853908539, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05537159740924835, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05360487103462219, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05320213362574577, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03184718266129494, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.027152081951498985, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.027021069079637527, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024738024920225143, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.024305950850248337, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016300102695822716, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015802348032593727, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015655653551220894, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009529448114335537, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03184718266129494, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03184718266129494, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.21171483397483826, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.19000764191150665, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1799771934747696, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1560133993625641, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09749056398868561, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08874738216400146, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11775634437799454, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10804684460163116, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10056383907794952, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08363381773233414, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.076560840010643, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06071165204048157, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05272901430726051, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04784320294857025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04666295275092125, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.031103065237402916, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.026616312563419342, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.026140520349144936, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02411775477230549, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02323240227997303, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.017885960638523102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.019177181646227837, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.016434505581855774, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.015203641727566719, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04784320294857025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04784320294857025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20034612715244293, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18838056921958923, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18450988829135895, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16741438210010529, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09425734728574753, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09022575616836548, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1048676073551178, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0971205085515976, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0956505611538887, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.085334412753582, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08035934716463089, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.053453389555215836, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04656988009810448, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04528188332915306, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04497396573424339, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02671695314347744, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023542821407318115, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.023457275703549385, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021760549396276474, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021438274532556534, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.014120109379291534, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014600363560020924, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013698747381567955, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01012498140335083, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04656988009810448, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04656988009810448, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23164305090904236, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2177741974592209, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2133256494998932, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1934538036584854, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10871154069900513, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10403432697057724, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12080475687980652, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11195003241300583, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11030702292919159, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09835484623908997, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09247956424951553, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06138189136981964, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05345134064555168, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0519741028547287, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0516214519739151, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030629059299826622, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02649468369781971, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026392988860607147, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02436932362616062, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023987989872694016, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015831174328923225, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015647631138563156, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015335018746554852, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009868280030786991, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0519741028547287, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0519741028547287, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2331145852804184, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.21120759844779968, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.20305849611759186, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.17939278483390808, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10698409378528595, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.0990109071135521, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12345974147319794, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11342242360115051, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10950427502393723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09270500391721725, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08663962036371231, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06291592866182327, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.054368384182453156, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.051504574716091156, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05082711949944496, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03160597011446953, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.027261067181825638, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027019303292036057, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.024536747485399246, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024109002202749252, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01725117117166519, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.017786333337426186, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01636647991836071, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.01295508537441492, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.051504574716091156, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.051504574716091156, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.12448859959840775, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.11639396101236343, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11348439007997513, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10231919586658478, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05831150338053703, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.05542249232530594, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.06565172225236893, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06077659875154495, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.059274666011333466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05241640284657478, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04918445274233818, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.033403970301151276, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.02909526601433754, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.02796865440905094, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.02769825980067253, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.016699479892849922, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014384469017386436, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.014286288060247898, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.013180533424019814, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.012922383844852448, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.008709625340998173, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.008745950646698475, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.008330744691193104, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005738100968301296, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04918445274233818, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.04918445274233818, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10224935412406921, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09536494314670563, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09247968345880508, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08316444605588913, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04747488722205162, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.044883474707603455, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05404505878686905, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.050058700144290924, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.0482565313577652, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04256080463528633, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.03989825397729874, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.027369344606995583, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.023933080956339836, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.022762024775147438, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.022472906857728958, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01369890570640564, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.011700503528118134, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011586138978600502, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010700861923396587, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010454042814671993, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0071473633870482445, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007155701518058777, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006776085589081049, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004675808362662792, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.050058700144290924, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.050058700144290924, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.23458616435527802, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2190786749124527, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21401357650756836, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.19276581704616547, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.1095409244298935, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10432905703783035, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12288273870944977, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11340480297803879, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11140263080596924, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.098270483314991, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09216523915529251, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.062298864126205444, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.054110582917928696, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.052349407225847244, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05194694548845291, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.031114334240555763, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.026471085846424103, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02634553238749504, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024137644097208977, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023699786514043808, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01591351628303528, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015364614315330982, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015266750007867813, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009168016724288464, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.052349407225847244, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05194694548845291, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.21642787754535675, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1932191550731659, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.18337972462177277, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1611606478691101, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09840915352106094, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08978955447673798, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11915400624275208, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10917370766401291, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10221149027347565, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08520065248012543, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07928895205259323, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.0609864667057991, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052900996059179306, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04807833582162857, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04687705263495445, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.030932944267988205, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.026329314336180687, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02591017633676529, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.023889873176813126, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02303016372025013, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01733611896634102, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018554383888840675, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015849897637963295, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014355731196701527, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04807833582162857, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04807833582162857, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19893178343772888, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18714183568954468, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18327830731868744, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16633255779743195, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.0936364084482193, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0896567553281784, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10417426377534866, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0964890867471695, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09500966221094131, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08479828387498856, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07988430559635162, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05312591791152954, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04628913104534149, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.0450039766728878, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.044700466096401215, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02656741440296173, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02341855876147747, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02332954853773117, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021658286452293396, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021341398358345032, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.014069078490138054, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014556941576302052, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013649503700435162, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010132385417819023, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04628913104534149, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04628913104534149, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2242049127817154, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21086058020591736, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.20657652616500854, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18744660913944244, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10526000708341599, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10075574368238449, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11702600121498108, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10842093825340271, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10681246221065521, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.095306895673275, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0896206870675087, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.059466585516929626, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051771607249975204, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.050333019345998764, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.049994416534900665, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02968425303697586, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025662872940301895, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025560036301612854, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02361304685473442, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02324213832616806, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015359197743237019, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015153218992054462, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014873328618705273, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009547902271151543, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051771607249975204, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051771607249975204, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22852309048175812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.2061590999364853, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19767941534519196, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.174435093998909, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.1046595424413681, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09648363292217255, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12170132249593735, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11157502979040146, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10731084644794464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09044796973466873, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.0845789909362793, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06201520189642906, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.053531941026449203, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05047585815191269, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04974298179149628, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.031237062066793442, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02685456909239292, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.026583828032016754, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.024162674322724342, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.023691637441515923, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017205605283379555, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.017730049788951874, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016250930726528168, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013040225952863693, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05047585815191269, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05047585815191269, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.14076487720012665, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13169923424720764, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12845760583877563, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11607953906059265, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06591467559337616, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06271872669458389, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07417083531618118, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06862713396549225, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06697814911603928, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05936267599463463, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05573221296072006, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03770511969923973, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.032826077193021774, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03161301836371422, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03131614997982979, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.018838943913578987, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01626632548868656, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01616325043141842, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01493130810558796, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014650443568825722, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009826034307479858, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009892052039504051, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009415803477168083, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006520864088088274, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03770511969923973, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03770511969923973, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.103662870824337, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09649718552827835, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.0936000719666481, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08423946797847748, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0480157807469368, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04534832760691643, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.054825522005558014, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05074886977672577, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04885907471179962, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.043040789663791656, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04039324074983597, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0277685709297657, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02424521930515766, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02300785854458809, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.022697359323501587, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01387921255081892, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01179096382111311, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011675634421408176, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010781863704323769, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010521645657718182, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007220137864351273, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007178559899330139, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006822530645877123, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004617013037204742, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05074886977672577, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05074886977672577, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2357032150030136, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22009854018688202, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2149759829044342, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.19388915598392487, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.1100921779870987, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10483536869287491, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12367191165685654, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11408780515193939, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11190129816532135, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09888489544391632, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09283668547868729, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0627388283610344, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.054407596588134766, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05261237546801567, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0521879680454731, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03129199519753456, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.026623155921697617, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026485908776521683, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024297045543789864, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023848600685596466, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016012120991945267, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015505990944802761, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015344099141657352, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009311119094491005, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0521879680454731, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0521879680454731, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.21106775104999542, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.19280534982681274, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.18709121644496918, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.162597194314003, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09745164960622787, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0915336087346077, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11029846966266632, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10139892250299454, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09959639608860016, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08432850241661072, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07718255370855331, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05667169764637947, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048926033079624176, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04712238907814026, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04670584574341774, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028431789949536324, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.025095757097005844, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024989115074276924, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02258279174566269, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.022142739966511726, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015584232285618782, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016453370451927185, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015032049268484116, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.012294730171561241, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048926033079624176, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048926033079624176, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19933217763900757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18750527501106262, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18372318148612976, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16681095957756042, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09375398606061935, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08980098366737366, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10423822700977325, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09655983746051788, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0951421707868576, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08494239300489426, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07996928691864014, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05305211991071701, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046216290444135666, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04495168849825859, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04465458542108536, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.0264881681650877, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023199280723929405, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.023112691938877106, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021417973563075066, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021093888208270073, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013875126838684082, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014130838215351105, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01345886755734682, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009496008045971394, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046216290444135666, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046216290444135666, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23087354004383087, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21713311970233917, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2128147929906845, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1932191550731659, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10841942578554153, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10382892936468124, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12040098011493683, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.1116100400686264, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11001583188772202, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.0982053130865097, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09239746630191803, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06117347627878189, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05329369753599167, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05184637010097504, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.051502618938684464, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030531030148267746, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02642536163330078, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026327112689614296, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024320971220731735, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02394888550043106, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015782620757818222, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015589232556521893, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015298157930374146, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009816315025091171, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05184637010097504, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05184637010097504, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2279902994632721, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.2049528956413269, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19615237414836884, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.17277681827545166, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10424228757619858, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09570833295583725, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12139251828193665, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11146123707294464, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.1070561408996582, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08967751264572144, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08380308747291565, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06175444647669792, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05335431173443794, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05013357102870941, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04935650900006294, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.030990110710263252, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.026386968791484833, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.026110433042049408, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.023576941341161728, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02308371104300022, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016805298626422882, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01709633693099022, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015792829915881157, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012182964943349361, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05013357102870941, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05013357102870941, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1272200345993042, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.11919252574443817, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11621011793613434, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10506618767976761, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05966212600469589, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0567377507686615, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.06730560213327408, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06226988136768341, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06060998514294624, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05376436933875084, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05057916045188904, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.034235384315252304, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.029798423871397972, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.02862086333334446, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.028333593159914017, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0171256922185421, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014731614850461483, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01462856400758028, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.013530576601624489, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.013263916596770287, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.008949761278927326, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.008977033197879791, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.008561111986637115, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005914097186177969, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05057916045188904, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05057916045188904, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.09659624844789505, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09012565016746521, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.08744773268699646, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.07879503816366196, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04485167935490608, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04239378124475479, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05125110223889351, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.047435883432626724, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04561098292469978, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04033050686120987, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.037895724177360535, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02594617009162903, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.022662753239274025, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02147674188017845, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.021196532994508743, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01296937931329012, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01099754124879837, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.010875092819333076, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010074139572679996, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.00982511043548584, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.006758811883628368, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.006685849279165268, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006387651897966862, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0042703840881586075, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05125110223889351, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05125110223889351, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2370445877313614, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22115309536457062, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21578511595726013, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1944001168012619, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11077596247196198, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10529295355081558, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12448885291814804, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11500785499811172, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11268426477909088, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09937022626399994, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09314565360546112, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0632638931274414, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05492325872182846, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05296860262751579, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.052513737231492996, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03160407394170761, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02694486267864704, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026783352717757225, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024574626237154007, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02411487326025963, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016243157908320427, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015884166583418846, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015548616647720337, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00983082503080368, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03160407394170761, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03160407394170761, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.2184346616268158, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1960901916027069, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.18777191638946533, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1640775501728058, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.10048376023769379, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.09238412231206894, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11763733625411987, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10794112086296082, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10360466688871384, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0865430161356926, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07906319946050644, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06041684001684189, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052182652056217194, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04878547787666321, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04795828461647034, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.030372746288776398, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.026174942031502724, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.025899143889546394, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.023520024493336678, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.022844428196549416, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.016522176563739777, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01761600375175476, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015506012365221977, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013196096755564213, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052182652056217194, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052182652056217194, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19316092133522034, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1817532628774643, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.17809578776359558, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1617061346769333, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09089772403240204, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08707224577665329, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10103768855333328, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0936245545744896, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09224474430084229, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08240324258804321, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07760605961084366, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.051467202603816986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04485391080379486, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04362503066658974, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04333419352769852, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025729719549417496, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02257521077990532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022488582879304886, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020859770476818085, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02055215649306774, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013540256768465042, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013849010691046715, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013134087435901165, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009424450807273388, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.051467202603816986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.051467202603816986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22373361885547638, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2105402797460556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2063780575990677, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1873546987771988, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10507059842348099, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.1006525382399559, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11667634546756744, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.1081584244966507, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10662131756544113, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.0952441617846489, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08961715549230576, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05932588502764702, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05166280269622803, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05026177689433098, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04993259534239769, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02959887497127056, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02563578449189663, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02553926222026348, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.023603716865181923, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023240281268954277, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015310454182326794, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015148471109569073, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014839245937764645, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00957211758941412, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05166280269622803, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05166280269622803, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.21789754927158356, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.19532442092895508, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.18647095561027527, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16385528445243835, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.09970340877771378, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.0912138894200325, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11694277077913284, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10723448544740677, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.1024899035692215, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08553706109523773, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07986335456371307, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05963696539402008, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05146263539791107, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.048097163438797, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.0473012700676918, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03006451204419136, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.025569647550582886, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.025281568989157677, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.0228817667812109, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02236470952630043, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01659715175628662, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01692800596356392, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015558759681880474, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.0123823843896389, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05146263539791107, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05146263539791107, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.13982784748077393, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13110218942165375, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1279553920030594, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1158573180437088, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06558526307344437, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.062488704919815063, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07370138168334961, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0681622251868248, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06661804020404816, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05919421836733818, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05570792406797409, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03749070316553116, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.032639279961586, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03143567219376564, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.031160902231931686, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01874971017241478, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.016176097095012665, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01607174426317215, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.014873773790895939, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014596326276659966, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009794068522751331, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009830095805227757, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009395834058523178, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006470630876719952, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03749070316553116, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03749070316553116, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10377243161201477, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09685146063566208, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09391862899065018, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08464327454566956, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04816366732120514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.045519184321165085, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05515759065747261, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0509575754404068, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.04899052903056145, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0432756170630455, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04067053273320198, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02793167531490326, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.024350324645638466, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02307075634598732, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02275892160832882, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.013961110264062881, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01181962713599205, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011695446446537971, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.010820860043168068, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010550887323915958, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0072726598009467125, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007187788374722004, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0068698157556355, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004595422185957432, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0509575754404068, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0509575754404068, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.25322121381759644, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.23628142476081848, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.23083335161209106, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.20787057280540466, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11828352510929108, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11257139593362808, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.13268494606018066, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.12260246276855469, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.12031982094049454, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.1060737818479538, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09953288733959198, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0673055350780487, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0585373155772686, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0565568245947361, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05606164038181305, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03360215574502945, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.028581690043210983, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02841399982571602, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02602520026266575, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.025534749031066895, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.017173269763588905, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0165693461894989, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.016448913142085075, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009843913838267326, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03360215574502945, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03360215574502945, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.21579590439796448, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.18999379873275757, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.18192853033542633, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1577061116695404, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09873370081186295, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.09022818505764008, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11304892599582672, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.1039857417345047, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10160095989704132, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08177421987056732, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07582247257232666, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05802566558122635, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.050240784883499146, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.047872114926576614, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.047304630279541016, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.029193472117185593, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.025696443393826485, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.025559566915035248, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.022498469799757004, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.021968934684991837, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01614842377603054, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.017194878309965134, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015437385067343712, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013044307008385658, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.050240784883499146, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.050240784883499146, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19697169959545135, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18541313707828522, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18171662092208862, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16517460346221924, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09266895800828934, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08880074322223663, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10300039499998093, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0953865796327591, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09402257204055786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08404813706874847, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07931279391050339, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05241304636001587, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04567217826843262, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04445716366171837, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.044165946543216705, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.026217171922326088, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02297636866569519, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02289525978267193, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02123122476041317, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020923199132084846, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01380164735019207, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014039086177945137, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013404769822955132, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009501607157289982, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04567217826843262, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04567217826843262, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23050181567668915, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21700289845466614, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21273523569107056, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19329458475112915, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.1082976907491684, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10378066450357437, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12027058750391006, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11140377074480057, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10987347364425659, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09822133183479309, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09250161051750183, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06113351508975029, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05323849618434906, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.051832620054483414, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.051501963287591934, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030628105625510216, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026479223743081093, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026386793702840805, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024408303201198578, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02404186502099037, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016103744506835938, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015707407146692276, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01564684696495533, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010029015131294727, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.051832620054483414, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.051832620054483414, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22243231534957886, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.1985476016998291, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1889915019273758, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16637645661830902, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10138847678899765, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09232471138238907, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11928980052471161, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.1095881462097168, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10436734557151794, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08673319220542908, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08108509331941605, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06062505394220352, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05242135375738144, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.048788540065288544, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04790131747722626, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03046446107327938, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02570173889398575, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.025383360683918, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022885184735059738, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022326499223709106, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01652856357395649, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016767485067248344, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015382700599730015, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011927029117941856, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.048788540065288544, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.048788540065288544, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.12388405948877335, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.11629049479961395, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11333439499139786, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10268294811248779, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05811072513461113, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.055300191044807434, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.06568591296672821, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06069401651620865, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0590096116065979, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.052496012300252914, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.049499742686748505, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03338909521698952, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.029043028131127357, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.027861598879098892, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.027574513107538223, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.016723960638046265, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014321842230856419, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.014212223701179028, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.013182379305362701, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.012919789180159569, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.008752822875976562, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.00871228240430355, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.008362245745956898, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005702389404177666, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.049499742686748505, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.049499742686748505, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.1083332821726799, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.10139408707618713, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09858883172273636, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08909336477518082, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.050489116460084915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04787144437432289, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05729756876826286, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05307036265730858, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05127759277820587, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04547746106982231, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04279998317360878, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.029037773609161377, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02535073086619377, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02415291778743267, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.023862475529313087, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.014502239413559437, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.012347332201898098, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.012229565531015396, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.011324089020490646, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.01106941793113947, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007534404750913382, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0074387891218066216, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.0071554300375282764, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004711783025413752, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05127759277820587, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05127759277820587, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.24404042959213257, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22889672219753265, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.22396378219127655, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.20259550213813782, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11448357254266739, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.1092381402850151, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12801432609558105, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11817245185375214, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11622567474842072, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10316063463687897, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0970519557595253, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06493157148361206, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05640898644924164, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05468108132481575, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05425164848566055, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03239269554615021, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.027666805312037468, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.027530686929821968, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.025314515456557274, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.024864010512828827, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01657775603234768, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.01604962907731533, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01593446545302868, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009589123539626598, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03239269554615021, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03239269554615021, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1933901607990265, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17041441798210144, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16311034560203552, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.14018015563488007, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08833573758602142, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08064665645360947, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10202296078205109, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09407179057598114, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09171327948570251, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07366223633289337, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06747590005397797, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05246533453464508, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0455176904797554, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04290357977151871, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04226658493280411, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.026375293731689453, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.023053638637065887, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022908074781298637, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.020267389714717865, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019718073308467865, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.0144393565133214, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.015518439002335072, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013632775284349918, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.011733600869774818, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0455176904797554, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0455176904797554, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1951560378074646, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1838095784187317, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18020515143871307, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16393151879310608, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09166385233402252, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08786334842443466, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10198597609996796, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09432007372379303, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09299968183040619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08316949754953384, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07851731032133102, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05174078419804573, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.045128609985113144, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.043938055634498596, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04365012049674988, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025919143110513687, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022647812962532043, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022564349696040154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020924460142850876, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02061707153916359, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01367292832583189, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013741585426032543, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013290361501276493, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009182746522128582, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05174078419804573, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05174078419804573, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2282012403011322, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2149014174938202, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21072809398174286, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19164061546325684, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10716599225997925, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10273776203393936, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11898521333932877, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11020343750715256, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10872113704681396, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09727010875940323, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09166280925273895, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0604778416454792, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05265038087964058, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05126677080988884, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.050933610647916794, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030232401564717293, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026170849800109863, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026071911677718163, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024128735065460205, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023765316233038902, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015754323452711105, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015489145182073116, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015299906954169273, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009848570451140404, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05126677080988884, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05126677080988884, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.21467718482017517, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.19151900708675385, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1821150928735733, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.15982669591903687, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.09795486927032471, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.08909006416797638, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11559081822633743, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10615670680999756, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.1008516475558281, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08362171053886414, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.078126460313797, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.05879530310630798, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.050805892795324326, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04715205356478691, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04626152291893959, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.029524531215429306, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02486012503504753, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02453022263944149, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022103959694504738, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.021533949300646782, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.0159564521163702, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01626080833375454, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014787555672228336, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011568120680749416, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.050805892795324326, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.050805892795324326, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.12858366966247559, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.12071909010410309, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.11772226542234421, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.106722392141819, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06040149927139282, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.05748973786830902, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.06818927824497223, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.063045933842659, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0613267756998539, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0545908585190773, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05147871375083923, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03469526022672653, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.030198577791452408, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.028969531878829002, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.028679579496383667, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.017356403172016144, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014922680333256721, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.014808818697929382, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.013740858063101768, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.013468689285218716, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009079265408217907, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009114273823797703, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.00867753941565752, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006020853295922279, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05147871375083923, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05147871375083923, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10577227175235748, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.09899762272834778, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09616148471832275, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08683586120605469, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04933543875813484, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04677842557430267, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.05625537037849426, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05205375328660011, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.050126004964113235, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04446551576256752, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04185235872864723, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02851172164082527, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.024890489876270294, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02367866411805153, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02339049056172371, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.014271872118115425, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.012173510156571865, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.012051453813910484, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.011183349415659904, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.01092658843845129, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007477482780814171, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007453490048646927, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.00709997583180666, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004850386176258326, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05205375328660011, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05205375328660011, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2373310774564743, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22221198678016663, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21720552444458008, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1962989866733551, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11092754453420639, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10578631609678268, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12426075339317322, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11478094011545181, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11272109299898148, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09993524104356766, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09385056793689728, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0630369782447815, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05479450523853302, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05297790840268135, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05254925787448883, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03145872429013252, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.026777654886245728, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02664307877421379, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024476172402501106, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.024034373462200165, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01608688198029995, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015542455948889256, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015438441187143326, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009242117404937744, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03145872429013252, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03145872429013252, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20412734150886536, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1785130500793457, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16617929935455322, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.13946662843227386, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09371385723352432, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08313871920108795, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11507675796747208, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10604708641767502, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09679023176431656, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07670465111732483, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06976993381977081, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05920613929629326, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0514998659491539, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04597746208310127, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04461279883980751, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02992165833711624, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.025500746443867683, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024916542693972588, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.022441817447543144, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02139226719737053, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.016762886196374893, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018431855365633965, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015116211958229542, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014439520426094532, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0514998659491539, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0514998659491539, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.40.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19572590291500092, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18433474004268646, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1807282567024231, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16438323259353638, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09201975911855698, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08822218328714371, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10236304998397827, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09463919699192047, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09333938360214233, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.0835374966263771, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07890856266021729, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052111007273197174, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04530024528503418, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04411787539720535, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04383048042654991, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.026145024225115776, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02274463139474392, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022661948576569557, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021027039736509323, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020724570378661156, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01388284470885992, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013809515163302422, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013504167087376118, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009247876703739166, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052111007273197174, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052111007273197174, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.40.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22856932878494263, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21534161269664764, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21118992567062378, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1921706348657608, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10739165544509888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10299372673034668, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11919347941875458, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11041034758090973, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10890551656484604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09751284122467041, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09205266088247299, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.060679059475660324, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.052769288420677185, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.051400087773799896, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.051073405891656876, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030348217114806175, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02630568854510784, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026215054094791412, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02427913434803486, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023928126320242882, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015856850892305374, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015662651509046555, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015408806502819061, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010096626356244087, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.051400087773799896, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.051400087773799896, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.40.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2251739203929901, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.20095647871494293, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19136761128902435, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16802477836608887, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10259795188903809, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09344179928302765, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12059193104505539, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11071480065584183, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10556892305612564, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.0876680240035057, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08178394287824631, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.061285894364118576, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05291546881198883, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.049288034439086914, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.048406414687633514, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.030846524983644485, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.025806356221437454, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02548525109887123, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.0229034461081028, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022340118885040283, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016773318871855736, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016624970361590385, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015628544613718987, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011587798595428467, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.049288034439086914, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.049288034439086914, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.12661492824554443, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.11880096793174744, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1158912181854248, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10500476509332657, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.05931420996785164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.05647490173578262, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0670451819896698, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06182495877146721, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06023328751325607, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.053611863404512405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05061883479356766, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.034111138433218, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.02963828295469284, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.028443915769457817, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.02815997041761875, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0170978344976902, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.014643676578998566, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.014535374939441681, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.013489311560988426, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.013226532377302647, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.00897904857993126, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.008930666372179985, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.008598089218139648, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.005890991073101759, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05061883479356766, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05061883479356766, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.11062785238027573, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.10354733467102051, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.10065652430057526, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.090936578810215, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.05150309577584267, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.048829421401023865, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0586385577917099, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.054229553788900375, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05237925797700882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.046418726444244385, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.043671753257513046, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02969125472009182, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.025897551327943802, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.024638747796416283, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.024358589202165604, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.014832607470452785, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.012601496651768684, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.012482506223022938, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.011555695906281471, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.011298357509076595, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007712236139923334, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007601067889481783, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.007313684094697237, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004815506283193827, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05237925797700882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05237925797700882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.243515744805336, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22820566594600677, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.22313448786735535, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.20183885097503662, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.1138891726732254, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10869572311639786, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1273781657218933, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11774466186761856, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11573868244886398, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10262732207775116, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09636175632476807, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06462367624044418, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05618104338645935, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05439675971865654, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.053979262709617615, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03223426267504692, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02761995978653431, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02748473547399044, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.025285745039582253, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.024835407733917236, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016560515388846397, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.01618630439043045, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015931956470012665, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009905008599162102, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03223426267504692, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03223426267504692, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.21726273000240326, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.18816792964935303, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.17766551673412323, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.14821839332580566, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09964506328105927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08909274637699127, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.1180957481265068, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10798240453004837, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.1031135618686676, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.08081698417663574, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07303518056869507, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06088852137327194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052374836057424545, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.048586465418338776, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04767020419239998, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.030669335275888443, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02641095407307148, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02611391991376877, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02293924055993557, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.022143354639410973, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.017015136778354645, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01823895424604416, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015886832028627396, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014034854248166084, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052374836057424545, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052374836057424545, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.41.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19437788426876068, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1831716001033783, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.17963695526123047, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16346409916877747, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09131008386611938, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08758728206157684, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10135739296674728, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09391901642084122, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09263293445110321, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08294624090194702, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07825511693954468, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05151524022221565, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04488147422671318, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04371067136526108, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0434318482875824, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025735970586538315, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022422071546316147, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022340187802910805, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020711559802293777, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02040967531502247, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01343371719121933, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01344025507569313, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013046897016465664, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008773854933679104, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05151524022221565, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05151524022221565, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.41.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22638382017612457, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2133757621049881, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.20928175747394562, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19043831527233124, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10630834102630615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10195447504520416, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11792981624603271, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10928016901016235, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.1078042984008789, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.0965539738535881, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09110820293426514, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.059987686574459076, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05218687281012535, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05083828791975975, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.050517935305833817, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02995416149497032, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025962872430682182, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025869136676192284, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02396349050104618, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02361172065138817, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015584704466164112, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015385673381388187, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015137413516640663, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009811307303607464, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05218687281012535, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05218687281012535, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.41.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22114358842372894, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.19683097302913666, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1870013028383255, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16380420327186584, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10073176771402359, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.0914490818977356, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11926870048046112, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10918880254030228, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10375809669494629, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08579409122467041, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07997287809848785, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06059369444847107, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05223385989665985, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.048480305820703506, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04756259545683861, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.030503802001476288, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.025540223345160484, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02519857883453369, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022665761411190033, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022071748971939087, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016689525917172432, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016685644164681435, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015501928515732288, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011835177429020405, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05223385989665985, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05223385989665985, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.13510310649871826, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1265740841627121, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12332940101623535, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11159633100032806, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06324809044599533, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06014300882816315, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07152578234672546, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06611628085374832, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06426190584897995, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.057036180049180984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05374186858534813, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.036343589425086975, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.031607527285814285, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.030315155163407326, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0300051998347044, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01816997304558754, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01558254100382328, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.015468539670109749, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.014316870830953121, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014029441401362419, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009493133053183556, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009472021833062172, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009073770605027676, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.00619849469512701, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.036343589425086975, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.036343589425086975, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.10527767986059189, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.0982663631439209, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.09534697234630585, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.08596737682819366, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.04888270050287247, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04620320349931717, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.056004926562309265, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.051719579845666885, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.049731381237506866, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.043951455503702164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0413241945207119, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.02832518145442009, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0247093066573143, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.023433195427060127, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02312544547021389, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.014160008169710636, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.012006580829620361, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.011880537495017052, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.01100130658596754, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.010727074928581715, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.0074022081680595875, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007320847362279892, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.006994267459958792, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004711693152785301, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.051719579845666885, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.051719579845666885, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.23795188963413239, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22167108952999115, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21631017327308655, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.19445928931236267, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11068572103977203, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10521772503852844, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12434155493974686, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11493321508169174, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11265566200017929, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09915827214717865, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09283388406038284, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06295284628868103, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.054773714393377304, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05282112583518028, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05234971642494202, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.031403154134750366, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.0267320666462183, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026569250971078873, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024312198162078857, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023842472583055496, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01606227643787861, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015555256977677345, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015379801392555237, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009297669865190983, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05234971642494202, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05234971642494202, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.18080931901931763, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.15190677344799042, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1425071507692337, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12306376546621323, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08078426122665405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07099611312150955, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.09516597539186478, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08783818781375885, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08500021696090698, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.064629927277565, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05976784601807594, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.048429518938064575, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04194871708750725, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.038767702877521515, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03798861429095268, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0242102462798357, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.020174384117126465, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.019985588267445564, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016995852813124657, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.016313321888446808, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01282358169555664, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.012894881889224052, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01180474553257227, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.008729854598641396, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.048429518938064575, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.048429518938064575, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1953708976507187, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18414443731307983, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.180599182844162, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1643654853105545, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09179504215717316, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08804979175329208, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10184086114168167, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.094374880194664, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09310328215360641, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08340685814619064, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0786682590842247, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05171217396855354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04508495330810547, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04391254112124443, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04363106191158295, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025821929797530174, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022466003894805908, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022388551384210587, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02074507810175419, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02044442668557167, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013407263904809952, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01338227279484272, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01301992405205965, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008624717593193054, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05171217396855354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05171217396855354, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22852760553359985, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21541470289230347, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2113199234008789, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19236873090267181, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10730496793985367, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10294751822948456, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11896035075187683, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.110275998711586, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10881444811820984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09751461446285248, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09198232740163803, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.060449663549661636, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.052632804960012436, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05128902941942215, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.050968583673238754, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030148696154356003, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026132827624678612, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026041965931653976, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024111822247505188, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023757893592119217, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015574006363749504, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01538786944001913, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015124073252081871, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009672369807958603, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05128902941942215, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05128902941942215, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.42.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2207426279783249, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.196722611784935, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.18705041706562042, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16393226385116577, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10047458112239838, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09133701026439667, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11844341456890106, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10883528739213943, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10349848866462708, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08565255999565125, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.07985380291938782, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.060009825974702835, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05193960294127464, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04823021590709686, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04731911048293114, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.0301778893917799, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02517896145582199, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02484382875263691, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022288057953119278, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02169959619641304, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016317855566740036, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016157208010554314, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015161578543484211, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011117756366729736, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05193960294127464, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05193960294127464, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.14034995436668396, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13140985369682312, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12808860838413239, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11592982709407806, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06565362215042114, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06243440881371498, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0742100179195404, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06858804076910019, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06671340763568878, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.059224262833595276, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05577113851904869, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03770218789577484, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.032799359411001205, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.031465064734220505, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.031141553074121475, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01885301060974598, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.016150301322340965, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0160341989248991, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.014837656170129776, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014539938420057297, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.00982643198221922, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009793254546821117, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009389312006533146, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006376307457685471, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03770218789577484, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03770218789577484, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.11125659197568893, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.10380731523036957, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.10071086138486862, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.09091304987668991, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.051610298454761505, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.04884682595729828, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.059059176594018936, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05457654222846031, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.052528709173202515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.046411942690610886, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04364362731575966, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.029845595359802246, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.026043694466352463, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.024710675701498985, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.024395762011408806, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.014911199919879436, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.012624436058104038, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01249696034938097, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.011560396291315556, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.011278330348432064, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007758202031254768, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.0076329512521624565, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.007338930852711201, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004825132433325052, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.051610298454761505, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.051610298454761505, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.239909827709198, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22413545846939087, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2189813256263733, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.19726355373859406, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11168696731328964, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10629178583621979, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12500494718551636, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11560589075088501, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11349544674158096, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10032819956541061, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09403525292873383, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0634014755487442, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.055111996829509735, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05329573154449463, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05286698415875435, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03161609172821045, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02697095461189747, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026815226301550865, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024585390463471413, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02414293959736824, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016168169677257538, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015675688162446022, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015520798973739147, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009389443323016167, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03161609172821045, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03161609172821045, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20435452461242676, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17325520515441895, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1617814153432846, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1331571787595749, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0925271138548851, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08079018443822861, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11085862666368484, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10202420502901077, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0968557596206665, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07306017726659775, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0664176419377327, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05703403800725937, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04940428212285042, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04507674276828766, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04402592405676842, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02867637760937214, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02452753484249115, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024203680455684662, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02093862183392048, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.020043859258294106, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015866881236433983, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.017087552696466446, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01456193346530199, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01307665091007948, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04940428212285042, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04940428212285042, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19613143801689148, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18486376106739044, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1812923699617386, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1649939864873886, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09213613718748093, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08835878223180771, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10224278271198273, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09472443908452988, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09344486892223358, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08369049429893494, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07898665219545364, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0519472174346447, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04524698853492737, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04408017173409462, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.043793585151433945, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.025921162217855453, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022541657090187073, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022460704669356346, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020804202184081078, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02050003409385681, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013452071696519852, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01340470276772976, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013067039661109447, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008610203862190247, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0519472174346447, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0519472174346447, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23006655275821686, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2169022262096405, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21278595924377441, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1936779022216797, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10802453756332397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.1036248505115509, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11974527686834335, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11099564284086227, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.1095641702413559, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09814075380563736, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09258110076189041, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06084967404603958, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05299391224980354, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05163706839084625, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.051316361874341965, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030353421345353127, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026303913444280624, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02621215395629406, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024267761036753654, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023911332711577415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01568583771586418, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015485880896449089, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015235344879329205, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009734143503010273, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05163706839084625, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05163706839084625, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.43.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2208617627620697, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.19685712456703186, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1871025711297989, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1642671674489975, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10051269084215164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09130484610795975, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11859481036663055, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10899306833744049, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10355088859796524, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08582542091608047, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.0800236165523529, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06016945466399193, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05207214877009392, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04827496036887169, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04735053703188896, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.030311232432723045, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.025267764925956726, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02492614835500717, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022414224222302437, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02181812934577465, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016529710963368416, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01630261167883873, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01536430325359106, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011316247284412384, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05207214877009392, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05207214877009392, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1359582096338272, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.12734714150428772, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12418477237224579, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11243509501218796, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06360498815774918, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0605410598218441, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07205694913864136, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06628753244876862, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0646200105547905, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0573599711060524, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05418377369642258, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.036633849143981934, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.031693972647190094, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.030475683510303497, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.030189931392669678, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.018336046487092972, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01564336009323597, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01554014254361391, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01436846237629652, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014091849327087402, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009566950611770153, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009472379460930824, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.00912658590823412, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0061766053549945354, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.036633849143981934, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.036633849143981934, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.11329783499240875, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.10585978627204895, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.10272718966007233, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.09274100512266159, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.05264366418123245, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.049792397767305374, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0601215623319149, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05561061203479767, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05353618413209915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04735609143972397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.044574085623025894, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03045584447681904, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.026531603187322617, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.025195252150297165, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.024873197078704834, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01519839558750391, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.012863708660006523, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.012725535780191422, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.011774690821766853, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.011493121273815632, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.007904274389147758, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.007746083661913872, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.007480028085410595, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.004866196308284998, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04735609143972397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.04735609143972397, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.23695069551467896, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22103098034858704, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21582776308059692, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1945350617170334, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11018506437540054, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10483807325363159, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1235789805650711, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11414283514022827, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11203081905841827, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09889095276594162, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09281299263238907, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06258918344974518, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05445335805416107, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.052628181874752045, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.052176039665937424, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03124515525996685, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.026678482070565224, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026530908420681953, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024329977110028267, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023883696645498276, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016035331413149834, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015611041337251663, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015380414202809334, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00947683397680521, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.052176039665937424, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.052176039665937424, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.19654062390327454, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1679379940032959, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1588129997253418, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.13387799263000488, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0893278643488884, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07983705401420593, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10464191436767578, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09578365832567215, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09260719269514084, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07223869860172272, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06516259908676147, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05402621254324913, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0469169057905674, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04404882341623306, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04336632788181305, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.027541618794202805, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.024838002398610115, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024655228480696678, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02184465155005455, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.021254949271678925, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01604795828461647, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01801523193717003, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01523742824792862, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014787925407290459, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0469169057905674, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0469169057905674, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.44.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19889752566814423, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18737463653087616, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1838066130876541, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16728970408439636, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09338520467281342, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08954422920942307, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10361814498901367, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.0960010215640068, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09474177658557892, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08481539785861969, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0800502598285675, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052651140838861465, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04585698992013931, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04466205835342407, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04437766596674919, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.026291029527783394, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02281612530350685, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02273627370595932, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021053463220596313, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020744821056723595, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013648038730025291, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01353140827268362, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01325688511133194, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008635524660348892, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04585698992013931, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04585698992013931, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.44.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23172113299369812, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21841943264007568, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2142294943332672, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19504186511039734, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10879883915185928, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10438446700572968, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12059091031551361, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11179137229919434, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11032192409038544, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09883930534124374, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09321346133947372, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06128917261958122, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05336802080273628, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.052004534751176834, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05167483538389206, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030563807114958763, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02647510915994644, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02638358436524868, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024417437613010406, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024059440940618515, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015784502029418945, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.0155599070712924, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015331806614995003, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009744800627231598, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.052004534751176834, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.052004534751176834, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.44.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.21898166835308075, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.195152148604393, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.18547143042087555, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16271820664405823, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.09955881536006927, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09045824408531189, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.11725138872861862, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10800999402999878, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.102567158639431, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08500208705663681, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.0792069062590599, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0593595914542675, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.051477234810590744, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04771766811609268, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04679959639906883, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.029847148805856705, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02477153018116951, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.024430101737380028, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.021897369995713234, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02130199410021305, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016057971864938736, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.015712833032011986, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014881444163620472, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.010561048984527588, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.051477234810590744, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.051477234810590744, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.13763976097106934, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.12878651916980743, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12557195127010345, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11362574994564056, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06439302116632462, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06122251972556114, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07303420454263687, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06716517359018326, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06544691324234009, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05796591937541962, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05468685179948807, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.037094272673130035, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03210518881678581, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03084060363471508, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.030543727800250053, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01854562945663929, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01581222005188465, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01570131443440914, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.014495938085019588, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01421217992901802, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009688667953014374, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009538409300148487, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009255415759980679, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006162422709167004, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.037094272673130035, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.037094272673130035, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.11956708878278732, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.11166639626026154, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.10860820859670639, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.09805375337600708, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.055691078305244446, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.05276552960276604, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.06352013349533081, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05867486074566841, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05657936632633209, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05006954446434975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04716572165489197, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03215447813272476, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.027988780289888382, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.026647021993994713, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02633371204137802, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.016080358996987343, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.013612717390060425, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.013480029068887234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.012460502795875072, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.012170631438493729, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.008357461541891098, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.008198714815080166, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.007920698262751102, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.0051556737162172794, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05006954446434975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05006954446434975, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2413565069437027, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.22561614215373993, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2203759104013443, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.19873903691768646, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11270181834697723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10733219236135483, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12623777985572815, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11676239222288132, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11461111903190613, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10131341218948364, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0950188934803009, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.064023956656456, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05572010204195976, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.053840167820453644, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.053392525762319565, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03193265199661255, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.027193045243620872, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02705436386168003, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024793237447738647, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.024331428110599518, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01631544716656208, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.01575281471014023, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015654081478714943, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009326399303972721, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03193265199661255, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03193265199661255, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.16110146045684814, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13381816446781158, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1263752579689026, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10948412120342255, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07276192307472229, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06298511475324631, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08407946676015854, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07635834068059921, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07483899593353271, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.056832071393728256, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.052848175168037415, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042970702052116394, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03804038092494011, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03643162176012993, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03607545420527458, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.022672034800052643, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02140198089182377, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.021317336708307266, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01879074051976204, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.018481343984603882, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014283416792750359, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016218364238739014, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013871735893189907, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013997102156281471, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042970702052116394, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042970702052116394, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.19643449783325195, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1850360780954361, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18146340548992157, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16511860489845276, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09226042777299881, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08845386654138565, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10270025581121445, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09487314522266388, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09359738975763321, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08375994116067886, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07904373854398727, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052205465734004974, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04536769539117813, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04418542608618736, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0439051054418087, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02623901516199112, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02268327586352825, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022606994956731796, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020948855206370354, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020643508061766624, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01402817852795124, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013624191284179688, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013657471165060997, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008926328271627426, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052205465734004974, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.052205465734004974, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2316609025001526, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.21833492815494537, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21416349709033966, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1949274092912674, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10882459580898285, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10436736792325974, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12074162811040878, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11185374110937119, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11037615686655045, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.0988394096493721, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09328870475292206, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.061430808156728745, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05342085659503937, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05204586312174797, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05172020196914673, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030673939734697342, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026544949039816856, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02645079232752323, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024481937289237976, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.0241222120821476, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015936147421598434, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015672245994210243, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015486180782318115, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009913412854075432, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05204586312174797, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05204586312174797, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.45.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22271808981895447, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.19901958107948303, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1895703822374344, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16634856164455414, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10148239135742188, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09246906638145447, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1196221187710762, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.10962902754545212, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10442156344652176, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08678776770830154, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08098902553319931, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06058045104146004, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05237240344285965, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.048732999712228775, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04784691333770752, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.030471211299300194, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.025466151535511017, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.025137735530734062, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022597841918468475, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022024549543857574, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.01655144989490509, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01634320802986622, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015405719168484211, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011292271316051483, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05237240344285965, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05237240344285965, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1401679366827011, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13119836151599884, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.12800155580043793, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11600387096405029, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06564580649137497, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.062419503927230835, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07423147559165955, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06827034056186676, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06671474128961563, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.059140127152204514, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.055796895176172256, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.037679433822631836, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03263945132493973, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03145107999444008, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.031168565154075623, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.018852055072784424, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.016084818169474602, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.015984652563929558, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.014743870124220848, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014468960464000702, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009811675176024437, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009634166955947876, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009426110424101353, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006160268094390631, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.037679433822631836, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.037679433822631836, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.12372356653213501, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1157979890704155, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.11267933994531631, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.10188944637775421, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.05765123292803764, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.05470282956957817, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.06553316116333008, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.060541555285453796, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.058575861155986786, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.051918644458055496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0488675981760025, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03316197916865349, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0288829505443573, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.027571648359298706, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02724749594926834, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.016575662419199944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.014044288545846939, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.013920542784035206, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.012865358963608742, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.012581484392285347, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.008585132658481598, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.008381620980799198, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008161678910255432, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005206411704421043, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.051918644458055496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.051918644458055496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.24016804993152618, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2245827615261078, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21960356831550598, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1983700543642044, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11188847571611404, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10661238431930542, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1251009702682495, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.1157064363360405, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.1136840432882309, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.1006808653473854, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09457479417324066, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06335087865591049, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05514279007911682, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.0533783920109272, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05295836925506592, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.031595394015312195, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02697012387216091, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026826106011867523, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.0246560238301754, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.024213198572397232, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01615314371883869, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015614926815032959, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.015528516843914986, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009260780178010464, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.031595394015312195, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.031595394015312195, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.22153446078300476, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17982669174671173, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1645992249250412, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1293114572763443, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.10065105557441711, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08582834154367447, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.12063804268836975, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.11128412187099457, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.1053697019815445, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07266857475042343, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06666175276041031, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.062023114413022995, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05384940654039383, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.049070172011852264, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04788587987422943, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03114858642220497, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.026632189750671387, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.026269404217600822, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.021517962217330933, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.020496472716331482, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01715928502380848, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018504969775676727, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015703411772847176, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.014085469767451286, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.049070172011852264, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.049070172011852264, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.46.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1987762749195099, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.18725359439849854, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18363679945468903, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1669774055480957, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.093327596783638, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08947046846151352, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10357213765382767, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09598450362682343, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09468966722488403, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08468349277973175, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.07990363240242004, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05263441056013107, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04582059383392334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.044618017971515656, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04433053359389305, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.026262709870934486, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022742334753274918, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022661587223410606, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.020959507673978806, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020646099001169205, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01360330916941166, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013413060456514359, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013207443989813328, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008451269008219242, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04582059383392334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04582059383392334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23470889031887054, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22110900282859802, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21686206758022308, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19730031490325928, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11025575548410416, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.1057191714644432, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12223299592733383, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.1133340522646904, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11184521019458771, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10008732229471207, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09434890002012253, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06215107813477516, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05411549285054207, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05270863324403763, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05238243192434311, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.030996359884738922, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026834789663553238, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.026743751019239426, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024732282385230064, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024362869560718536, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016014404594898224, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015770498663187027, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015553261153399944, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009873520582914352, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05238243192434311, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05238243192434311, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.46.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22521333396434784, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.20129665732383728, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19176967442035675, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.16855409741401672, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.1025623232126236, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09351709485054016, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12013348937034607, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11072105914354324, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.1055002361536026, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08774952590465546, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08179206401109695, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06066504865884781, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.052694935351610184, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04906458407640457, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04817602410912514, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.030461274087429047, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02528497949242592, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02494964748620987, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.022329801693558693, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02174437791109085, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016290340572595596, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01575564593076706, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015163146890699863, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.010272213257849216, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04906458407640457, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04906458407640457, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.14251300692558289, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.133541539311409, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.13036471605300903, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11812025308609009, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06682570278644562, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06355831027030945, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07556817680597305, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06946300715208054, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06789381057024002, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06021576374769211, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05692102760076523, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.038431186228990555, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03321368247270584, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03203245997428894, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.031745098531246185, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.019220545887947083, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.016400733962655067, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01630805805325508, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.015043490566313267, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01476930733770132, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.010040756314992905, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009853867813944817, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009658021852374077, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006348258815705776, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.038431186228990555, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.038431186228990555, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.129262313246727, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.12101801484823227, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.11777661740779877, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.10665173828601837, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06036742776632309, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.05734413117170334, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.06856834143400192, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06330644339323044, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06137591600418091, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0544394813477993, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.051280632615089417, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03474060818552971, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.030229127034544945, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02888691984117031, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.028566434979438782, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01733965054154396, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.014704103581607342, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.014578312635421753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.013480208814144135, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.013186277821660042, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.00898041483014822, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00876234658062458, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008544627577066422, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00543059129267931, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.051280632615089417, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.051280632615089417, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.24604101479053497, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.23026300966739655, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.22508253157138824, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.20322935283184052, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11509771645069122, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10967010259628296, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1286381334066391, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11900626122951508, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11698883771896362, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10355210304260254, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0972461849451065, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06523735076189041, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05675338953733444, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05496659874916077, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05451131984591484, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.032543133944272995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02776690572500229, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.027644233778119087, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02534572407603264, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.024906739592552185, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01663072407245636, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.016079893335700035, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.016002021729946136, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009531812742352486, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.032543133944272995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.032543133944272995, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1918797791004181, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.16047605872154236, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14854644238948822, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11725685000419617, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08603005111217499, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07493114471435547, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10523837804794312, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09685087203979492, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09121983498334885, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06587574630975723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06070950627326965, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.054256755858659744, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0471806600689888, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04233381897211075, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04111166298389435, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.027404380962252617, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.023616235703229904, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.023267151787877083, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02006646618247032, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019079409539699554, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01548528578132391, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.017189841717481613, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014055401086807251, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.013663822785019875, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0471806600689888, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0471806600689888, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.1998739242553711, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1881934106349945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18453067541122437, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16779352724552155, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09388905763626099, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.08997315913438797, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10434599220752716, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09659509360790253, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0952814370393753, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08516721427440643, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0803169310092926, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05299835652112961, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046127233654260635, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04490390792489052, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04461592435836792, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02645811438560486, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.022903310135006905, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.022819828242063522, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02109600231051445, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.020776819437742233, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013736520893871784, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013522365130484104, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013336211442947388, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008534560911357403, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046127233654260635, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.046127233654260635, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2373638153076172, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2235036939382553, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21918906271457672, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1993749588727951, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11159627884626389, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10696527361869812, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12383102625608444, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11471234261989594, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11320086568593979, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10123229026794434, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09547360241413116, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06301002949476242, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05480072647333145, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.053369950503110886, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05302794277667999, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031459640711545944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02720825746655464, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027110464870929718, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025071553885936737, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024697141721844673, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016338614746928215, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016046889126300812, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015871167182922363, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01011721696704626, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031459640711545944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031459640711545944, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.232869952917099, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.20841290056705475, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.19893503189086914, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1746966689825058, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10615818947553635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09706617146730423, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12366446852684021, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.1140555813908577, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10912789404392242, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09084977954626083, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08457520604133606, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06257444620132446, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05440197139978409, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05087857320904732, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05001366510987282, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03137432038784027, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.026393119245767593, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.026068922132253647, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.023365840315818787, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.022794410586357117, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016665279865264893, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016631729900836945, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015522079542279243, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011173216626048088, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05087857320904732, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05087857320904732, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1511860191822052, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14161458611488342, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.13827332854270935, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12520179152488708, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07093147933483124, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06753598898649216, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08029378205537796, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07368762791156769, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07210743427276611, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06391426920890808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06047136336565018, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.040829792618751526, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03527588024735451, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03402172029018402, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.033726077526807785, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02043231949210167, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01741761714220047, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.017316363751888275, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.015977319329977036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.015684183686971664, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01066433172672987, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010444242507219315, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.010260648094117641, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0067054801620543, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.040829792618751526, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.040829792618751526, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.13164708018302917, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.12309880554676056, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.11992333084344864, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.10835051536560059, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06156262382864952, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.05842072516679764, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.06997967511415482, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0645049661397934, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06259637326002121, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05543452128767967, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.05226253718137741, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03544570878148079, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.030829638242721558, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.029484452679753304, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02916388213634491, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.017730819061398506, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.015041428618133068, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.014921717345714569, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.013765630312263966, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.013469889760017395, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009184082970023155, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.008972568437457085, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008753669448196888, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005621953401714563, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.05226253718137741, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.05226253718137741, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2504723370075226, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.23404532670974731, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2287341207265854, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.20634090900421143, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11720414459705353, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11151468753814697, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1310337781906128, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.1212204247713089, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11913339793682098, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10516989231109619, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09859132766723633, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06640619784593582, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.0578092597424984, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05593825876712799, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05550018325448036, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03314339742064476, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.028288990259170532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.028145726770162582, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.025777801871299744, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.025311704725027084, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016943326219916344, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.01641705073416233, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01629711128771305, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009799525141716003, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03314339742064476, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03314339742064476, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.17020472884178162, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1458059400320053, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.13840553164482117, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11531625688076019, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07833825051784515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06970464438199997, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.089844711124897, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.08231304585933685, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08079797029495239, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.060677606612443924, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.056119829416275024, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04646670073270798, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04034097492694855, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.038540348410606384, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03811963275074959, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.023881779983639717, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.021448975428938866, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.0213621836155653, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.018306832760572433, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.017894212156534195, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014124655164778233, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.015178890898823738, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013641871511936188, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.012313209474086761, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04646670073270798, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04646670073270798, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2043573409318924, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.19232119619846344, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18851663172245026, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.17130671441555023, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09599114954471588, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09194844216108322, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10659071058034897, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09875353425741196, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.09741966426372528, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08697835355997086, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08196796476840973, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05413547158241272, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04714246094226837, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04588735103607178, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.045586958527565, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.027000119909644127, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.023357762023806572, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.023270396515727043, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.021491585299372673, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.021161358803510666, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.013941158540546894, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.013720984570682049, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.013524976558983326, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.008564825169742107, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04714246094226837, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04714246094226837, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24114248156547546, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22701653838157654, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22254973649978638, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20236623287200928, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11337496340274811, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10864803940057755, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12576623260974884, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11659041047096252, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11503281444311142, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10276569426059723, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09682026505470276, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06395500898361206, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.055674344301223755, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05420240759849548, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.053852517157793045, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031892966479063034, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027576442807912827, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027473844587802887, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025378067046403885, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.0249935295432806, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01645582728087902, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016175197437405586, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015968164429068565, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010064620524644852, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031892966479063034, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031892966479063034, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.24020631611347198, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.21563810110092163, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.2063598930835724, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1812610775232315, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10960184037685394, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10051413625478745, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12686429917812347, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11711926758289337, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.1125345304608345, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09399335831403732, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08742862194776535, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06418708711862564, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05574307590723038, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.052366554737091064, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.051547370851039886, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.032153405249118805, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.026892825961112976, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02660415694117546, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.023767853155732155, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.023230865597724915, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016989218071103096, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.016538064926862717, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01590307056903839, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.010671820491552353, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.052366554737091064, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.052366554737091064, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1560082584619522, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14600731432437897, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14251385629177094, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12898285686969757, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07324894517660141, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06973608583211899, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08264005929231644, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07615207135677338, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07445528358221054, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06596574187278748, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06222711503505707, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04206116497516632, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.036437809467315674, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03513327240943909, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.0348154716193676, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.021038057282567024, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.018032729625701904, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.017925361171364784, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016544202342629433, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01623954251408577, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.010998884215950966, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010897355154156685, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.010572502389550209, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.007105835247784853, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04206116497516632, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04206116497516632, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.13425162434577942, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.12552960216999054, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.12208092957735062, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11028219759464264, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06272575259208679, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.05946514010429382, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07142545282840729, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.0659194067120552, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06378775835037231, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05638754367828369, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.05314101651310921, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03623281791806221, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03150755912065506, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.030050842091441154, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.029704151675105095, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.018107015639543533, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.015337026678025723, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.015202322974801064, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014028392732143402, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.013717962428927422, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009405613876879215, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009201173670589924, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008933810517191887, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005792411509901285, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03623281791806221, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03623281791806221, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.25747501850128174, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.24062395095825195, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.23523367941379547, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.21222752332687378, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.12028030306100845, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.1145557090640068, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.13454940915107727, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.12442310899496078, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.12231800705194473, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10810631513595581, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.1014804020524025, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06816398352384567, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.059340961277484894, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05743567645549774, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05699468404054642, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03400702401995659, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02902960032224655, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02889605052769184, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.026479508727788925, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.025998450815677643, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01739010587334633, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.016808655112981796, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01672755740582943, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009979328140616417, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03400702401995659, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03400702401995659, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.21367818117141724, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17299097776412964, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1578705906867981, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1339304894208908, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09451502561569214, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07862527668476105, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.1176568865776062, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10706988722085953, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10006237030029297, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07385046780109406, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06806183606386185, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06063665822148323, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.052717175334692, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.047198522835969925, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04581422358751297, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0316203273832798, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.027430346235632896, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.027026861906051636, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02400076761841774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.023063572123646736, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.019459635019302368, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.020996199920773506, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.018024243414402008, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.017569920048117638, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.047198522835969925, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.047198522835969925, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.49.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20985636115074158, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.19745144248008728, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1935528665781021, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1758335828781128, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09869226068258286, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09451419860124588, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.10974584519863129, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10157116502523422, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10018239170312881, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08938059210777283, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08421371132135391, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05578259751200676, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04852713271975517, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04722457751631737, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.046911075711250305, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.027865469455718994, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02413652278482914, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.024043185636401176, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02221519686281681, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02187715284526348, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.014536471106112003, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014323495328426361, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014111381955444813, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00914358627051115, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04852713271975517, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04852713271975517, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24546398222446442, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2310524880886078, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22648517787456512, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.2058010995388031, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11552425473928452, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.11065901815891266, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1283872276544571, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11881884932518005, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11725165694952011, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10463792085647583, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09861694276332855, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.065329909324646, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.056789904832839966, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05527932941913605, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05492572486400604, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03262510523200035, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.028194881975650787, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.028093386441469193, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02595190890133381, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025561407208442688, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01697024516761303, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01665898971259594, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016477875411510468, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010537511669099331, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03262510523200035, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03262510523200035, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2400035709142685, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.21569082140922546, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.20629318058490753, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.181278795003891, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10963194072246552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10053251683712006, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1269071102142334, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11735029518604279, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11255422234535217, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09413735568523407, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.0875706747174263, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06420551240444183, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.055934593081474304, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05245063453912735, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05160384625196457, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.032166775315999985, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02702532336115837, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.026703765615820885, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.02393186464905739, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02336406707763672, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.016937455162405968, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01676209643483162, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.015824059024453163, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.010946995578706264, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05160384625196457, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05160384625196457, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.15390311181545258, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14407508075237274, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14064306020736694, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1272643655538559, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07228390872478485, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06879515200853348, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08144856244325638, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07514437288045883, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07345863431692123, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06509816646575928, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.061093732714653015, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04138872027397156, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03594190627336502, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03464430943131447, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03434031829237938, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02069442719221115, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.017733758315443993, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.017626579850912094, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016249608248472214, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01594345085322857, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.010763291269540787, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010630466043949127, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.010338835418224335, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.00680961087346077, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04138872027397156, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04138872027397156, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.1339542716741562, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.12522226572036743, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.12183002382516861, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11006761342287064, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0627528578042984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.05950342118740082, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07159934937953949, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06592317670583725, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06379592418670654, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05641965568065643, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.053135961294174194, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03632894158363342, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03149363771080971, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03007085807621479, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02974076382815838, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01816275157034397, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.015360848978161812, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.015226409770548344, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014051136560738087, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.01374359056353569, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.00944699626415968, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009217638522386551, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008985640481114388, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005805487744510174, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03632894158363342, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03632894158363342, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.26419398188591003, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2469787448644638, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.241212397813797, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.21754489839076996, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.12373431771993637, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11778608709573746, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.13850542902946472, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.12805898487567902, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.12580370903015137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.11109105497598648, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.10410989075899124, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.07025353610515594, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.06111631542444229, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05913148075342178, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.058676838874816895, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03505125641822815, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.029938776046037674, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02978632226586342, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02728429064154625, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02679399959743023, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01793742924928665, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.017399704083800316, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.017240291461348534, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.010427392087876797, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03505125641822815, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03505125641822815, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.2197793573141098, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.18454813957214355, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1724601686000824, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.14049480855464935, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0993976965546608, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08690524846315384, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11736489087343216, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10814777761697769, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10389687120914459, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07670757919549942, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06831300258636475, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06010621041059494, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.051935359835624695, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04798611253499985, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.047013767063617706, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03010355867445469, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02539299987256527, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02512672357261181, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.021039219573140144, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.020131925120949745, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.016295237466692924, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016771551221609116, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.015072091482579708, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.012044643983244896, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.051935359835624695, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.051935359835624695, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21356429159641266, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.20079739391803741, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.19677568972110748, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.178582102060318, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10045959800481796, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09615316241979599, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11166434735059738, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10340036451816559, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10197755694389343, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09090187400579453, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0855923444032669, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05676766857504845, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.049399979412555695, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04806530103087425, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.047741785645484924, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.028339259326457977, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02451917715370655, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.024429544806480408, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02254803478717804, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.022197261452674866, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.014719345606863499, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.014496303163468838, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01428179256618023, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009165835566818714, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.049399979412555695, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.049399979412555695, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24824796617031097, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.23352858424186707, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22890514135360718, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.2078436315059662, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11684990674257278, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.11188671737909317, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1297648698091507, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.12020555138587952, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.118597611784935, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10576527565717697, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09960485249757767, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06600307673215866, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.057449184358119965, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05591104179620743, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05555325374007225, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03297504782676697, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.028480639681220055, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.0283790472894907, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.026188241317868233, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025786077603697777, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.017136594280600548, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01676902174949646, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01663726381957531, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01052025705575943, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03297504782676697, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03297504782676697, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.24662819504737854, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.2219267636537552, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.21244005858898163, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.18669742345809937, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.11283204704523087, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10363912582397461, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.13050399720668793, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.12065564841032028, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11576993763446808, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09701846539974213, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.0902397632598877, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06618509441614151, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05765345320105553, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05411094054579735, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.053248826414346695, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.033105771988630295, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.028135167434811592, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02782023884356022, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.025024645030498505, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024467401206493378, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017469387501478195, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01778399758040905, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.01632983796298504, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012116065248847008, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.033105771988630295, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.033105771988630295, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.15904930233955383, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1491205245256424, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14575256407260895, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.13199788331985474, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07478856295347214, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07132060080766678, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08425270020961761, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07759393751621246, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07599663734436035, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06747452169656754, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06365962326526642, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04286891594529152, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03709808364510536, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03584374114871025, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03554298356175423, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0214257650077343, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01829647831618786, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.018193872645497322, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016784528270363808, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.016481945291161537, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.011129355058073997, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010890008881688118, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.010717599652707577, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0068962727673351765, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04286891594529152, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.04286891594529152, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.1422814577817917, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1333799958229065, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.13000229001045227, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11767996847629547, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06680403649806976, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.06350477039813995, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07577481865882874, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06980817764997482, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06783724576234818, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.06024063378572464, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.0567559190094471, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038460634648799896, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03337663784623146, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03198816254734993, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.031669002026319504, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01923770271241665, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.016294248402118683, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.016167052090168, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014932120218873024, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.014620944857597351, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009971987456083298, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009691099636256695, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.009498226456344128, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00603433046489954, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038460634648799896, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038460634648799896, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2639623284339905, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.24730287492275238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.24205173552036285, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.21878929436206818, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.12389829754829407, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11823680251836777, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.13816753029823303, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.12783131003379822, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.1258510947227478, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.11161674559116364, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.10474840551614761, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0701039656996727, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.06103895604610443, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.059215445071458817, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05877876281738281, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03497682884335518, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.029922740533947945, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.02979046106338501, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.02734052576124668, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.026882629841566086, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01789156347513199, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.017309146001935005, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.017254646867513657, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.010300939902663231, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03497682884335518, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03497682884335518, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20546285808086395, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17318926751613617, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16279847919940948, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1330866664648056, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09289560467004776, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08207015693187714, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10912543535232544, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10066179931163788, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09739543497562408, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07252886891365051, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06542903929948807, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.055960219353437424, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048517826944589615, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04503060132265091, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.044189807027578354, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028031308203935623, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02405867539346218, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.023852596059441566, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02021121419966221, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019435742869973183, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015232807956635952, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016139140352606773, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014147177338600159, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.011920834891498089, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048517826944589615, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.048517826944589615, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21711720526218414, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2040412276983261, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.19996829330921173, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18139228224754333, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10218692570924759, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09777034819126129, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11359915137290955, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10520105808973312, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10374362021684647, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09241579473018646, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08695875853300095, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05777667090296745, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.050282303243875504, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04890872910618782, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04858247935771942, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02885419689118862, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02499851956963539, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02490612491965294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.022982385009527206, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.022627025842666626, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01502807904034853, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01485069002956152, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014581754803657532, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009485243819653988, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.050282303243875504, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.050282303243875504, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24971553683280945, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.23475703597068787, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.23005540668964386, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20885245501995087, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11755917221307755, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.11252689361572266, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.13057459890842438, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.12098933756351471, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11933738738298416, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10634779930114746, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.10007063299417496, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.0664493590593338, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.057820312678813934, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.056259557604789734, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.055887166410684586, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03314827382564545, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02865230292081833, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.028547588735818863, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.026331717148423195, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02592509798705578, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.017142122611403465, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01686335727572441, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016627660021185875, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010562981478869915, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03314827382564545, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03314827382564545, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.248120978474617, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.2236989289522171, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.21449309587478638, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.188472718000412, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.11355800926685333, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10455367714166641, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1312423050403595, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.12108338624238968, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11642783135175705, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09775012731552124, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.09092805534601212, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0665251612663269, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05772572010755539, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05435093864798546, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.053545016795396805, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03338231146335602, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.028081027790904045, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027786236256361008, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.024942075833678246, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024410430341959, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017804423347115517, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.017472371459007263, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016736913472414017, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011604263447225094, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03338231146335602, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03338231146335602, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.15808822214603424, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1481350213289261, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14462532103061676, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1305844932794571, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07438904792070389, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07082931697368622, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08407154679298401, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0772547572851181, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07561258226633072, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06695964187383652, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0629948079586029, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042780231684446335, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03697114810347557, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03567129746079445, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.035368796437978745, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.0213916078209877, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.018256302922964096, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.018155451864004135, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016733424738049507, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01642800122499466, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.011144879274070263, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010940375737845898, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.010726680979132652, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.007025185972452164, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042780231684446335, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042780231684446335, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.14044581353664398, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1312834620475769, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.127810999751091, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11537273228168488, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06590335071086884, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.06250198185443878, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0748753547668457, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06906383484601974, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06701379269361496, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05919963866472244, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.055749040096998215, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.037995610386133194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03301540017127991, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03157684579491615, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.031222473829984665, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.018997302278876305, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.016095463186502457, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.015960566699504852, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014703236520290375, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.014385862275958061, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009833992458879948, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009596945717930794, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.009356502443552017, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00598596315830946, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.037995610386133194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.037995610386133194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.26442015171051025, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.24719412624835968, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.24146462976932526, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.21800194680690765, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.12425695359706879, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11831341683864594, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1387604922056198, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.12840589880943298, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.126247376203537, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.11148101091384888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.10448496788740158, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.07037025690078735, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.06128499284386635, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05938117578625679, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05891399830579758, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03512066230177879, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02998299151659012, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.029844263568520546, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.027325645089149475, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.026827752590179443, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01794213429093361, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.017333118245005608, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.017287971451878548, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.01025574840605259, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03512066230177879, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03512066230177879, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.18976564705371857, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.16315147280693054, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.15441086888313293, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.13449017703533173, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08674302697181702, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07711975276470184, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.10086849331855774, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09301162511110306, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08995264023542404, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07116513699293137, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06443017721176147, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05165958032011986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.044751133769750595, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04192342236638069, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04122866690158844, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.025894667953252792, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.022272566333413124, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022087445482611656, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.0192932840436697, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.018694205209612846, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014098191633820534, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.014722071588039398, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013223620131611824, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.010771660134196281, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05165958032011986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05165958032011986, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2195645272731781, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2063070684671402, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2021590918302536, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18338724970817566, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10343019664287567, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09894558042287827, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11514369398355484, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.1064923033118248, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10500340163707733, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09347793459892273, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08796872943639755, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05861509218811989, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.050939563661813736, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.049543704837560654, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.049214642494916916, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029305359348654747, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02537616156041622, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025282451882958412, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02333478070795536, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.022970598191022873, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01536643598228693, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01516414899379015, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01491694524884224, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009803203865885735, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.050939563661813736, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.050939563661813736, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.24902698397636414, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2340230941772461, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22929736971855164, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20810779929161072, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.1173124834895134, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.11226391047239304, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1303183138370514, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.12077858299016953, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11910348385572433, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10605038702487946, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09972595423460007, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06635542958974838, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05772525444626808, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.056159377098083496, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05579042434692383, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.033113423734903336, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.028629116714000702, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.028523599728941917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.026299333199858665, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025890521705150604, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.017160018905997276, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016892068088054657, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016645418480038643, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010642669163644314, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.033113423734903336, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.033113423734903336, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2502141296863556, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.22577312588691711, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.21628108620643616, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.19018986821174622, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.11463330686092377, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.1055263876914978, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.13286571204662323, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.12244230508804321, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11747393012046814, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09877770394086838, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.09187088906764984, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06721380352973938, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05839120224118233, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05492834746837616, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05407971516251564, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03368803486227989, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.028479427099227905, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.028169900178909302, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.02534613572061062, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02480456419289112, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017954029142856598, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.017878267914056778, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016866929829120636, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012066854164004326, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03368803486227989, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03368803486227989, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.15599428117275238, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14609777927398682, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1427507847547531, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12922722101211548, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07329080998897552, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06984508037567139, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08242697268724442, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0760250985622406, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07448875159025192, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06600755453109741, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.062057141214609146, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.041896894574165344, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03639363870024681, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03515562415122986, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03486252576112747, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.020955489948391914, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.017979858443140984, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.017881596460938454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016479728743433952, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.016183873638510704, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.010888859629631042, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010745637118816376, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.010478323325514793, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006864237133413553, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.041896894574165344, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.041896894574165344, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.14039430022239685, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.13144879043102264, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.12808950245380402, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11570746451616287, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06598976999521255, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.06264779716730118, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07480404525995255, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06894567608833313, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06706323474645615, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05931882932782173, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.055744659155607224, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03793340176343918, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03294672444462776, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03159022703766823, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03126247599720955, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01895969547331333, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.016081159934401512, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.015958508476614952, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014702048152685165, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.014397633261978626, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009802221320569515, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009552635252475739, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.009343837387859821, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005928224418312311, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03793340176343918, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03793340176343918, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.26291322708129883, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.24600978195667267, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.24042755365371704, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.21720178425312042, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.12368586659431458, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11782774329185486, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.13798943161964417, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.12776605784893036, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.12567786872386932, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.11105793714523315, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.10399103164672852, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.07007960230112076, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.06102965027093887, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05914885923266411, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.058713071048259735, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.034955576062202454, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.029979798942804337, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.029837101697921753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.027333859354257584, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.026855725795030594, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01792879030108452, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.017465265467762947, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.017287274822592735, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.01056537963449955, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.034955576062202454, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.034955576062202454, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20174382627010345, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.16742222011089325, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.15427817404270172, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1276920586824417, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09081417322158813, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07737108319997787, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11051315069198608, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10198415815830231, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0959862768650055, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07167747616767883, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06364914029836655, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05663944035768509, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04905006289482117, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04398176819086075, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04272273927927017, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028342988342046738, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02339284121990204, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.022999243810772896, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.019732855260372162, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01868801936507225, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015235370956361294, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.015821851789951324, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.013641474768519402, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01133840624243021, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04905006289482117, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04905006289482117, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21938776969909668, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.20608089864253998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.20186860859394073, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18305620551109314, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10342612117528915, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09890930354595184, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1150471419095993, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10652147978544235, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10502142459154129, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09341604262590408, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08782470226287842, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.058531504124403, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0509377121925354, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04953215271234512, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04919525980949402, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029240179806947708, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025356775149703026, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025260305032134056, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.023295998573303223, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.022932210937142372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015255739912390709, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01512179896235466, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014800448901951313, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009736458770930767, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0509377121925354, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0509377121925354, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2456027865409851, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2307310402393341, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2260766178369522, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.20502851903438568, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11579710245132446, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.11075735092163086, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1286720186471939, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11920826882123947, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11756435036659241, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10459994524717331, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09836316853761673, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06552353501319885, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.056982770562171936, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05542831867933273, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05505632236599922, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.032691702246665955, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02826574072241783, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.028158340603113174, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025954294949769974, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02554433047771454, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016955485567450523, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016694238409399986, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01644408144056797, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010528646409511566, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.032691702246665955, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.032691702246665955, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.24795636534690857, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.2240699976682663, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.214796781539917, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.18897031247615814, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.11372587829828262, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10477681457996368, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.13130441308021545, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.12138543277978897, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11650749295949936, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09811314940452576, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.09130924195051193, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06664445996284485, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.057929106056690216, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.0544772632420063, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.0536433644592762, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.0334327332675457, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02821956016123295, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027908647432923317, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.02513626217842102, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024591930210590363, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017862576991319656, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.017680343240499496, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016781536862254143, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011874779127538204, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.0334327332675457, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.0334327332675457, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1582973599433899, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1482415646314621, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14482420682907104, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.13100796937942505, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.0745309367775917, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07100284844636917, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08390291035175323, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07731412351131439, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07575163245201111, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06707272678613663, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06320658326148987, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042712245136499405, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03698248043656349, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03571823239326477, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.035425059497356415, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02134637162089348, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.018246956169605255, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.018146105110645294, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016702493652701378, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01639888435602188, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01107863150537014, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010868157260119915, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01066130492836237, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.0068916198797523975, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042712245136499405, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042712245136499405, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.1412922888994217, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1321900188922882, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.1287853866815567, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11632903665304184, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06644513458013535, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.06307763606309891, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07545022666454315, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06947861611843109, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06755697727203369, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.059734515845775604, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.056178148835897446, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03827947378158569, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03320043906569481, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03182883560657501, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.031503185629844666, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.019126173108816147, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.016196900978684425, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.01606808975338936, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.01480557955801487, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.014488283544778824, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009886931627988815, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00960028637200594, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.009404429234564304, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005922784563153982, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03827947378158569, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03827947378158569, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2719189524650574, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2542433440685272, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.24870125949382782, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.224370077252388, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.12789513170719147, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.12181568145751953, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.14265784621238708, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.13200803101062775, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.12993302941322327, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.11483950912952423, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.10735887289047241, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.0724053606390953, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.06304168701171875, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.06110793352127075, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.06066097691655159, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03613773733377457, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.030909352004528046, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0307764895260334, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.028163332492113113, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.027671046555042267, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01848793216049671, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.017909526824951172, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.017817677929997444, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.010665171779692173, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03613773733377457, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03613773733377457, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.2075323462486267, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17444930970668793, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16142676770687103, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.13882608711719513, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09325087070465088, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08041691780090332, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.11396732926368713, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10491844266653061, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09841084480285645, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07599088549613953, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0688752606511116, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05831080302596092, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05034051090478897, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04509301483631134, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.043791066855192184, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.029182549566030502, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.023902451619505882, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.023481715470552444, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02057160809636116, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019539156928658485, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.015688173472881317, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.016078095883131027, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014023158699274063, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.011409632861614227, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05034051090478897, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.05034051090478897, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22105230391025543, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.20758219063282013, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.2033328264951706, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18423911929130554, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10422077029943466, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09964533150196075, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11593534052371979, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10737675428390503, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.1058468222618103, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09409136325120926, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.088420070707798, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05900609493255615, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0513358935713768, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.049907781183719635, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04956681653857231, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029455330222845078, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025522541254758835, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02542446181178093, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.023430008441209793, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.023059772327542305, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015319829806685448, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015173820778727531, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014851164072751999, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009705514647066593, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0513358935713768, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0513358935713768, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2394479662179947, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.22484789788722992, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.22024506330490112, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19965453445911407, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.11293251067399979, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10797200351953506, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12564322352409363, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11627687513828278, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11464530974626541, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.10196638107299805, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.09583825618028641, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06396803259849548, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05560152977705002, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.054067663848400116, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.053706854581832886, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03199426084756851, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027595382183790207, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027492910623550415, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025328950956463814, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02493230625987053, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01672951877117157, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01633315160870552, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016235798597335815, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.01034519076347351, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03199426084756851, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.03199426084756851, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.25457870960235596, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.23038995265960693, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.22113719582557678, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.19451946020126343, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.11679667979478836, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10787078738212585, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.1354064643383026, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.12442214787006378, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11958131939172745, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.10091398656368256, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.09394434094429016, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06857562065124512, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.059354282915592194, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05597629025578499, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.05516264587640762, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03435991331934929, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.02905510924756527, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02875911444425583, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.025933928787708282, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.025409260764718056, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.018354972824454308, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01826781965792179, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.017258208245038986, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.012414803728461266, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03435991331934929, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03435991331934929, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.1542617231607437, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.14442114531993866, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1410551220178604, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12750929594039917, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07266499847173691, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06922148168087006, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08186495304107666, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0754234567284584, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07385102659463882, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06537508219480515, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.061555005609989166, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.041691869497299194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03609830141067505, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.034844812005758286, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.034545253962278366, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.02086474746465683, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.017837636172771454, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.017738468945026398, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.016342688351869583, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.0160417128354311, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.010878440923988819, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01069676037877798, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.010466126725077629, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006865868344902992, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.041691869497299194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.041691869497299194, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.14140979945659637, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.13231141865253448, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.12877166271209717, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11638876050710678, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06651432812213898, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0631454586982727, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07567042112350464, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06967321038246155, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06763869524002075, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05978256091475487, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.056247275322675705, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038400668650865555, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03330341726541519, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.031870000064373016, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.0315164253115654, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.019199315458536148, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.016206642612814903, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0160745307803154, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014809607528150082, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.014490434899926186, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009918855503201485, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009618069976568222, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.009423406794667244, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005932685919106007, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038400668650865555, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038400668650865555, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.2746703326702118, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2570076584815979, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2513786554336548, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.22662486135959625, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.12911377847194672, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.12318169325590134, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.1441357433795929, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.1334117352962494, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.1312563270330429, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.1159292533993721, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.10857630521059036, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.07314129173755646, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.06367161870002747, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.06175138056278229, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.061288982629776, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0364672988653183, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.031192604452371597, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.0310590248554945, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.028427504003047943, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.027923664078116417, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.018645381554961205, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.018030662089586258, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01798442006111145, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.010690109804272652, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0364672988653183, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.0364672988653183, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.2210783213376999, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.17849718034267426, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.16246232390403748, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.13962797820568085, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09896168112754822, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08249274641275406, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.1211216002702713, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.11148010194301605, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.10510122776031494, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.07745207846164703, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.07059410214424133, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.06206962466239929, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.0535721480846405, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04790735989809036, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04649113491177559, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.03109673038125038, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.025483250617980957, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.025065666064620018, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02138374373316765, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.020222501829266548, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01677355170249939, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.017255475744605064, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014994870871305466, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.012343951500952244, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04790735989809036, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04790735989809036, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.55.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21845057606697083, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.20501671731472015, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.20078344643115997, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18196646869182587, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10309635102748871, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09855091571807861, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11469650268554688, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10624825209379196, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.1047147884964943, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09302263706922531, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08740859478712082, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05843355134129524, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0508415512740612, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.049416232854127884, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0490712895989418, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02917574532330036, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025341743603348732, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02524627186357975, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.023278824985027313, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.0229099802672863, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015229295007884502, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015199919231235981, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014758946374058723, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009888800792396069, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0508415512740612, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0508415512740612, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.55.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.23121240735054016, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2170465588569641, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.21258759498596191, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.19269071519374847, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10928046703338623, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.10448135435581207, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.12174306809902191, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.11255819350481033, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.11097614467144012, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09865137189626694, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.0927981361746788, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.06231687217950821, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05398683249950409, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.052498623728752136, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.052145954221487045, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.031158845871686935, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027143921703100204, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027046801522374153, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024990927428007126, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024614963680505753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016593802720308304, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01660608872771263, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016114061698317528, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.011256527155637741, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.052145954221487045, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.052145954221487045, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.55.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.2481527477502823, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.22386640310287476, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.21416710317134857, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.18808315694332123, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.11386456340551376, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.10473117977380753, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.13328784704208374, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.1221395805478096, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.11665985733270645, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.09815232455730438, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.09131373465061188, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06744794547557831, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.058554310351610184, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05491626262664795, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.054028481245040894, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03395251929759979, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.029122915118932724, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.02878504991531372, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.026121579110622406, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.02556217834353447, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.018618132919073105, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.019162047654390335, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.017480919137597084, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.013941699638962746, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03395251929759979, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03395251929759979, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.15584540367126465, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.145879864692688, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.14240798354148865, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1287219226360321, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.07342600077390671, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06988611072301865, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08272159099578857, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07626504451036453, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07462315261363983, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06601867079734802, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06216743588447571, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042101819068193436, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03648063540458679, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.0351848378777504, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03488166630268097, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.021045543253421783, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01800375245511532, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.01789996586740017, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.01647965796291828, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.016173971816897392, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.010946864262223244, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010778584517538548, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01051967404782772, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006895921193063259, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042101819068193436, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042101819068193436, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.14140784740447998, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1323249638080597, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.1288815587759018, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.11634590476751328, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.0664680227637291, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.06309045851230621, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.0755026713013649, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06958172470331192, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06757444143295288, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05974289029836655, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.056167297065258026, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038296524435281754, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.033249322324991226, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.03184017539024353, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03148466348648071, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.01913943514227867, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01619093120098114, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.016061659902334213, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.014795941300690174, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.014478227123618126, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.00988798774778843, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009606564417481422, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.009396953508257866, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005926197860389948, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038296524435281754, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.038296524435281754, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.26686912775039673, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.24946288764476776, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2440321296453476, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.2199632227420807, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.1252467930316925, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11938025802373886, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.13983456790447235, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.12932690978050232, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.12732696533203125, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.11243156343698502, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.10522519797086716, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.07089278101921082, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.06174924969673157, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05989469587802887, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.059440623968839645, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.035392098128795624, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.030295832082629204, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.030168354511260986, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.027603110298514366, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.02712417207658291, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01810535043478012, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.017555873841047287, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.017460083588957787, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.010486449114978313, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.035392098128795624, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.035392098128795624, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.20833566784858704, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.169910728931427, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.15711575746536255, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1269378662109375, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.09358245879411697, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.08047696202993393, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.1117120161652565, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.10229963809251785, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.09787413477897644, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06925532966852188, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06367355585098267, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.057284869253635406, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04950591176748276, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04565756395459175, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04468872398138046, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.028826400637626648, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.024915803223848343, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02464558556675911, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02051706053316593, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019696742296218872, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.01602528989315033, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.01737947016954422, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.014861739240586758, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01345933135598898, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04950591176748276, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04950591176748276, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21283270418643951, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.19978618621826172, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.19562773406505585, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.17732104659080505, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.1011907309293747, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09676158428192139, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11261308938264847, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10427050292491913, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.1027582436800003, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09139052033424377, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08606046438217163, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05811034142971039, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05076912045478821, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.049374163150787354, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04904605448246002, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029239213094115257, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.026830162853002548, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02674313634634018, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024993211030960083, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024666188284754753, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016304077580571175, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.018157873302698135, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.0158830638974905, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.014262434095144272, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05076912045478821, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05076912045478821, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.22005170583724976, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2066498100757599, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.202341690659523, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18338674306869507, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10453617572784424, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09997472912073135, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11617687344551086, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10769736766815186, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10615977644920349, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09442713856697083, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08889977633953094, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.059773851186037064, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05226467549800873, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.05084586143493652, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.05051140487194061, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029979603365063667, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.027358679100871086, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027268553152680397, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025431519374251366, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.025090035051107407, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.01644326187670231, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.018205009400844574, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016004521399736404, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.014005196280777454, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05226467549800873, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05226467549800873, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.22618134319782257, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.20422717928886414, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1947210431098938, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.1710270196199417, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10416852682828903, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.0954374447464943, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12340421974658966, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11277596652507782, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10674960911273956, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08991435170173645, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08391579985618591, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06300421804189682, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05443389713764191, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05058705806732178, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04967183619737625, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03183821216225624, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.027440030127763748, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027074765413999557, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.024809259921312332, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.024238811805844307, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.017930418252944946, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01886189915239811, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.016788728535175323, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.014453103765845299, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05058705806732178, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.05058705806732178, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.13978880643844604, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13075284659862518, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1275971233844757, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11523865908384323, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06580184400081635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.0625847578048706, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.0741579458117485, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06845879554748535, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06688042730093002, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0591106042265892, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05556093901395798, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03772620111703873, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.032735854387283325, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03153017908334732, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03124457225203514, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01885175332427025, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01613459549844265, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.016037287190556526, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.014761505648493767, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.014479700475931168, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009820828214287758, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009677054360508919, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009428136982023716, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006195887457579374, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03772620111703873, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03772620111703873, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.1349632441997528, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.1262252777814865, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.12297925353050232, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1109417974948883, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.06332322955131531, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.06010536104440689, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.07165800034999847, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.06621560454368591, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.06435144692659378, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.05686089023947716, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.053404826670885086, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03634832054376602, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.03164002299308777, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.030323678627610207, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.03000292181968689, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.018170321360230446, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.015449892729520798, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.015329255722463131, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.01411267276853323, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.01381277572363615, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.009407431818544865, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.009195992723107338, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008976557292044163, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005718037020415068, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03634832054376602, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03634832054376602, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.24796417355537415, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2317860722541809, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.22666312754154205, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.20420515537261963, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.11635395884513855, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.11083384603261948, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.13007892668247223, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.1201312467455864, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11824916303157806, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.10436272621154785, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09777005016803741, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06599671393632889, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.057386331260204315, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05561944842338562, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.055217448621988297, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.032922688871622086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.028140783309936523, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.028006060048937798, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.025626162067055702, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.025174055248498917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01686037704348564, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.016323482617735863, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.016244566068053246, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009783011861145496, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.032922688871622086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.032922688871622086, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.18567851185798645, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.16349124908447266, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.15624350309371948, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.12976805865764618, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.08475799113512039, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.07745195180177689, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.09927552938461304, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.09027941524982452, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.08775324374437332, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06977424770593643, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.06318183988332748, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05137244611978531, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.04487082362174988, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.04247687757015228, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.04190130531787872, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.026934519410133362, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.02482008934020996, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.024684710428118706, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.02233819290995598, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.02185451053082943, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.016765236854553223, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.018785173073410988, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.0161416195333004, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.016085956245660782, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05137244611978531, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.05137244611978531, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2143906056880951, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.20119962096214294, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.19698624312877655, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.17842645943164825, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10125702619552612, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09674513339996338, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11279674619436264, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.1044074147939682, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10284553468227386, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09132186323404312, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08578827232122421, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05746665224432945, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04999110847711563, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.048554010689258575, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04821833223104477, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.02870318479835987, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.024946395307779312, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02484920807182789, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.022915532812476158, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.022551098838448524, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015006768517196178, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015029413625597954, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014541019685566425, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.009865882806479931, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04999110847711563, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.04999110847711563, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.2197365015745163, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.20623932778835297, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.20191432535648346, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.18295566737651825, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10401387512683868, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09942692518234253, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11603359878063202, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10721053183078766, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.105657197535038, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09385795146226883, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08833085000514984, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.059477798640728, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051552318036556244, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.050098735839128494, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.049753203988075256, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.029763085767626762, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02615569904446602, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02605762518942356, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.024132071062922478, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.02377128414809704, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016013305634260178, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.01637415401637554, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.015555565245449543, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.011532008647918701, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051552318036556244, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.051552318036556244, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.21889865398406982, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.1965729147195816, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1862686425447464, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.163534477353096, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.10058010369539261, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.09162552654743195, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.12101797759532928, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.11037611961364746, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.10323328524827957, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.08668693900108337, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.08125481754541397, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.06202813237905502, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.05385700985789299, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04958668351173401, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.04856126382946968, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.03185084089636803, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.028059378266334534, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.027652960270643234, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.025643235072493553, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.025044776499271393, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.018756164237856865, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.02058224380016327, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.017543165013194084, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.016859719529747963, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04958668351173401, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.04958668351173401, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.13556340336799622, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.12673088908195496, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1235247477889061, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.1114630252122879, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06385146081447601, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.060652993619441986, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07219170033931732, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.06659353524446487, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.06492263823747635, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.0573015995323658, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.05388369411230087, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03674578666687012, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03186003118753433, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.030612172558903694, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.030317356809973717, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01836869865655899, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.015676992014050484, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.015572297386825085, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.014335455372929573, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01404742244631052, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.009561858139932156, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009437037631869316, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009153511375188828, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006056827027350664, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03674578666687012, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03674578666687012, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.12173603475093842, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.11369606852531433, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.11065194010734558, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.09969865530729294, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.05705803260207176, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0540771447122097, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.06496483832597733, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.059966471046209335, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.05804542824625969, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0512227937579155, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04810330271720886, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03294231370091438, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02863992005586624, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.02734760195016861, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.02702724188566208, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.016460256651043892, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.013936278410255909, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.013816095888614655, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.01272564847022295, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.012435093522071838, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.008510231040418148, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.008330494165420532, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.008082499727606773, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005184909328818321, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0512227937579155, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.0512227937579155, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.22920234501361847, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.2140664905309677, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.2090843766927719, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.18833990395069122, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10742656141519547, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.1022026464343071, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12033569067716599, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11116643249988556, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.10919544845819473, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09628787636756897, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09005668014287949, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06107533723115921, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05308784916996956, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05136370658874512, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.05095236375927925, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.030473601073026657, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.02599003165960312, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.025860309600830078, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.023657361045479774, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023214930668473244, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.01558145135641098, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015110469423234463, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.014979434199631214, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.009060199372470379, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05136370658874512, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05136370658874512, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.14946530759334564, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.1282193958759308, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.1209205612540245, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.10227011144161224, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06825551390647888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06104981154203415, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.08030121773481369, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.0734017938375473, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.07102374732494354, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.05534137412905693, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0503205731511116, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.042172983288764954, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.037198979407548904, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.035039935261011124, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03449418768286705, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.022727062925696373, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.021414775401353836, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.02128639444708824, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.019399898126721382, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.019020553678274155, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.014817196875810623, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.017033826559782028, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.01426685880869627, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.01512773148715496, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0503205731511116, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.0503205731511116, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.58.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.20181088149547577, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1894017904996872, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.18537022173404694, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.16801151633262634, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.09575583785772324, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09150483459234238, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.1068086251616478, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.09880008548498154, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.0972566083073616, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.08644784986972809, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08134903013706207, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05492778494954109, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.047779060900211334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.046411626040935516, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.0460890494287014, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.027535535395145416, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02471008338034153, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.02462279424071312, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.022903114557266235, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.022579841315746307, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015050778165459633, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.016125112771987915, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.014617307111620903, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.012086628004908562, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.047779060900211334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.047779060900211334, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.58.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21483801305294037, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.20166467130184174, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1973605901002884, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.17890794575214386, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10217747092247009, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.0976666808128357, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11388072371482849, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10537593066692352, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10377096384763718, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09229040890932083, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08689617365598679, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05870421230792999, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05131581798195839, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04989153891801834, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.049549706280231476, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.0295358095318079, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.02716067247092724, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.027068955823779106, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.025302398949861526, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.024963712319731712, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.016460871323943138, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.018447352573275566, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.016015954315662384, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.014531361870467663, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05131581798195839, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.05131581798195839, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.58.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.16320295631885529, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.14470934867858887, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.1350320726633072, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.11779036372900009, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.07527003437280655, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.06712529808282852, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.09437891840934753, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.08516872674226761, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.0776212215423584, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.06440744549036026, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.06045534461736679, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0483444407582283, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.04275868088006973, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.03844829276204109, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.037366803735494614, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.02540929801762104, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.023500870913267136, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.023081082850694656, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.021801630035042763, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.021228058263659477, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.015602726489305496, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.018898917362093925, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.014396138489246368, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.01654796302318573, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0483444407582283, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.0483444407582283, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.q_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.14579997956752777, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.13636146485805511, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.13293009996414185, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.11994372308254242, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.06862089037895203, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.06517332047224045, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.07726747542619705, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.07161036133766174, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.0697631686925888, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.06161555275321007, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.057737890630960464, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03926268592476845, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.03423372656106949, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.03287804126739502, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.03255203738808632, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.01960478536784649, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.01679433323442936, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.016677873209118843, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.015346190892159939, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.015033494681119919, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.010131524875760078, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.010041417554020882, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.009683755226433277, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.006349786184728146, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03926268592476845, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.03926268592476845, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.k_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.11596282571554184, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.10832476615905762, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.10529380291700363, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.0948592871427536, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.05437899008393288, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.0514906570315361, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.06187757849693298, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.05724875256419182, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.055300142616033554, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.048758357763290405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.04571107029914856, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.03138372302055359, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.02735384926199913, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.026068948209285736, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.025762835517525673, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.015690777450799942, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.01330577116459608, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.013185590505599976, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.012137584388256073, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.011859234422445297, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.008138071745634079, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.00798459630459547, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.007713454309850931, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.005010765977203846, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.048758357763290405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.048758357763290405, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.v_proj", "numel": 7340032, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1800362723214284, "total_bits": 16001535.999999998, "err": 0.23369288444519043, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.37646484375, "total_bits": 17443328.0, "err": 0.21828676760196686, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.62646484375, "total_bits": 19278336.0, "err": 0.21307578682899475, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.733607700892857, "total_bits": 20064768.0, "err": 0.1917898803949356, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.233607700892857, "total_bits": 23734784.0, "err": 0.10941056907176971, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7157505580357144, "total_bits": 27273728.0, "err": 0.10396426171064377, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0316162109375, "total_bits": 22252160.0, "err": 0.12326505780220032, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.12646484375, "total_bits": 22948352.0, "err": 0.11367419362068176, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1800362723214284, "total_bits": 23341568.0, "err": 0.11124923825263977, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.519321986607143, "total_bits": 25831936.0, "err": 0.09821998327970505, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.670375279017857, "total_bits": 26940672.0, "err": 0.09197083115577698, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.0316162109375, "total_bits": 29592192.0, "err": 0.06256888061761856, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.12646484375, "total_bits": 30288384.0, "err": 0.05437030643224716, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05232866853475571, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.340750558035714, "total_bits": 31861248.0, "err": 0.051826927810907364, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.0316162109375, "total_bits": 36932224.0, "err": 0.03126440569758415, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.233607700892857, "total_bits": 38414848.0, "err": 0.026525337249040604, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.340750558035714, "total_bits": 39201280.0, "err": 0.026354802772402763, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.519321986607143, "total_bits": 40512000.0, "err": 0.024195292964577675, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.75146484375, "total_bits": 42215936.0, "err": 0.023705674335360527, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.0316162109375, "total_bits": 44272256.0, "err": 0.016038766130805016, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.12646484375, "total_bits": 44968448.0, "err": 0.015568389557301998, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245901925223214, "total_bits": 45845120.0, "err": 0.01529218815267086, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.12646484375, "total_bits": 59648512.0, "err": 0.00943443551659584, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05232866853475571, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.233607700892857, "total_bits": 31074815.999999996, "err": 0.05232866853475571, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.o_proj", "numel": 51380224, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1787806919642856, "total_bits": 111946240.0, "err": 0.07122552394866943, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 122038784.0, "err": 0.05626876652240753, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05197838693857193, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7323521205357144, "total_bits": 140388864.0, "err": 0.04514689743518829, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2323521205357144, "total_bits": 166078976.0, "err": 0.03143075853586197, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7144949776785716, "total_bits": 190851584.0, "err": 0.026386301964521408, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 155748992.0, "err": 0.03749245032668114, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 160573952.0, "err": 0.03434936702251434, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1787806919642856, "total_bits": 163326464.0, "err": 0.03319566324353218, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.51806640625, "total_bits": 180759040.0, "err": 0.024372918531298637, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6697474888392856, "total_bits": 188552448.0, "err": 0.02279876545071602, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 207129216.0, "err": 0.01953965425491333, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 211954175.99999997, "err": 0.018043944612145424, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.232352120535714, "total_bits": 217459200.0, "err": 0.016813818365335464, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339494977678571, "total_bits": 222964224.0, "err": 0.016537949442863464, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 258509440.0, "err": 0.011045409366488457, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.232352120535714, "total_bits": 268839424.0, "err": 0.011254681274294853, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339494977678571, "total_bits": 274344448.0, "err": 0.011205130256712437, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.51806640625, "total_bits": 283519488.0, "err": 0.010337557643651962, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.750209263392857, "total_bits": 295447040.0, "err": 0.01017865352332592, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 309889664.0, "err": 0.007826343178749084, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 314714624.0, "err": 0.009664434939622879, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.245588030133929, "total_bits": 320899712.0, "err": 0.007551759015768766, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 417475072.0, "err": 0.008979739621281624, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05197838693857193, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 134883840.0, "err": 0.05197838693857193, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.59.mlp.gate_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.16860505938529968, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.1581663340330124, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.1547240912914276, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.1402004361152649, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.0796593502163887, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.07604105770587921, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.08899170905351639, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.08232049643993378, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.08092343807220459, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.07183768600225449, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.06753049045801163, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04539581760764122, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.03945617377758026, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.038241028785705566, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.037954408675432205, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.022699806839227676, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.01971091330051422, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.0196240171790123, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.018121879547834396, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.01782248727977276, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.011918535456061363, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.011986693367362022, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.011514573357999325, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.00798951368778944, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04539581760764122, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.04539581760764122, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.59.mlp.up_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.1786446707589286, "total_bits": 319826432.0, "err": 0.21483948826789856, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.3750732421875, "total_bits": 348662272.0, "err": 0.2015724629163742, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.6250732421875, "total_bits": 385362432.0, "err": 0.19724398851394653, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.732216099330357, "total_bits": 401091072.0, "err": 0.17873738706111908, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.232216099330357, "total_bits": 474491392.0, "err": 0.10153758525848389, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7143589564732142, "total_bits": 545270272.0, "err": 0.09696734696626663, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.031268310546875, "total_bits": 444992128.0, "err": 0.11336521059274673, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.1250732421875, "total_bits": 458762752.0, "err": 0.10485164076089859, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.1786446707589286, "total_bits": 466627072.0, "err": 0.10312509536743164, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.517930385044643, "total_bits": 516434432.0, "err": 0.09158992767333984, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.669679478236607, "total_bits": 538711296.0, "err": 0.08607399463653564, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031268310546875, "total_bits": 591792768.0, "err": 0.05785679817199707, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0502433106303215, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.2322160993303575, "total_bits": 621292032.0, "err": 0.04873024672269821, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.339358956473214, "total_bits": 637020672.0, "err": 0.04837656766176224, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031268310546875, "total_bits": 738593408.0, "err": 0.028923194855451584, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.2322160993303575, "total_bits": 768092672.0, "err": 0.025144120678305626, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.339358956473214, "total_bits": 783821312.0, "err": 0.025036239996552467, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.517930385044643, "total_bits": 810035712.0, "err": 0.02312263660132885, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.7500732421875, "total_bits": 844114432.0, "err": 0.022750934585928917, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031268310546875, "total_bits": 885394048.0, "err": 0.015204587951302528, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.1250732421875, "total_bits": 899164672.0, "err": 0.015321299433708191, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.24555402483259, "total_bits": 916851328.0, "err": 0.01469634473323822, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.1250732421875, "total_bits": 1192765952.0, "err": 0.010263961739838123, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0502433106303215, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b 32g s4", "bpw": 4.1250732421875, "total_bits": 605563392.0, "err": 0.0502433106303215, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.59.mlp.down_proj", "numel": 146800640, "options": [ { "desc": "0.05:3b/0.95:2b 32g s4", "bpw": 2.175209263392857, "total_bits": 319322112.0, "err": 0.08888993412256241, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b/0.75:2b 32g s4", "bpw": 2.375209263392857, "total_bits": 348682240.0, "err": 0.07852321863174438, "qparams": { "group_size": 32, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b/0.75:2b 32g s4", "bpw": 2.625209263392857, "total_bits": 385382400.0, "err": 0.06843403726816177, "qparams": { "group_size": 32, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", "bpw": 2.7252092633928573, "total_bits": 400062464.0, "err": 0.06023788824677467, "qparams": { "group_size": 32, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b/0.9:3b 32g s4", "bpw": 3.2252092633928573, "total_bits": 473462784.0, "err": 0.041798971593379974, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b/0.8:3b 32g s4", "bpw": 3.7252092633928573, "total_bits": 546863104.0, "err": 0.034714940935373306, "qparams": { "group_size": 32, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b 128g s4", "bpw": 3.0313023158482144, "total_bits": 444997120.0, "err": 0.06018096208572388, "qparams": { "group_size": 128, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b 32g s4", "bpw": 3.125209263392857, "total_bits": 458782720.0, "err": 0.053877782076597214, "qparams": { "group_size": 32, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.04336691275238991, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b/0.6:3b 32g s4", "bpw": 3.525209263392857, "total_bits": 517502976.0, "err": 0.036230623722076416, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b/0.4:3b 64g s4", "bpw": 3.6626046316964285, "total_bits": 537672704.0, "err": 0.03522913530468941, "qparams": { "group_size": 64, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b 128g s4", "bpw": 4.031302315848214, "total_bits": 591797760.0, "err": 0.03164604678750038, "qparams": { "group_size": 128, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b 32g s4", "bpw": 4.125209263392857, "total_bits": 605583360.0, "err": 0.027504632249474525, "qparams": { "group_size": 32, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b/0.9:4b 32g s4", "bpw": 4.225209263392857, "total_bits": 620263424.0, "err": 0.02229609712958336, "qparams": { "group_size": 32, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:4b 32g s4", "bpw": 4.325209263392857, "total_bits": 634943488.0, "err": 0.020949240773916245, "qparams": { "group_size": 32, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b 128g s4", "bpw": 5.031302315848214, "total_bits": 738598400.0, "err": 0.017241060733795166, "qparams": { "group_size": 128, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b/0.9:5b 32g s4", "bpw": 5.225209263392857, "total_bits": 767064064.0, "err": 0.014867127873003483, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", "bpw": 5.325209263392857, "total_bits": 781744128.0, "err": 0.01431587990373373, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b/0.6:5b 32g s4", "bpw": 5.525209263392857, "total_bits": 811104256.0, "err": 0.014103515073657036, "qparams": { "group_size": 32, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", "bpw": 5.725209263392857, "total_bits": 840464384.0, "err": 0.013444996438920498, "qparams": { "group_size": 32, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b 128g s4", "bpw": 6.031302315848214, "total_bits": 885399040.0, "err": 0.011524902656674385, "qparams": { "group_size": 128, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b 32g s4", "bpw": 6.125209263392857, "total_bits": 899184640.0, "err": 0.01305307075381279, "qparams": { "group_size": 32, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b/0.9:6b 128g s4", "bpw": 6.231302315848215, "total_bits": 914759168.0, "err": 0.010246808640658855, "qparams": { "group_size": 128, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b 32g s4", "bpw": 8.125209263392858, "total_bits": 1192785920.0, "err": 0.011706136167049408, "qparams": { "group_size": 32, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.04336691275238991, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:4b/0.95:3b 32g s4", "bpw": 3.175209263392857, "total_bits": 466122752.0, "err": 0.04336691275238991, "qparams": { "group_size": 32, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } } ], "last_module_idx": 122, "base_perplexity": 6.886700486867382 }