diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,110886 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.011670739389955997, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.009957468137145042, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.004948198329657316, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.005195552483201027, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.005195497535169125, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0020751201082021, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.011283217929303646, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.009872549213469028, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.005466081202030182, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.004798813257366419, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.00501887034624815, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.005244550295174122, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0047972435131669044, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.002839160617440939, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.002169959479942918, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.002782008610665798, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.001970518846064806, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0016996708000078797, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0019208249868825078, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0016355268890038133, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0018604322103783488, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0019206803990527987, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0014941903064027429, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0016173329204320908, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.011816456913948059, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.010123023763298988, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.004978278186172247, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.005198793485760689, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.005198271945118904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.001951528713107109, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.01182581391185522, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.01001676358282566, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.005464312620460987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.004773659631609917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.004998459946364164, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.00522997323423624, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.004770626313984394, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0027793010231107473, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0020139773841947317, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.002756385365501046, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.001775973942130804, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0014696817379444838, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0017178601119667292, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0013895106967538595, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0017314222641289234, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0017176703549921513, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0013289498165249825, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0013672629138454795, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.11751881241798401, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0696994885802269, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04266786575317383, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.048666104674339294, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04863111674785614, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.021470919251441956, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07781097292900085, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06416983902454376, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.05529089644551277, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03126363083720207, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03729119524359703, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.042132508009672165, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03096100129187107, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.023608285933732986, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.021563803777098656, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02176082506775856, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012574436143040657, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01163830142468214, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009348759427666664, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007729807402938604, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.011120607145130634, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009299270808696747, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.00639934791252017, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006114166229963303, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11338719725608826, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07339717447757721, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.048608653247356415, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.04909929260611534, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04693521559238434, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.023722399026155472, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07278618216514587, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0647713765501976, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05365147441625595, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.032365575432777405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.034755486994981766, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.037216298282146454, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03133269026875496, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.023502014577388763, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02128014527261257, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.018841154873371124, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01372245978564024, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012827781960368156, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011402958072721958, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010020601563155651, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01053251139819622, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.011262582615017891, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007766671013087034, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.008868837729096413, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.11581765115261078, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.10730424523353577, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.10474319010972977, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.0950913354754448, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.052028052508831024, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0495615154504776, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.05808285251259804, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.053625304251909256, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.05280172824859619, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.04718589410185814, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.04496973752975464, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.029515957459807396, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.025734538212418556, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.025079278275370598, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.02492346614599228, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01485193707048893, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.013531137257814407, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.013467305339872837, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01265348494052887, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01256584282964468, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.008290493860840797, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009058740921318531, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.00804641842842102, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007031166460365057, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14297965168952942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13432621955871582, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1317615807056427, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11968167126178741, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0647091343998909, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0621945746243, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07166222482919693, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0662301555275917, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06545053422451019, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05911078304052353, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05621596798300743, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03617820143699646, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03138483315706253, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03075386956334114, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.030605074018239975, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.018039435148239136, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.0157883632928133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.015717625617980957, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014694664627313614, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.014605001546442509, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009485723450779915, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.0094750439748168, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009225964546203613, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0062788971699774265, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.0832681804895401, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.07061649858951569, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.06383025646209717, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.05599171295762062, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.03695223852992058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.03140588849782944, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.050047047436237335, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.043427422642707825, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.038650430738925934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.030244367197155952, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0291561521589756, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.024635467678308487, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.020865021273493767, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.018259787932038307, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.017600229009985924, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.012738027609884739, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.010424073785543442, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.010173478163778782, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.009283546358346939, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.008905410766601562, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.007701719645410776, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.007870122790336609, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.006953845266252756, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.006400358863174915, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.020337766036391258, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.013437071815133095, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.007553038187325001, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.008053069934248924, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.007841002196073532, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0033497833646833897, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.014094826765358448, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.012734318152070045, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.009515172801911831, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.006190047599375248, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0066688042134046555, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0071413638070225716, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0060754637233912945, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0039461590349674225, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0032562795095145702, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.003592640394344926, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.002295122481882572, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0020210270304232836, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0020236033014953136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0016192288603633642, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0019652782939374447, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0020062949042767286, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0012198977638036013, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0014759352197870612, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.01803521253168583, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.012109551578760147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.00655521871522069, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.006999838165938854, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.006840675603598356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0027245741803199053, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.013165744952857494, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.011618942953646183, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.008349981158971786, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0056094396859407425, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.006087215151637793, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.006579631008207798, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.005528394598513842, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0034256295766681433, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.002716770628467202, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0032972453627735376, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0019632126204669476, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.001677273423410952, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0017562041757628322, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0013447469100356102, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0017608014168217778, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0017435196787118912, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0009998306632041931, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0012333464110270143, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.15009085834026337, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09750298410654068, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06927721947431564, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06647533923387527, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.06132911890745163, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0348401740193367, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09118407219648361, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08229342103004456, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.07149013876914978, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04263865202665329, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04403408244252205, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04663306102156639, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03932610899209976, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.029830263927578926, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.027155620977282524, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02324359491467476, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.015870176255702972, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.014774204231798649, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.012330455705523491, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010334989987313747, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012081494554877281, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01175190880894661, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0080671152099967, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00768613163381815, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.16751715540885925, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1328437477350235, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.11805026233196259, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09496404230594635, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07509659975767136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.06092074513435364, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0981580838561058, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08694922178983688, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.07952737808227539, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05462257191538811, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.05169476941227913, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05047295615077019, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.042349256575107574, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.03709624335169792, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0357840433716774, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.025593258440494537, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02084842510521412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.020278245210647583, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.017295880243182182, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.016439231112599373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014329345896840096, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015445340424776077, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012382891960442066, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0122855668887496, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16540859639644623, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15567649900913239, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15288515388965607, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1381208747625351, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0764315277338028, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07335873693227768, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0847589373588562, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07803970575332642, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07722149044275284, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06932719796895981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06579916179180145, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.043389711529016495, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.037823550403118134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.037113070487976074, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03696317598223686, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021956508979201317, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020300105214118958, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.020230887457728386, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01900196261703968, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01890893466770649, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012470955029129982, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013841989450156689, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012214216403663158, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01103303860872984, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2065252959728241, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19518250226974487, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19182386994361877, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.17382144927978516, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09589404612779617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09225866198539734, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10600227862596512, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09767124056816101, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09678687900304794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08723217993974686, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08273196965456009, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05407468229532242, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.047011371701955795, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.046264760196208954, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04608680307865143, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.027081049978733063, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.024661749601364136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.024581406265497208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023038599640130997, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.0229217316955328, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014818296767771244, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01608123630285263, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014518986456096172, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012180897407233715, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.021193038672208786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.020654480904340744, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.007167818956077099, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.006689786445349455, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.005977427586913109, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0033328388817608356, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.022929152473807335, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.020167361944913864, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.00608021579682827, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.005655696149915457, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.005679433234035969, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.005578461568802595, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.005394613370299339, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0028388460632413626, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.002602367429062724, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0024323873221874237, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.002313758712261915, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.0010204126592725515, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.002273169346153736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.0009184947120957077, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0022731064818799496, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.0022395984269678593, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.0007285683532245457, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0007759653381071985, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.05958463251590729, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.04571685940027237, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.03827888146042824, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.03417916223406792, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.02539876289665699, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.01882929727435112, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.035628825426101685, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.032453060150146484, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0280386283993721, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.019857056438922882, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.01951531693339348, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.01806779019534588, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.015474589541554451, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.012318789958953857, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.011469321325421333, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.009024947881698608, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.006557795684784651, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.006184135563671589, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.005500740837305784, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.00492114806547761, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.004709456581622362, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.004729900509119034, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.003487455192953348, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.003189719282090664, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06204456835985184, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.04558544605970383, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.03581951558589935, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.032733067870140076, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.02573496475815773, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.017412710934877396, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.038103461265563965, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.034684330224990845, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.029015183448791504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.019804097712039948, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.019821908324956894, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01921961084008217, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01646776683628559, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.012552627362310886, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.011433395557105541, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.00961582362651825, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.006712240632623434, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0062186685390770435, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00559394434094429, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.004827567376196384, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005009208805859089, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005014446564018726, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.003487834706902504, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003346336307004094, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.17497766017913818, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.14330244064331055, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.13085021078586578, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.11445695161819458, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.07809683680534363, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0648961067199707, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09777778387069702, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08862165361642838, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08312574028968811, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.061914704740047455, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.05868503078818321, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04970449209213257, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04233896732330322, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.037562962621450424, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.03638176620006561, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.024866551160812378, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0193713940680027, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.018870064988732338, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.016116440296173096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01529417373239994, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012996037490665913, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012448443099856377, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.010923544876277447, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007938236929476261, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.16559898853302002, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.14357972145080566, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.13323292136192322, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.11507686972618103, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07544463872909546, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.06578981131315231, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09533420950174332, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08612871170043945, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.07890327274799347, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0627659261226654, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.059093572199344635, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.049571048468351364, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04209405183792114, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.037304360419511795, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.03611291944980621, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.025283677503466606, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.020962385460734367, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.020412741228938103, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01870863512158394, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.017976637929677963, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014470119960606098, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015473341569304466, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012774289585649967, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012357997708022594, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.20484933257102966, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19219662249088287, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.18835757672786713, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.16949446499347687, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0957895889878273, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09141239523887634, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10640406608581543, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0980847030878067, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0970003753900528, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08593076467514038, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08103527128696442, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05434274300932884, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04704863950610161, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04604966938495636, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04581088200211525, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02720026671886444, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.024104049429297447, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02399771846830845, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.022163936868309975, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02202344313263893, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014635738916695118, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015126324258744717, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01422029361128807, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010819977149367332, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24420113861560822, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2294626533985138, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22506259381771088, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20270250737667084, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11418241262435913, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10905171930789948, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12647439539432526, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11674360930919647, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.115565225481987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10248961299657822, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09656399488449097, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06435512751340866, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05574679374694824, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05461842194199562, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.054347433149814606, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03212914615869522, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028015220537781715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02789633348584175, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025649983435869217, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025477318093180656, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016885830089449883, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016740476712584496, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016398906707763672, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010999356396496296, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.18372121453285217, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16004574298858643, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.15102212131023407, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1312943398952484, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.08241275697946548, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.07352856546640396, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09809686988592148, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.08956402540206909, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.08534353226423264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.06857014447450638, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.06421580910682678, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.04984714835882187, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04286807030439377, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0396939292550087, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.03891504928469658, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.025036467239260674, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02105957828462124, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.020774973556399345, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.018436888232827187, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.017935272306203842, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.013624804094433784, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01397622749209404, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.012533977627754211, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.010148194618523121, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.04615885764360428, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.03795035555958748, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.03326525166630745, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.029236551374197006, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.02034893073141575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.016382046043872833, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.027696508914232254, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.025192830711603165, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.021810242906212807, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.016518838703632355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.01596180722117424, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.014066657051444054, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.012027458287775517, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.009878759272396564, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0093072559684515, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.00705003272742033, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.005265533924102783, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0050039105117321014, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.004535478539764881, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.004160280805081129, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.003737351391464472, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.003744385438039899, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0029297834262251854, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.002578643849119544, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.046827927231788635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.037378594279289246, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.03136586770415306, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.027667026966810226, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.020139649510383606, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.015269564464688301, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.029104793444275856, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.02634894847869873, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.021937133744359016, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.016216646879911423, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.015959395095705986, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.014717928133904934, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.012550966814160347, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.009745314717292786, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.008966336026787758, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.007362864911556244, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.00511831184849143, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.004761295858770609, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.004352252930402756, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.003831918351352215, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0038110287860035896, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0036821917165070772, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.002720810007303953, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.002353732008486986, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.18516051769256592, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1547824591398239, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.14287962019443512, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.12464328110218048, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08319615572690964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.070793516933918, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10219531506299973, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09366101026535034, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0879063755273819, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06701307743787766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06321793049573898, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05193913355469704, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.044717270880937576, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.03993258625268936, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.038728296756744385, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025937367230653763, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.020490501075983047, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.019956139847636223, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.017215639352798462, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01637973077595234, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013345351442694664, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012953512370586395, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0113448491320014, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00802308414131403, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.17580892145633698, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1548566222190857, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14708638191223145, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.12473765015602112, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08111688494682312, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07274987548589706, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09597142040729523, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08731602877378464, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08340653777122498, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06679867208003998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0607164166867733, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04930860549211502, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04238547757267952, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.039628732949495316, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.038959115743637085, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02489442005753517, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.021647699177265167, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02133612520992756, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01899595931172371, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.018589433282613754, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013820505701005459, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015020088292658329, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012813306413590908, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011711505241692066, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2156011164188385, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2019323855638504, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1978222131729126, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.17816829681396484, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10112711787223816, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0963459387421608, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11237753182649612, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10366849601268768, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10248103737831116, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09058474749326706, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08549407124519348, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05736413598060608, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.049691230058670044, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04854872450232506, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04828350618481636, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.028684101998806, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02522171661257744, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025107111781835556, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023134522140026093, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02296665869653225, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015301648527383804, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015579079277813435, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014828003011643887, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010811896994709969, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2540466785430908, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23834431171417236, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23362207412719727, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2106465846300125, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11926838010549545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11380884796380997, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13249202072620392, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12220478802919388, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12084729969501495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1070094183087349, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10113249719142914, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06758727878332138, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05845649912953377, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05718277394771576, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05687443166971207, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03380494937300682, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.029443949460983276, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029315205290913582, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026973800733685493, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026778878644108772, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017939502373337746, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017778906971216202, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01740391179919243, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011877309530973434, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20616956055164337, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18185289204120636, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1728634387254715, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1518002450466156, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09315354377031326, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.084195077419281, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10958679765462875, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10021702945232391, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09614382684230804, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07859164476394653, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0738682746887207, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.055585820227861404, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04792459309101105, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04475584998726845, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04398635774850845, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02789613977074623, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.023521525785326958, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02323596365749836, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020712921395897865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.020215557888150215, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015022256411612034, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015248016454279423, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.013920853845775127, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.010776648297905922, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.06133921816945076, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.05101052671670914, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.044939782470464706, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.03976137191057205, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.027177957817912102, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02206295169889927, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.03727862238883972, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03365711867809296, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.029043201357126236, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02242189459502697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.02171250991523266, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.019008595496416092, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.016146646812558174, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.013243562541902065, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.012476440519094467, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.00954992976039648, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.007131634280085564, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.006780518684536219, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.00623356131836772, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.005744597874581814, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005095695611089468, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005147893913090229, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.004002124071121216, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0036545605398714542, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.060108914971351624, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.04857328534126282, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04161671921610832, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.036939769983291626, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.02600707672536373, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.020281272009015083, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03717245161533356, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03330157324671745, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.028281494975090027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02124270610511303, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.020879298448562622, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01891479641199112, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01593152806162834, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.012645337730646133, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.011750034987926483, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009514016099274158, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0067490944638848305, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006352256517857313, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.005848425440490246, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005262202583253384, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00499262660741806, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004914240445941687, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0036992186214774847, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003346635727211833, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.200699120759964, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17061837017536163, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.15896178781986237, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.13939344882965088, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09070684015750885, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07861076295375824, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10996247828006744, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10088314861059189, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09538368880748749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07432723790407181, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06995809078216553, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05595913156867027, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.048264097422361374, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04360741376876831, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.042454011738300323, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028000833466649055, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02250341698527336, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.021998781710863113, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01920481212437153, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.018401483073830605, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014515024609863758, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014308169484138489, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012575197964906693, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009175191633403301, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.17239022254943848, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.15187644958496094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14438149333000183, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1233237236738205, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07924927771091461, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07115841656923294, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09482783824205399, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08527756482362747, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0816471204161644, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06568959355354309, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06072545796632767, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04880128055810928, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04154769331216812, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.03885209932923317, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0382106676697731, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.024711621925234795, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.021341247484087944, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02105599269270897, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.018848951905965805, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.018451405689120293, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013868119567632675, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014938329346477985, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012838291004300117, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011774513870477676, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19504424929618835, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18181249499320984, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17740961909294128, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15957549214363098, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0914599820971489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08655960857868195, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10283448547124863, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09472177922725677, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0929567813873291, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08151733875274658, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07686258107423782, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.052434101700782776, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.045406538993120193, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0439457893371582, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04359714686870575, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.026275664567947388, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02284715324640274, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.022690439596772194, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02086661383509636, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02065165899693966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014042409136891365, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014182678423821926, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013449939899146557, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009800674393773079, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25306436419487, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2365136742591858, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2312483787536621, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20826475322246552, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11881612986326218, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11282658576965332, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13274967670440674, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12248212844133377, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12059281021356583, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10617857426404953, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.1001518964767456, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06764104217290878, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.058590006083250046, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05695810541510582, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05657557398080826, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03381806239485741, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.029249437153339386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02907503955066204, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026679614558815956, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026435287669301033, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01777869090437889, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017595883458852768, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017106501385569572, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01154391560703516, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2143469899892807, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19177499413490295, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18351007997989655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16218231618404388, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09753236174583435, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08920693397521973, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11441521346569061, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10404366254806519, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10023380815982819, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08361932635307312, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07872558385133743, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.058158569037914276, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.049843210726976395, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04692026972770691, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0462082177400589, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029230615124106407, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02475825697183609, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024498600512742996, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022093631327152252, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021639537066221237, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015856560319662094, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016121238470077515, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014820045791566372, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011593125760555267, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07242271304130554, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06230539828538895, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.056562576442956924, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.049967121332883835, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03256484493613243, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02771025523543358, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.042801570147275925, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03886684402823448, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.03427392989397049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02737664058804512, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.026219623163342476, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.021706948056817055, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.01857292279601097, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.015781396999955177, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.015061347745358944, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010880839079618454, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008380508050322533, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008040702901780605, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0073831938207149506, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.006918157916516066, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005779687315225601, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005816943943500519, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.004734067711979151, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004033850971609354, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06749626249074936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.056916963309049606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05050086975097656, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.044460881501436234, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0298235472291708, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.024547679349780083, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.040475696325302124, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.036887139081954956, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03171830624341965, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.024812020361423492, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.023946603760123253, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.020483095198869705, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.017558688297867775, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01438172534108162, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.013533505611121655, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010245144367218018, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.007502785883843899, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0070937275886535645, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006509321741759777, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005947052966803312, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005294967442750931, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00516516761854291, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.004084322601556778, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003294356632977724, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21286503970623016, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18755559623241425, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17832106351852417, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15691706538200378, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09777157008647919, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08778339624404907, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11469335108995438, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1054803878068924, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10129155218601227, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08230896294116974, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0769752711057663, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.058277565985918045, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0503460131585598, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04683910682797432, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04600020870566368, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029065348207950592, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023838872089982033, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02346322312951088, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020632311701774597, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.020061153918504715, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014941578730940819, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014408157207071781, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013433035463094711, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008772384375333786, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2008296102285385, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17950624227523804, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17195965349674225, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14868099987506866, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09283318370580673, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08446495980024338, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10792283713817596, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09864191710948944, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09504956752061844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07790307700634003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07161454111337662, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05553353205323219, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04751357436180115, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04486227035522461, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.044219985604286194, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027937639504671097, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02371271699666977, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.023406323045492172, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020803943276405334, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0203879214823246, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015206367708742619, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015463740564882755, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014150019735097885, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011185302399098873, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18611690402030945, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17362576723098755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1694658100605011, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1525968760251999, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08717998117208481, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0825795978307724, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09779717028141022, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0903046578168869, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08856720477342606, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07780659198760986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07341203093528748, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04978407919406891, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.043217867612838745, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.041822537779808044, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.041496362537145615, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024910669773817062, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.021588975563645363, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021440420299768448, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019695930182933807, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01948816515505314, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013129337690770626, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013187266886234283, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012568465434014797, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008848857134580612, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24937880039215088, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23351840674877167, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22834788262844086, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2059096395969391, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11715219914913177, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11138831079006195, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13086043298244476, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12074805051088333, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11886240541934967, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10488161444664001, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09910919517278671, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06665386259555817, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05774809047579765, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05614578351378441, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.055768437683582306, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.033302903175354004, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028804978355765343, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028639979660511017, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026310069486498833, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026065155863761902, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017469512298703194, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017299242317676544, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016812801361083984, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01130450889468193, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21430589258670807, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1939592808485031, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.186259925365448, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16571226716041565, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09814268350601196, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09041787683963776, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11467410624027252, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10446663200855255, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10055553168058395, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08523324877023697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0805731862783432, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05844520777463913, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.050038449466228485, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04716336727142334, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.046451136469841, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029403597116470337, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024750329554080963, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02447843924164772, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02224871702492237, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021806009113788605, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01593993417918682, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015934685245156288, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014922686852514744, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011275971308350563, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.06877437978982925, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.05877877399325371, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05225508660078049, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.046257294714450836, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.030747899785637856, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0255309846252203, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04223717749118805, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.038093455135822296, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.032559994608163834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.025866396725177765, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.024987366050481796, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.021448368206620216, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.018209010362625122, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.014935809187591076, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.014071719720959663, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010750774294137955, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.00796683318912983, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.007553824223577976, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007028665859252214, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.006474762223660946, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005670791491866112, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005670849233865738, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.004416705574840307, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.003913136199116707, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06469868868589401, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.054090797901153564, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04704709351062775, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04162832722067833, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.02841486595571041, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.022807391360402107, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04017258435487747, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.036250244826078415, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03043379820883274, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02367519959807396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.023112809285521507, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02039519138634205, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01731663942337036, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.013765890151262283, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01279723085463047, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010180226527154446, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.007245186250656843, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006784146185964346, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0063207936473190784, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005687998607754707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00530720641836524, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005160314496606588, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.003951425664126873, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0033561328891664743, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19221538305282593, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16940978169441223, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1600136011838913, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.14096608757972717, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08814044296741486, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0786733627319336, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10581211000680923, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09693440049886703, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09135445952415466, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07441218942403793, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06988146156072617, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05372560769319534, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04626970738172531, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.042272426187992096, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04128270968794823, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.026826122775673866, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021631527692079544, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02117779292166233, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018820244818925858, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.018162207677960396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013805646449327469, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013412979431450367, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012123501859605312, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008331308141350746, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19340375065803528, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17218972742557526, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.163961723446846, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14334267377853394, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0894261971116066, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08052803575992584, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10616705566644669, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09620482474565506, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0919424369931221, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07518303394317627, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07029053568840027, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.054541219025850296, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04646221548318863, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04336470365524292, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04260680079460144, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027376683428883553, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023088159039616585, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02273569256067276, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020329216495156288, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019844064489006996, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014794629067182541, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015347644686698914, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013568520545959473, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011281059123575687, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17605414986610413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16456115245819092, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1605375111103058, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14480122923851013, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08260276913642883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07823329418897629, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09293798357248306, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08575711399316788, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08387608081102371, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0738559365272522, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06973976641893387, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.047361213713884354, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.041059982031583786, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0396418422460556, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0393066331744194, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02368621900677681, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020501160994172096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02034859172999859, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018747620284557343, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01854007877409458, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012495125643908978, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012596474029123783, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011915263719856739, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008514315821230412, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24276569485664368, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.227435901761055, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22234715521335602, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2006205916404724, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11410912871360779, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10841409862041473, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1277931034564972, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11790457367897034, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11580366641283035, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10224379599094391, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09651187807321548, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06505105644464493, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05637567490339279, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05468061566352844, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05427664890885353, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032499633729457855, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028025127947330475, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027835944667458534, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02560126781463623, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02534216269850731, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.0169881209731102, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016804499551653862, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016285737976431847, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010895787738263607, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21568182110786438, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1967833787202835, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18983367085456848, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.170060932636261, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09916331619024277, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0921373963356018, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11549939215183258, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10481620579957962, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10139888525009155, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08708571642637253, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08282320946455002, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05876880884170532, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.050240084528923035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04768887162208557, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04707329347729683, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029499996453523636, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025061288848519325, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024820342659950256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022714342921972275, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022328246384859085, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015889961272478104, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016104010865092278, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014984571374952793, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011467077769339085, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07402607798576355, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0654330775141716, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05838629603385925, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05168772488832474, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03367241472005844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.028310144320130348, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04689398035407066, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04215483367443085, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.03506520390510559, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.029025856405496597, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.028133638203144073, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.023947279900312424, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.020300675183534622, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.016399484127759933, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.015345137566328049, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.012016026303172112, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008836857974529266, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008302655071020126, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0079586710780859, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0073021939024329185, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006370638497173786, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006440003402531147, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.004910422954708338, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004521575756371021, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06680234521627426, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05826285481452942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05062853917479515, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.044675376266241074, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.029926443472504616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02429053746163845, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04327613487839699, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03905586153268814, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03143388405442238, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.025641152635216713, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.02504711225628853, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.021930571645498276, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.018730472773313522, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.014516143128275871, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.013353162445127964, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011019451543688774, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0076855081133544445, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0070858909748494625, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006861155852675438, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0061042592860758305, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005730844102799892, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005601147189736366, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.004139184486120939, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003651260631158948, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.20813842117786407, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1865510791540146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1781642735004425, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15693612396717072, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09623511880636215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08743972331285477, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11378981173038483, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10374115407466888, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09911704063415527, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08215668797492981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07705342024564743, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05795682221651077, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04961905628442764, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0462084598839283, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04535621777176857, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028942840173840523, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023627517744898796, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.023243136703968048, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02072899229824543, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.020188717171549797, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014940259046852589, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01447373814880848, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013427321799099445, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00906880758702755, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.20327921211719513, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18236109614372253, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17403244972229004, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14980944991111755, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09416605532169342, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08554982393980026, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11294813454151154, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1014602929353714, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.096645787358284, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07951048016548157, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07378356903791428, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05822610482573509, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.049356259405612946, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.046083904802799225, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.045281749218702316, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02946976013481617, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025216195732355118, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02483503520488739, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02248971350491047, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02200445719063282, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01649297960102558, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01752360537648201, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015284613706171513, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013677794486284256, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17317786812782288, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16218814253807068, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15851560235023499, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14307458698749542, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08145089447498322, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07734633982181549, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09130401909351349, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08420582115650177, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08265751600265503, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07296610623598099, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06900607794523239, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04665470868349075, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0404001846909523, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03915427252650261, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03886546939611435, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02334652654826641, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020366348326206207, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0202302448451519, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018674956634640694, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018488802015781403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012429459020495415, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012651180848479271, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011929658241569996, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008768453262746334, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24647173285484314, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2312462478876114, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2262941598892212, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20464012026786804, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11619962751865387, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11055321991443634, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12994110584259033, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11964123696088791, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11783793568611145, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10421839356422424, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09866204857826233, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.066374771296978, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05732594057917595, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05578098073601723, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05541373789310455, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.033200252801179886, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028807874768972397, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028639165684580803, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026394635438919067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026159539818763733, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01760985143482685, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017568431794643402, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016965480521321297, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011854724958539009, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21572014689445496, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1965319663286209, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18932218849658966, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16954943537712097, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09906182438135147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09186151623725891, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11529162526130676, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10500724613666534, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10135386139154434, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.086758092045784, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08253294229507446, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.058737363666296005, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05024343729019165, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04759030416607857, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04694481939077377, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029451321810483932, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024897443130612373, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024649742990732193, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02249661646783352, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022090613842010498, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01580929011106491, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015880350023508072, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014884226024150848, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01114284060895443, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07361430674791336, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06455706804990768, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.059294018894433975, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05195745453238487, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03342738747596741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02893754467368126, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04297981038689613, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.039274998009204865, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.034806571900844574, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02825535461306572, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.02683815360069275, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.021811269223690033, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.01877126842737198, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.01613919995725155, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.015463571064174175, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01092349924147129, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008483851328492165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008152654394507408, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.00747517216950655, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.007039566524326801, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0057405512779951096, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005752722267061472, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00475848326459527, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0038936834316700697, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06736426055431366, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05774711072444916, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.052172329276800156, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.045446790754795074, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03005664423108101, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0253269262611866, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0397573746740818, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03616144135594368, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.031629182398319244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.025044787675142288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.023909524083137512, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.020181037485599518, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01730504259467125, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.014508737251162529, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.013777196407318115, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01010121125727892, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.00758885545656085, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.007231531199067831, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0066054160706698895, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0061271158047020435, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005255912896245718, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00518457917496562, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.004191122017800808, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0034089135006070137, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19404077529907227, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1715865433216095, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16289657354354858, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1423696130514145, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08890541642904282, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07996560633182526, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10633309930562973, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0965656116604805, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09216494858264923, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07483111321926117, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0700613483786583, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.054186854511499405, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04613717272877693, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04268763214349747, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04181840643286705, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02707763761281967, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021957360208034515, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0215782281011343, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019106555730104446, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.018562080338597298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014000722207129002, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013679299503564835, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012424916960299015, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008824864402413368, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1967104971408844, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17915470898151398, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1722593903541565, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14974306523799896, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09123586863279343, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08404454588890076, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10734805464744568, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09749718010425568, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09344258904457092, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07833980023860931, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07244651764631271, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05513551086187363, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.046895500272512436, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.044069577008485794, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04340023547410965, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027621906250715256, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.0232288409024477, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.022896315902471542, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02069135196506977, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020251797512173653, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014770478941500187, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015096432529389858, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013698353432118893, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01079217717051506, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16645210981369019, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15590697526931763, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1522870510816574, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1375252902507782, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07833945006132126, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07432489097118378, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0880630612373352, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0810977891087532, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07949437946081161, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07016687095165253, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06642942130565643, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04495491459965706, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03887472301721573, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03764032945036888, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03735264018177986, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0224771611392498, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01953505165874958, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019399991258978844, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017895836383104324, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017715008929371834, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01190347969532013, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012075833976268768, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011399193666875362, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008293171413242817, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24032311141490936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22550299763679504, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.220596581697464, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1992741823196411, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11330566555261612, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10770481079816818, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1269150823354721, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11681855469942093, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11492019891738892, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10154334455728531, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0960555300116539, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06472878903150558, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.055923208594322205, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0543515607714653, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.053985580801963806, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03236669674515724, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02796344831585884, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027783319354057312, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02557368017733097, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025339342653751373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016997961327433586, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01690986379981041, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016328943893313408, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011198689229786396, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.210754856467247, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19289414584636688, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18618540465831757, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16664569079875946, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09699835628271103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0903310775756836, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11266706138849258, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10255611687898636, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09906433522701263, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.085207499563694, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08086459338665009, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.057355254888534546, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04915500804781914, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04666460305452347, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04605631157755852, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02886318974196911, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024560345336794853, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02432655170559883, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022282391786575317, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02190021052956581, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015736285597085953, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015835916623473167, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014878988265991211, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011338524520397186, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09046163409948349, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07973463833332062, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07358886301517487, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0646321177482605, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0411522351205349, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.035913798958063126, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.052832793444395065, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04798102751374245, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04277633875608444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03498769551515579, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03327053040266037, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0269138403236866, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.023011459037661552, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.019938282668590546, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.019154416397213936, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0135028176009655, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.010612104088068008, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.010230948217213154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009424387477338314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.008931777440011501, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007219426333904266, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007312002591788769, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00606675585731864, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005172032862901688, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08159442991018295, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07099636644124985, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06464869529008865, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.056633830070495605, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03672807663679123, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.031418491154909134, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04820795729756355, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04372340440750122, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03845018148422241, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.030910063534975052, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.029620053246617317, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02455293759703636, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02093556709587574, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01772027090191841, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.016883952543139458, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012269658967852592, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009265250526368618, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.008854895830154419, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0081259086728096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007581005338579416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006398678291589022, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006288728676736355, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0051594325341284275, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004145585931837559, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19983415305614471, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.180155947804451, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17306895554065704, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15137089788913727, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09255606681108475, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08485855907201767, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10739697515964508, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09830275177955627, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09491810947656631, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07886575162410736, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0734882801771164, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05451437085866928, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04692554846405983, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04430703818798065, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04366174712777138, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.027247877791523933, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.022508246824145317, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.022211026400327682, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01967167668044567, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01925469934940338, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01395669486373663, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013453496620059013, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012793135829269886, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008195838890969753, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.20149849355220795, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1831200122833252, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17605778574943542, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15304675698280334, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09368933737277985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08611813187599182, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10958636552095413, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0996762290596962, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09583936631679535, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07972266525030136, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07467030733823776, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.056224972009658813, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047909047454595566, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.045121267437934875, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.044449884444475174, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02809218503534794, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023512717336416245, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02319088764488697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02070956490933895, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020278744399547577, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014816521666944027, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014898029156029224, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013718036003410816, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010251302272081375, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16556012630462646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15474548935890198, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15096595883369446, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13616347312927246, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07785751670598984, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07374870777130127, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08772670477628708, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08069774508476257, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07904456555843353, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06958510726690292, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06583866477012634, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0449049212038517, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03875717148184776, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03747985139489174, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03717399388551712, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022499490529298782, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01959129050374031, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0194535069167614, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01794992946088314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017766712233424187, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012088247574865818, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012329286895692348, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011581262573599815, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008716488257050514, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2324388176202774, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21747583150863647, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21249498426914215, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1916363686323166, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10936789214611053, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10373096168041229, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12271849066019058, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11290571093559265, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1110190749168396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09772828221321106, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09242833405733109, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06270654499530792, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.054115355014801025, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05251355096697807, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.052130863070487976, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03137052059173584, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027167057618498802, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026995547115802765, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024824196472764015, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024584399536252022, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016638685017824173, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016659101471304893, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015968548133969307, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01130617968738079, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2166447788476944, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19791260361671448, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19070184230804443, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17028257250785828, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09973413497209549, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09259939938783646, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11641717702150345, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10596401244401932, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10201127082109451, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08741238713264465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08266987651586533, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05908317118883133, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.050706297159194946, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.047925714403390884, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0472574420273304, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02963287942111492, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025061892345547676, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024788005277514458, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02264910191297531, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022220512852072716, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015914669260382652, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01597609929740429, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014948689378798008, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011157331988215446, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.08081857860088348, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07172662764787674, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.06670236587524414, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05810534209012985, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03700857236981392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.032614197582006454, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04646606743335724, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.042407914996147156, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.038207292556762695, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03128369152545929, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.029491165652871132, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.023633291944861412, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02025655470788479, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.017838623374700546, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.017225835472345352, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.011822003871202469, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.00933796726167202, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.009029904380440712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.008211715146899223, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.007815029472112656, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006210132502019405, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006204306613653898, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.005275392904877663, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004185577854514122, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.07580067962408066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06647057831287384, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06144655495882034, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.053238097578287125, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03428655117750168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.029905226081609726, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04360625892877579, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03957809507846832, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03556925058364868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.028737377375364304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.027128182351589203, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02207995019853115, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.018880348652601242, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.016489198431372643, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.015886645764112473, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01104134414345026, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.008551963604986668, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.008240041323006153, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007444078102707863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007033510133624077, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005722161382436752, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005608968902379274, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.004766915924847126, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0036197092849761248, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.18594832718372345, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16624552011489868, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1590840071439743, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1381295770406723, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08593221008777618, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07814474403858185, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10137757658958435, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09172610193490982, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08842143416404724, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0723818838596344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06733332574367523, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05159789323806763, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04386475309729576, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.041169311851263046, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04056187719106674, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025743916630744934, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021089427173137665, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.020800312981009483, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018320010975003242, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01788572408258915, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013289332389831543, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012877346947789192, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012038521468639374, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00817116629332304, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.18803952634334564, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17092473804950714, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16418227553367615, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1447068750858307, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08685404807329178, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07979889959096909, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10248547047376633, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09314816445112228, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0891038253903389, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07514569163322449, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0710664838552475, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.052731964737176895, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.044871360063552856, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04199657589197159, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04132194444537163, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.026439392939209938, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.022234782576560974, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.021922850981354713, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019952908158302307, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019520917907357216, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014287634752690792, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014597144909203053, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01322105061262846, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010556037537753582, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1608331948518753, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14964208006858826, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.145639106631279, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13094636797904968, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07549033313989639, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07120858132839203, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08564791828393936, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07862918078899384, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07676583528518677, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06710614264011383, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06348302215337753, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.043816570192575455, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.037769488990306854, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03635371848940849, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0360155813395977, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021975185722112656, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019059084355831146, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01890665851533413, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017409726977348328, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01720164716243744, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011817313730716705, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012092433869838715, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011246904730796814, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00860492791980505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22092215716838837, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2058616280555725, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2006833702325821, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18041151762008667, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10375795513391495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09802889823913574, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11714540421962738, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10763974487781525, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1054845005273819, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09224983304738998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08715466409921646, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05988988280296326, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05157192796468735, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04982394725084305, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0494098886847496, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029939059168100357, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02575824409723282, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025565972551703453, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023440374061465263, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02317802608013153, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015832627192139626, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015820834785699844, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015106265433132648, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010686960071325302, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22114351391792297, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20236557722091675, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19540221989154816, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17453628778457642, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10223710536956787, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0952240526676178, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1189398393034935, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10801681876182556, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10454588383436203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0897689089179039, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0849892720580101, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06071249768137932, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05182185024023056, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.049221280962228775, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04859142005443573, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03050415962934494, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025876766070723534, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025640984997153282, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023452959954738617, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02305956929922104, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01646314561367035, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016641153022646904, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01553165540099144, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0118767274543643, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.0914924368262291, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08110112696886063, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0755261778831482, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.06562410295009613, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.042066458612680435, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03709213808178902, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.052582498639822006, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04784926399588585, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.043442580848932266, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03540562465786934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03336585685610771, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.026761122047901154, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.022957434877753258, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.020356420427560806, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.019707826897501945, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013415593653917313, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01077623013406992, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.010447527281939983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009485991671681404, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009064062498509884, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007109404541552067, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007272049318999052, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006109125912189484, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0051078153774142265, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.0802135095000267, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07059627026319504, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06473705917596817, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.05601217597723007, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.036390550434589386, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.031497567892074585, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04706237465143204, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04276803508400917, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03769079968333244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.030578138306736946, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.028898093849420547, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.023884378373622894, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02043125219643116, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.017556246370077133, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.016843050718307495, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01193360798060894, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009218604303896427, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.008838499896228313, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008079851977527142, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007595935836434364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006283828988671303, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006235305219888687, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005207686219364405, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00418979674577713, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19854655861854553, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17585495114326477, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16750642657279968, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1446259319782257, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09137580543756485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08233152329921722, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10912305861711502, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09826815873384476, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09421735256910324, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07622513920068741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07069210708141327, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.055630333721637726, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04697851836681366, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04383258521556854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.043067652732133865, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02785806730389595, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.022483384236693382, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.022134829312562943, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019386766478419304, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.018881158903241158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014378653839230537, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013835136778652668, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012795698828995228, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008820857852697372, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.20470617711544037, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18350337445735931, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17075379192829132, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15035240352153778, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09452121704816818, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08382336795330048, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12227369844913483, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10974428057670593, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09854631125926971, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08152162283658981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07799028605222702, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06329470872879028, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.053145263344049454, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.046151820570230484, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04438155144453049, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031808529049158096, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.024954333901405334, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.024155382066965103, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022508736699819565, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.021403949707746506, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017321670427918434, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017628928646445274, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014834376983344555, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012986465357244015, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16192936897277832, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15026529133319855, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14600308239459991, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13114462792873383, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07602985203266144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07149508595466614, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08681640028953552, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07953568547964096, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07740943133831024, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06742832064628601, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.063840851187706, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.044514838606119156, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03830543905496597, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.036732643842697144, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03636253997683525, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022351250052452087, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019422052428126335, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019254278391599655, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01774364896118641, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01751168817281723, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01213458925485611, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012567522004246712, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011509852483868599, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009165416471660137, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22088523209095, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20515865087509155, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1997871994972229, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.17952419817447662, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10364536941051483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09763910621404648, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11750336736440659, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10779108107089996, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10542134940624237, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09186451882123947, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.086786188185215, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05983193591237068, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05161675065755844, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04972946271300316, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04928163066506386, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02992982789874077, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.025668401271104813, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025459952652454376, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02328607067465782, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.023004040122032166, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015711115673184395, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015704158693552017, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014911617152392864, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010495975613594055, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2223363220691681, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2033076286315918, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1962215006351471, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17500445246696472, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10302188992500305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09582766890525818, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12010003626346588, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10892016440629959, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10537058115005493, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09026725590229034, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08549068123102188, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06134677305817604, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05234237387776375, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04965335875749588, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04899412766098976, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03086688369512558, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02612856961786747, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025884078815579414, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023651929572224617, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02325187437236309, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01667780801653862, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016856208443641663, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01571660302579403, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0120695186778903, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09674772620201111, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08529195934534073, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07847898453474045, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.06830418854951859, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04432675987482071, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03853241726756096, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05802515149116516, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.051778100430965424, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.045989587903022766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03736714646220207, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03573428466916084, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.029713619500398636, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.024965593591332436, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.021540673449635506, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.020675932988524437, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014964569360017776, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01155856717377901, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011123714968562126, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010236918926239014, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009677944704890251, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008049339056015015, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008082789368927479, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006688147317618132, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005809542257338762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08646009862422943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0756467804312706, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06916679441928864, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.05995124205946922, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0391739197075367, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03372924029827118, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05130799859762192, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04622320458292961, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.040712933987379074, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.032825589179992676, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.031176188960671425, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026105517521500587, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.022145025432109833, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01896284706890583, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018144406378269196, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013114253990352154, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010028967633843422, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009630664251744747, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008800948038697243, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008286217227578163, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006913764402270317, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006893666926771402, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005664590746164322, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004769031424075365, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19774705171585083, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17694516479969025, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16869986057281494, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.14692939817905426, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09181050211191177, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08307263255119324, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10964134335517883, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09891220927238464, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0944470465183258, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0773470401763916, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07230813056230545, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05593828111886978, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0473187156021595, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.044063881039619446, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04327378422021866, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02799956314265728, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02255043014883995, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.022173868492245674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019592702388763428, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.019049623981118202, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014438947662711143, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013844123110175133, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01290184073150158, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008739753626286983, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21529479324817657, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1918140947818756, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18333135545253754, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16110889613628387, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09948123246431351, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09014051407575607, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11801689863204956, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.106722891330719, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10252990573644638, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08437664061784744, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07943511009216309, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06076105684041977, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05152111500501633, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04822119325399399, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04743332043290138, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030557308346033096, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025577034801244736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.025215348228812218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02261895313858986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02211378701031208, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016512980684638023, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016826000064611435, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015252185054123402, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01221795566380024, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1666330248117447, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1543579399585724, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14997398853302002, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13483302295207977, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07837298512458801, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07359982281923294, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08943382650613785, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0817580297589302, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07979744672775269, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06929313391447067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06572612375020981, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04590683430433273, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.039471276104450226, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.037959177047014236, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.037602748721838, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023112703114748, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020232543349266052, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.020074481144547462, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01848185621201992, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018265392631292343, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012754998169839382, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013277282938361168, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012167610228061676, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009915969334542751, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2300390750169754, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21333104372024536, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2077619433403015, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18660856783390045, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10815077275037766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10181734710931778, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12207802385091782, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11214348673820496, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11003535240888596, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0955822616815567, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09021871536970139, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06241241469979286, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.053765952587127686, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05196385085582733, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.051539305597543716, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031215786933898926, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026937397196888924, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026747941970825195, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024415696039795876, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024145357310771942, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016535375267267227, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016640078276395798, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01578752137720585, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011376588605344296, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2273360788822174, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20678231120109558, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19812150299549103, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1767168641090393, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10504496842622757, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09672892093658447, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12455257773399353, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1129411906003952, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10762111842632294, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09162215143442154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08691135048866272, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06341401487588882, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05421881750226021, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.050706975162029266, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0498659648001194, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03198550269007683, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.026873070746660233, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026510488241910934, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024284884333610535, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.0237407349050045, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017461221665143967, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017666984349489212, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01624956540763378, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012776818126440048, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09725736081600189, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08707818388938904, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08201629668474197, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07162266224622726, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.044943682849407196, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0402735099196434, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.054836519062519073, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05003780499100685, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.046200789511203766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03807133063673973, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.035785313695669174, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.027891697362065315, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.023913661018013954, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.021617745980620384, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.021046170964837074, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013952676206827164, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011242285370826721, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.010957857593894005, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009878796525299549, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009505427442491055, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007313643116503954, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007263985928148031, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006400927435606718, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004845886025577784, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08399330824613571, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0750950425863266, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07004577666521072, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06110180541872978, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03858526796102524, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03412898629903793, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04838123545050621, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04397690296173096, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03966326266527176, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03272143006324768, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03092113882303238, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.024490412324666977, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02102521061897278, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.018560729920864105, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.017956435680389404, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012256978079676628, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.00966672133654356, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.00934695266187191, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008503123186528683, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008103393018245697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0064013064838945866, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006335626356303692, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005467742681503296, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004187639802694321, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21189406514167786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18974806368350983, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18193770945072174, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15835155546665192, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09818762540817261, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08953933417797089, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11546749621629715, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10424469411373138, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10082119703292847, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08274266123771667, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07692133635282516, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.058868758380413055, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04978806525468826, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04699837043881416, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.046324193477630615, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02937832660973072, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023924240842461586, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02360590174794197, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02072712779045105, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02027345448732376, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01508781872689724, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014361334964632988, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01362090278416872, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00883166678249836, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2265302687883377, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20752209424972534, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20015250146389008, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17838959395885468, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10527033358812332, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09755224734544754, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12284531444311142, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1119040995836258, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.1075698509812355, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09175459295511246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08760463446378708, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06320882588624954, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05397409945726395, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05090926215052605, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05020507797598839, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031695377081632614, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026958903297781944, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026622960343956947, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02433648891746998, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.023878026753664017, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017069917172193527, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01764400489628315, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015958823263645172, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012838316150009632, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17306490242481232, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1602770835161209, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15565863251686096, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13984628021717072, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08142999559640884, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.076481394469738, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09290821105241776, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08520475775003433, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08296328783035278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07207152247428894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0682612732052803, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04777123034000397, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04109618440270424, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03938744589686394, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.038985930383205414, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02394692413508892, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02089684084057808, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.020715823397040367, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01908000372350216, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018831882625818253, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012996613048017025, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013616039417684078, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012328467331826687, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010043228045105934, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23630502820014954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21933622658252716, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21362042427062988, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19178254902362823, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11118282377719879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10471631586551666, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12589113414287567, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11547745019197464, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1131301000714302, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09837942570447922, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09293920546770096, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06438625603914261, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05535821616649628, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.053406935185194016, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05295230448246002, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032231319695711136, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027655040845274925, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02744612656533718, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025089159607887268, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02478932775557041, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017125369980931282, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017039818689227104, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01631159894168377, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011564412154257298, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23841311037540436, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21580463647842407, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20735816657543182, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18433862924575806, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.11020278185606003, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.1015884131193161, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12908120453357697, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11710761487483978, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11300823837518692, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09540542960166931, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0903128907084465, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06601835787296295, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05619366839528084, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.053056277334690094, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05229318141937256, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03321463614702225, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02782754972577095, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02754133567214012, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024931736290454865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02444659359753132, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017936773598194122, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017853178083896637, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01683701016008854, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012593759223818779, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1000562384724617, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08971063792705536, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08370404690504074, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07347186654806137, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04621100425720215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04106074944138527, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.058177150785923004, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.052770551294088364, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04757234454154968, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.039486005902290344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03746509924530983, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02962333895266056, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.025282684713602066, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.022335689514875412, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.021587423980236053, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014843424782156944, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01176968403160572, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01139083318412304, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010452269576489925, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009975525550544262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0078313909471035, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007896598428487778, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006682241335511208, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005462789908051491, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08412180840969086, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07578171044588089, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06905371695756912, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06072581559419632, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03856407478451729, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03350488096475601, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05116557329893112, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04651562497019768, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0397234782576561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03324174880981445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03175676614046097, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.025894517078995705, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02220246009528637, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01862439513206482, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01770824007689953, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01294335164129734, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009779131039977074, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.00929624680429697, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008733601309359074, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00813753716647625, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00678381510078907, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006706019397825003, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0055123199708759785, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004456734284758568, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2248137891292572, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20183658599853516, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19306786358356476, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16865302622318268, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10433252900838852, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09498376399278641, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12481950968503952, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11181016266345978, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10712769627571106, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08849243074655533, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08258083462715149, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06395174562931061, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05348973721265793, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05008069425821304, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.049229949712753296, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03197714313864708, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02571876347064972, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02532351203262806, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022498194128274918, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02194567583501339, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01650356315076351, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015850156545639038, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014705224893987179, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010189633816480637, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.24305406212806702, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21503637731075287, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20453262329101562, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17631761729717255, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.1118074506521225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.10125492513179779, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13428688049316406, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12147322297096252, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11618588119745255, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09298660606145859, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0878860354423523, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0690736174583435, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0585845410823822, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.054179899394512177, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05309150367975235, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03461459279060364, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.028694579377770424, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.028260061517357826, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02502910979092121, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024337325245141983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.018529396504163742, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01895691268146038, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016938328742980957, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01364211831241846, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1620706468820572, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15035152435302734, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14564691483974457, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13105499744415283, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07636857032775879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07161075621843338, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08802422881126404, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08058476448059082, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07783296704292297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06782246381044388, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06426090002059937, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04519837722182274, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03892477601766586, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03703697398304939, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03659268096089363, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022685978561639786, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019786253571510315, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01958181895315647, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01814202219247818, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01787199079990387, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012355529703199863, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013116487301886082, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011622682213783264, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00981193408370018, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22997596859931946, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21390779316425323, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20824147760868073, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18735642731189728, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10835335403680801, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10210369527339935, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12318975478410721, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11300349980592728, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11024045199155807, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09628751873970032, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09104704856872559, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06305932998657227, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05424826964735985, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.052158717066049576, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05166468024253845, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03159516304731369, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027158288285136223, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02692756988108158, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024748319759964943, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024435067549347878, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01686931401491165, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016995009034872055, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016010494902729988, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011788194067776203, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22767220437526703, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20590855181217194, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1974327266216278, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17571236193180084, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10513924807310104, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09669210761785507, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12478604167699814, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11253437399864197, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10784678161144257, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0911923423409462, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08662630617618561, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06395334005355835, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05427934229373932, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.050969526171684265, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05009040981531143, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03244931623339653, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02722683921456337, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026849869638681412, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02457202598452568, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023998988792300224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0178634412586689, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018188947811722755, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016503090038895607, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013422280550003052, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11165320128202438, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10095376521348953, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09480677545070648, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08368989080190659, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.051745809614658356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04641786962747574, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06490707397460938, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05855473875999451, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.053137317299842834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.044648803770542145, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0424303263425827, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.033133577555418015, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028132695704698563, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025079689919948578, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02430962398648262, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016699327155947685, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01332243625074625, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012935460545122623, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011951242573559284, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011472618207335472, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00891187135130167, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009025610983371735, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007671687752008438, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006438964046537876, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08787642419338226, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07937604188919067, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07216621935367584, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06384798884391785, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0402684211730957, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03488251566886902, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05443749576807022, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.049020182341337204, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04144969955086708, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.035005923360586166, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.033817108720541, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02769113890826702, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023472554981708527, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.019482631236314774, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01841163821518421, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013881721533834934, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010251471772789955, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009706787765026093, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009226606227457523, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00856282189488411, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007271622307598591, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00712246261537075, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005779361352324486, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0047718496061861515, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24128219485282898, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22075027227401733, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.21281562745571136, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1892167031764984, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1132299080491066, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10473830997943878, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13313037157058716, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12036404758691788, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1157686859369278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09854756295681, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09288449585437775, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06792359799146652, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.057661741971969604, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05435579642653465, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05354626476764679, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03397774696350098, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027782810851931572, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027395479381084442, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024769896641373634, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02423834055662155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017521878704428673, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016855424270033836, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015978367999196053, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010576452128589153, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2248149812221527, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20321574807167053, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1953609138727188, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16733312606811523, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10397126525640488, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09560532867908478, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12121181190013885, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10995757579803467, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10624634474515915, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0876431092619896, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08011007308959961, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.062195390462875366, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.052765678614377975, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05002087727189064, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04937410727143288, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031153889372944832, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026117192581295967, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02579212374985218, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022847061976790428, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02241898514330387, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01659436896443367, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016543971374630928, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015523325651884079, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011489930562675, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16106678545475006, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14985758066177368, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14554445445537567, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13110172748565674, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07583389431238174, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07142321765422821, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08693703263998032, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07970935851335526, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07727991789579391, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06765427440404892, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06411150842905045, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04462350904941559, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.038534000515937805, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0368022657930851, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.036394763737916946, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022449230775237083, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019706368446350098, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019519725814461708, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01813659630715847, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017887739464640617, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01231059618294239, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013091797940433025, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01165592297911644, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009861803613603115, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23500356078147888, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21947908401489258, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21409031748771667, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19295081496238708, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1105610653758049, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10460059344768524, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12499923259019852, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11488448828458786, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11239426583051682, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09866546094417572, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09333180636167526, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06371857970952988, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05500702187418938, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05306001380085945, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.052592683583498, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03185788914561272, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027301110327243805, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027088692411780357, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024880312383174896, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02458547055721283, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016716433688998222, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016574405133724213, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015912653878331184, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01092415302991867, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22953805327415466, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20767374336719513, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19921846687793732, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1773843616247177, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10565697401762009, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09715336561203003, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12407486885786057, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11285249888896942, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10840269178152084, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09159203618764877, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08660439401865005, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06320130079984665, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05414876341819763, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05087476968765259, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05007775500416756, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03180979564785957, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02673375979065895, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026422951370477676, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02399812452495098, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023489801213145256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017241451889276505, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017274292185902596, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016104429960250854, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012232898734509945, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11132950335741043, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10140202194452286, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09616003930568695, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08503697067499161, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05163965001702309, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.046947360038757324, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06277316808700562, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.057246413081884384, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.052838899195194244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04480012506246567, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04233425855636597, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03195224329829216, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027388501912355423, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024848919361829758, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02422751858830452, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016015587374567986, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012935733422636986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012618149630725384, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011575917713344097, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011172179132699966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008420788682997227, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008357047103345394, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007417592220008373, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005612168926745653, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09110677987337112, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08306273818016052, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07672479003667831, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06804455816745758, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04186178743839264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0370250903069973, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05434916913509369, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04960862919688225, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.042894382029771805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03663629665970802, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.034989867359399796, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027429314330220222, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02362203225493431, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02014344371855259, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01925126276910305, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013714775443077087, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.01047459989786148, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009980550967156887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009437842294573784, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008846938610076904, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007158426567912102, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00700576975941658, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005893671419471502, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004475085996091366, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24776744842529297, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2284787893295288, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2212757170200348, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.19765256345272064, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11622009426355362, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.1085767149925232, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13550643622875214, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12245634198188782, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11865688860416412, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.102206289768219, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0965050458908081, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06919965893030167, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05852726101875305, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.055745553225278854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.055030707269907, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.034592702984809875, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02836816944181919, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.028029203414916992, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02543146163225174, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02499077282845974, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017755378037691116, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016914820298552513, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.016314376145601273, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010427840054035187, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23114030063152313, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2101077288389206, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20313820242881775, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.18065394461154938, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10692006349563599, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09869658201932907, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12317497283220291, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11241066455841064, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10933249443769455, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09289060533046722, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08736476302146912, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06342343986034393, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.054356012493371964, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0517239011824131, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05111418291926384, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0320441909134388, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.027507320046424866, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.027226807549595833, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024777941405773163, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024388402700424194, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017636386677622795, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01806681975722313, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016634849831461906, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013361772522330284, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1545158177614212, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14400364458560944, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13966628909111023, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1259564459323883, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07278230786323547, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06844323128461838, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08379753679037094, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07676535099744797, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07408454269170761, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06500305235385895, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06171732768416405, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04297453537583351, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03706534206867218, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.035270724445581436, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03484111279249191, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021605370566248894, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018804635852575302, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01860319823026657, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017310738563537598, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017048785462975502, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011779178865253925, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012422870844602585, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011104798875749111, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009249066933989525, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23138639330863953, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21654993295669556, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.211122065782547, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19041797518730164, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10888592898845673, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10308631509542465, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12329559028148651, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11331555247306824, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11062277853488922, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09741560369729996, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09222348779439926, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06295622140169144, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05426626652479172, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05226905643939972, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05179129168391228, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03146398812532425, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026938319206237793, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026713447645306587, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024603035300970078, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024304216727614403, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016566939651966095, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016423504799604416, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.0157605092972517, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01089288666844368, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22447055578231812, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20199058949947357, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1930476427078247, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1716967225074768, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.1029348224401474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09405587613582611, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12168683111667633, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11086958646774292, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10590183734893799, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08883410692214966, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08419621735811234, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06214922294020653, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05320326238870621, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04957543686032295, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04867171868681908, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03131372854113579, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02605293318629265, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025701159611344337, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023302992805838585, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022730911150574684, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016957318410277367, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016907304525375366, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015677114948630333, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011918226256966591, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09505610913038254, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08661408722400665, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08114253729581833, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07239079475402832, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0439278669655323, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03947019204497337, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.055220089852809906, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05034983530640602, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04512782022356987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03851614147424698, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.036777012050151825, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.028097311034798622, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.024091729894280434, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02120264247059822, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.020474422723054886, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014068706892430782, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011134011670947075, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.010758000425994396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01007192861288786, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009608493186533451, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007376404013484716, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007439097389578819, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006270051933825016, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005065997131168842, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08312799036502838, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07528221607208252, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06886503100395203, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06151345744729042, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03789013624191284, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03317302092909813, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05041510984301567, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04580502212047577, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03915052115917206, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03333932161331177, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.032077617943286896, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.025457315146923065, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.021797247231006622, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.018283093348145485, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.017361773177981377, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012733888812363148, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009563250467181206, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009080491960048676, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008662392385303974, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008073664270341396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006640531588345766, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0065317098051309586, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005345670040696859, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004266570787876844, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2084762006998062, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1873328536748886, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17696240544319153, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1582617461681366, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09554857015609741, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0861482098698616, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11925658583641052, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10673332214355469, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09905172884464264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08329728990793228, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07925831526517868, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06078384816646576, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0509660504758358, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.045867353677749634, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04461226612329483, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.030373578891158104, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02356755919754505, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.022964173927903175, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021086378023028374, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.020238786935806274, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015610883943736553, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014928868040442467, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013263569213449955, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009403369389474392, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2252357006072998, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20019440352916718, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19099602103233337, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16608907282352448, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10410526394844055, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0940878614783287, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12316381186246872, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11178719997406006, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10720418393611908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08770442754030228, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08108744025230408, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06333398818969727, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.054028887301683426, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.050551995635032654, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04970691725611687, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031838126480579376, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02695830538868904, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02656683698296547, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023785853758454323, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02324177697300911, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017220931127667427, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017941109836101532, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015884488821029663, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013238579966127872, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16142886877059937, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15012533962726593, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14559632539749146, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13155366480350494, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07586482167243958, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07137951999902725, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08730421215295792, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07997871190309525, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07731981575489044, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06785295158624649, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0645328015089035, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04473458230495453, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.038585152477025986, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03672480583190918, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03628608584403992, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022523628547787666, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019529810175299644, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019328836351633072, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017977111041545868, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017708731815218925, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01233726367354393, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01282105129212141, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011649230495095253, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009472043253481388, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23839831352233887, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22303731739521027, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21754151582717896, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19650858640670776, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11228300631046295, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10637929290533066, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12714341282844543, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11676535755395889, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11407990753650665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10057338327169418, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09549423307180405, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06506261229515076, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.056004319339990616, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05399620532989502, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.053522344678640366, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032635245472192764, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028049400076270103, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0278288796544075, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025699002668261528, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025398390367627144, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017507098615169525, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017413463443517685, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01670432649552822, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011971660889685154, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23442402482032776, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20929376780986786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1993788629770279, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1767556369304657, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10750631988048553, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09758402407169342, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12819822132587433, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1160782054066658, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11081108450889587, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09204526990652084, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08704780042171478, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06537280976772308, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0557434968650341, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05187210440635681, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05092005431652069, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03298581764101982, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02742605097591877, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02706449292600155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024438761174678802, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02382984757423401, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017988678067922592, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018020734190940857, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016607016324996948, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012941897846758366, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10915088653564453, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09996100515127182, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09417623281478882, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08403685688972473, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05073053389787674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04592610150575638, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06338266283273697, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05751388892531395, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05199103802442551, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04460688680410385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.042467642575502396, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.032372765243053436, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027653517201542854, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024587281048297882, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023806991055607796, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016276765614748, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013079593889415264, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012690406292676926, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011895468458533287, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011425018310546875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008672786876559258, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008892456069588661, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007486382499337196, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006367461755871773, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09277889132499695, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08362556248903275, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.075612872838974, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06769263744354248, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04219576343894005, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03643909469246864, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05709553882479668, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05208013206720352, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.043681561946868896, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.037125859409570694, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.035857658833265305, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028836818411946297, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024825049564242363, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020377838984131813, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019201841205358505, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014462435618042946, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010698246769607067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010079222731292248, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009705257602036, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008953229524195194, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007535993587225676, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0074422312900424, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005930895917117596, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004857789725065231, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22631549835205078, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20395733416080475, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19345766305923462, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17369233071804047, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10458746552467346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09472565352916718, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1294337809085846, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11577440798282623, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1081930622458458, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09131025522947311, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08728436380624771, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06641125679016113, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05535738170146942, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.050276950001716614, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04901838302612305, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.033241935074329376, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02585124596953392, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02525605447590351, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02310567907989025, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.022301284596323967, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017175517976284027, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0162236038595438, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014742575585842133, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010256044566631317, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19693973660469055, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17463235557079315, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16738033294677734, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14046530425548553, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08949059993028641, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08162388950586319, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10464145988225937, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09486177563667297, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09178825467824936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07375133782625198, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06818485260009766, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.053640007972717285, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04601074010133743, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.043589331209659576, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04302525147795677, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0271455105394125, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023680610582232475, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02341744303703308, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02076142467558384, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02037639357149601, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015150224789977074, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016195612028241158, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014235212467610836, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012523234821856022, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15326817333698273, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14245536923408508, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13803035020828247, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12458686530590057, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07192426919937134, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06753043830394745, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08284492790699005, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07618645578622818, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07332918792963028, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06427236646413803, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.060999173671007156, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04237791895866394, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.036677151918411255, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03475875034928322, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.034301530569791794, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021244652569293976, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018356507644057274, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018146615475416183, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01686103083193302, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016578780487179756, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011410635896027088, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011916508898139, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010687531903386116, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008611931465566158, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22995367646217346, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21489667892456055, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20935280621051788, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18914200365543365, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10810048133134842, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10219445079565048, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12271689623594284, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11280754208564758, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10989463329315186, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09681051969528198, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09168244153261185, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0626223161816597, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05405173823237419, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05191950872540474, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05140350013971329, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03137567266821861, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026827167719602585, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02658943273127079, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024543780833482742, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02422652766108513, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016619862988591194, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016479674726724625, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.0157657191157341, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011041463352739811, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22419849038124084, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19955649971961975, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18923284113407135, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16719679534435272, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10263297706842422, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0926450714468956, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12372081726789474, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11206214874982834, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10600800812244415, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08768445998430252, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08298695087432861, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06324008852243423, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05397895351052284, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04961591586470604, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.048553384840488434, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03198441490530968, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02642146497964859, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025994237512350082, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023565057665109634, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022886507213115692, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01756899803876877, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017691688612103462, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01604960672557354, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012882840819656849, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11232311278581619, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10325366258621216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09727254509925842, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08734290301799774, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05213707685470581, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04732345789670944, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06540362536907196, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05932300165295601, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05344604328274727, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04622003436088562, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04432547092437744, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03335073962807655, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02843063324689865, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02516927942633629, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024353954941034317, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016724321991205215, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013235662132501602, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01281893439590931, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01207090076059103, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011563757434487343, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008882636204361916, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00882827490568161, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00763359759002924, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006069305818527937, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.092277891933918, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08404531329870224, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07572648674249649, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06817670166492462, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.042034365236759186, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.036310356110334396, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05754167214035988, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05273367837071419, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0433402843773365, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.037381306290626526, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.036334116011857986, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.029139947146177292, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02501695603132248, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02033197693526745, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019066421315073967, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01461815182119608, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010671574622392654, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010000588372349739, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00975734181702137, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008966054767370224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0076485066674649715, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007474154233932495, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0059625995345413685, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00486211059615016, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24171556532382965, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22030502557754517, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2107611894607544, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.18933796882629395, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11232135444879532, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10301925987005234, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13584600389003754, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12214063853025436, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11559794843196869, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09870044887065887, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0939365103840828, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06950376182794571, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.058414310216903687, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05388246476650238, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.052769407629966736, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03478600084781647, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02747768722474575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02694813348352909, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02466757409274578, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0239493977278471, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01781270280480385, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0168201494961977, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015587033703923225, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010322075337171555, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21339523792266846, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19071704149246216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18327385187149048, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15760360658168793, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09815619140863419, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08961236476898193, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11488311737775803, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10361111164093018, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10060214251279831, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.081927090883255, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0764293223619461, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.059278182685375214, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.050428297370672226, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.047937363386154175, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04735831543803215, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030000608414411545, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02610607258975506, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02584550343453884, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02306114137172699, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022672826424241066, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016852792352437973, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017896991223096848, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01591387204825878, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013927808962762356, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15069980919361115, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1403449922800064, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13610312342643738, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12298306077718735, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07071918994188309, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06652948260307312, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08121108263731003, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07469438016414642, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07204879820346832, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06326748430728912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06006067618727684, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04149032384157181, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.035919468849897385, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0341329462826252, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03370654210448265, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02082296647131443, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.017966147512197495, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.017764227464795113, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016505856066942215, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01624281518161297, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01121041364967823, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011571863666176796, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01053717453032732, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008294587023556232, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2276013195514679, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21292179822921753, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2075345516204834, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18766948580741882, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10700026899576187, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10128471255302429, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12151782959699631, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11161280423402786, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10879739373922348, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0959535613656044, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09107828885316849, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.062043674290180206, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05351732671260834, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05144678056240082, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.050957974046468735, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03113187849521637, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02671976573765278, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026487072929739952, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024501752108335495, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02419472299516201, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016725214198231697, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016603389754891396, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015921976417303085, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011367050930857658, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21722881495952606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19296574592590332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18207857012748718, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16048669815063477, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09941811859607697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08910755068063736, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12118402123451233, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10981253534555435, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10263324528932571, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08465934544801712, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08008098602294922, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06203870475292206, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.053060486912727356, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.048098400235176086, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04693049564957619, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03144482895731926, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.0257726963609457, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025249438360333443, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02297421544790268, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022197948768734932, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017532961443066597, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017524709925055504, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015878496691584587, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012838480994105339, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11127032339572906, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10186322778463364, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09462219476699829, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0852600634098053, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.051419444382190704, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04586825519800186, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0669855922460556, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.060534846037626266, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05286723002791405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04569275677204132, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0441695936024189, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.034197647124528885, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028999537229537964, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024912722408771515, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02386857010424137, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017155051231384277, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013235663063824177, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01270238310098648, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01212984137237072, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011485201306641102, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009118732064962387, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009134171530604362, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007551827933639288, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006387518718838692, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09264256060123444, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08347854018211365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07342389225959778, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06622939556837082, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04194728657603264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.035093385726213455, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06039203703403473, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.054765552282333374, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.043541330844163895, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03721259906888008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03654489293694496, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.030551470816135406, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02602560818195343, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020305028185248375, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0187431201338768, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015261882916092873, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010733489878475666, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009897513315081596, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009812651202082634, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008820083923637867, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007987302727997303, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007765491493046284, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005903163924813271, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005031142849475145, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23622660338878632, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21146148443222046, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2002110332250595, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17976492643356323, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10832762718200684, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09768552333116531, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13433127105236053, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12018239498138428, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11228631436824799, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09423863887786865, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09005696326494217, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06898397952318192, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05744157359004021, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05196850374341011, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0506480447947979, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03455372154712677, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026637736707925797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.025995349511504173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023772137239575386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02287961170077324, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017739076167345047, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016707735136151314, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015026422217488289, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010415575467050076, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23328757286071777, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2008572518825531, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18955270946025848, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1602603793144226, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10653191804885864, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09396056830883026, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12675264477729797, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11533182859420776, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11059662699699402, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08550278097391129, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.079718679189682, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06511512398719788, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.055725980550050735, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.051612693816423416, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05063644424080849, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03276614472270012, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.027542348951101303, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.027091288939118385, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02353580668568611, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02287783846259117, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01779075711965561, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01844809763133526, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.016208184882998466, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01357044093310833, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15713560581207275, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14690980315208435, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14289118349552155, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12913206219673157, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07385904341936111, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0697961151599884, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08396847546100616, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07742322236299515, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07512113451957703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0662200078368187, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06277565658092499, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04286263883113861, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03718222305178642, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03558645769953728, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.035210225731134415, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02146754413843155, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018633633852005005, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018448999151587486, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017118457704782486, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01688247174024582, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011482086032629013, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01182106789201498, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010882125236093998, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008340478874742985, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2397596836090088, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2248278707265854, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21955746412277222, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19852858781814575, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11274108290672302, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10701889544725418, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12701718509197235, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11696838587522507, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11442403495311737, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10119539499282837, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09596526622772217, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0647168830037117, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05601408705115318, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.054104868322610855, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05365871638059616, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03246349096298218, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027912991121411324, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027706366032361984, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025581078603863716, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025296617299318314, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01726936548948288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017039423808455467, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016512049362063408, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01139445137232542, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21943782269954681, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19517438113689423, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18511570990085602, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16307245194911957, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10013332962989807, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09029565751552582, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11969611793756485, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10917185246944427, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10328176617622375, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0851324275135994, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08030002564191818, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06108550354838371, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.052342429757118225, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04818718135356903, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04716401547193527, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030800333246588707, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025296291336417198, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024885451421141624, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02237711101770401, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021728696301579475, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01672794111073017, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016486994922161102, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015289992094039917, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011529936455190182, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10741223394870758, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09752125293016434, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09027533233165741, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08137470483779907, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04946756735444069, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.043831631541252136, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0645969957113266, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.058403901755809784, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05103814974427223, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04370573163032532, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.042230263352394104, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.032948873937129974, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02792328968644142, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.023931287229061127, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02290060743689537, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01653987169265747, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012627575546503067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012093432247638702, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011496949940919876, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010850832797586918, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008819116279482841, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00862535834312439, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007296743802726269, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0058740004897117615, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09429532289505005, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08454375714063644, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07481729984283447, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06756343692541122, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.042806077748537064, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.035872768610715866, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.061002716422080994, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05508601292967796, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04452977702021599, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0378449410200119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0371575802564621, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.030903687700629234, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02636294811964035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020701929926872253, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019178222864866257, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015439757145941257, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010921318084001541, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010112658143043518, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00996151752769947, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008978505618870258, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00805299635976553, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007847803644835949, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0059838914312422276, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005066778510808945, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23312431573867798, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20599572360515594, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19267037510871887, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17265582084655762, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10583405196666718, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0937318354845047, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1347084939479828, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12048045545816422, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11054018139839172, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09147099405527115, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08775372058153152, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06934854388237, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05751781538128853, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.050841324031353, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.049135588109493256, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03477668762207031, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026102030649781227, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.025291094556450844, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023172147572040558, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02205841988325119, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017861276865005493, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016664797440171242, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014626954682171345, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010324093513190746, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2402040809392929, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21464590728282928, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20579393208026886, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17071497440338135, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11032643169164658, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.10075609385967255, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12720425426959991, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11703792214393616, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11312944442033768, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09056318551301956, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08129115402698517, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06526016443967819, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.056170083582401276, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.053112611174583435, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0523747093975544, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03274589404463768, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02785079926252365, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02750314027070999, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02396308444440365, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.023470601066946983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017575405538082123, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017823729664087296, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01635146141052246, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012549979612231255, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15563538670539856, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14571747183799744, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14194975793361664, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12835003435611725, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07325383275747299, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06934473663568497, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0827997624874115, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07643123716115952, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0744086280465126, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0656740665435791, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06223075091838837, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04223186895251274, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.036669448018074036, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.035237155854701996, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03489646315574646, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021148208528757095, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01833728887140751, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01817876286804676, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016824569553136826, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016613150015473366, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011249755509197712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011459489353001118, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010704255662858486, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007933128625154495, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25008922815322876, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23472291231155396, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22952330112457275, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20753894746303558, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11771421134471893, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11190129816532135, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1318623274564743, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1217266097664833, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11940260976552963, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10573215782642365, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10002954304218292, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06719762831926346, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05824943259358406, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.056440286338329315, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0560067854821682, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.033609360456466675, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.029009727761149406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028806664049625397, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026562033221125603, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.0262906551361084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017696144059300423, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01750830002129078, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016966896131634712, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01149442046880722, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2207377701997757, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1963159441947937, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18605107069015503, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16445016860961914, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10049339383840561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09057539701461792, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12014171481132507, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10995110124349594, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10372315347194672, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08560627698898315, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08094929158687592, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.060900840908288956, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05262107029557228, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04834257811307907, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04728339612483978, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030648818239569664, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025327706709504128, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024887483566999435, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022418279200792313, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021732274442911148, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016456259414553642, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016462644562125206, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014987073838710785, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01139778271317482, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10738512873649597, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09691121429204941, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08937288075685501, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08047854900360107, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04931023344397545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.043386310338974, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06469389796257019, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.058671142905950546, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.051076047122478485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.043487757444381714, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.042159974575042725, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03311000019311905, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028147118166089058, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02388647384941578, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.022784091532230377, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016627363860607147, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012662437744438648, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012111291289329529, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011527645401656628, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010844514705240726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008881705813109875, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008763754740357399, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007302199024707079, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006038710940629244, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10051333904266357, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08995019644498825, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07936451584100723, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07155728340148926, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04555797576904297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0381191186606884, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0647621750831604, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.059053752571344376, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.047440096735954285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04022856056690216, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.039364587515592575, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03284512460231781, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.028125043958425522, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02211725525557995, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020483599975705147, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01642092503607273, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011753110215067863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010894617065787315, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01071346178650856, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009688162244856358, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00862308219075203, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008519495837390423, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006470319349318743, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00565060181543231, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2260676920413971, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19808335602283478, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18384340405464172, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1646207720041275, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1021135225892067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08933527767658234, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13235025107860565, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11799085140228271, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10694687068462372, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08778126537799835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08436689525842667, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06767169386148453, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.056318480521440506, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04905277118086815, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.047186143696308136, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0338951013982296, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02522323466837406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02432291954755783, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022334575653076172, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021094365045428276, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0173756405711174, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016318053007125854, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013963153585791588, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010094597004354, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.22245877981185913, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19108028709888458, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17958694696426392, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14322368800640106, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10288450121879578, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0899772197008133, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12187358736991882, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11125506460666656, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10600306838750839, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0786895826458931, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07149655371904373, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.062489788979291916, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05365823209285736, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04989295452833176, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04896125569939613, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03137227147817612, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026560766622424126, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026122601702809334, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021940886974334717, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.021316325291991234, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017023170366883278, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017669133841991425, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015545474365353584, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01295256894081831, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16797703504562378, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15715546905994415, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1531948745250702, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13834473490715027, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07899484038352966, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07476324588060379, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08920290321111679, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08237466216087341, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0802423432469368, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07081857323646545, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06698916852474213, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0454842709004879, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03947153314948082, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03794325888156891, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03758078068494797, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022746795788407326, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019648419693112373, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019478555768728256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01800292171537876, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017773214727640152, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012013151310384274, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012132804840803146, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011428020894527435, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008223091252148151, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2553473114967346, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23948460817337036, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2341403216123581, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21163307130336761, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12015470862388611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11419135332107544, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1345553994178772, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1242605671286583, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12193117290735245, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10784654319286346, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.1020200327038765, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06850341707468033, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05942890793085098, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.057577114552259445, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05712937191128731, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03421453759074211, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02951117418706417, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.029300900176167488, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026995738968253136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02671276591718197, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01786459982395172, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017707116901874542, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017107728868722916, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011474395170807838, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22755871713161469, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20250049233436584, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19203004240989685, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17050889134407043, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10365361720323563, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0935329794883728, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12409493327140808, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1131894588470459, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10700863599777222, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08864191919565201, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08408191800117493, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06297295540571213, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.054135967046022415, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04982968419790268, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.048773039132356644, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0317390151321888, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.026040010154247284, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025617050006985664, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02308829501271248, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022413061931729317, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017241457477211952, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01682243123650551, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01577097736299038, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011582591570913792, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10909388214349747, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09895548224449158, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09175010025501251, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08262575417757034, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05017224699258804, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.044521965086460114, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.065196193754673, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05911868065595627, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0518532320857048, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04433218762278557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.042826227843761444, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03320721536874771, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028273997828364372, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02424338459968567, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023210298269987106, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016651660203933716, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012752310372889042, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012223614379763603, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01160493865609169, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01095234602689743, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008818582631647587, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00865855347365141, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007298703771084547, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005837996490299702, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09857343137264252, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08803454786539078, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07897008210420609, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07113919407129288, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04462052881717682, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03804450109601021, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.061297859996557236, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05593888461589813, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04652892053127289, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.039270803332328796, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03822016343474388, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.031062910333275795, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02664913795888424, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02158571593463421, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020225590094923973, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015559984371066093, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011340124532580376, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010648291558027267, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010289264842867851, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009428630582988262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008140883408486843, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007955504581332207, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0063036782667040825, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005189589224755764, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23709672689437866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2122778743505478, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.200728639960289, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1799965500831604, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10858580470085144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09772373735904694, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1338045597076416, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12087646871805191, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11258566379547119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09445260465145111, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09017506241798401, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06835481524467468, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.057752951979637146, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05209878832101822, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05068298056721687, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03418070077896118, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026683133095502853, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.026021189987659454, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023791611194610596, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.022878989577293396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017543600872159004, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01671038381755352, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01496666669845581, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010340533219277859, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21104438602924347, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18339064717292786, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17425444722175598, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14053170382976532, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09631086140871048, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08546467125415802, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11206463724374771, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10263749957084656, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09964609146118164, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07635313272476196, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0673852190375328, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05734339356422424, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04931328818202019, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04646912217140198, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.045792195945978165, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028761183843016624, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.024421963840723038, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.024121100082993507, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02053840085864067, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02008403092622757, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015440210700035095, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015763018280267715, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014280622825026512, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011201359331607819, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17122210562229156, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1602490097284317, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15623311698436737, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14107246696949005, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08058445900678635, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07626042515039444, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09106848388910294, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0839742124080658, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08184116333723068, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07215291261672974, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06829813867807388, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04637061432003975, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04024761542677879, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03872036561369896, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03835688531398773, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023266129195690155, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020091883838176727, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01992405205965042, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01840423047542572, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018180001527071, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012435880489647388, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012467870488762856, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011855601333081722, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008535416796803474, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.25978994369506836, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24367102980613708, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23818281292915344, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2151537388563156, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12232260406017303, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11620613932609558, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13693605363368988, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12648965418338776, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12412811070680618, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10971970111131668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10365559905767441, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06979217380285263, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06052546948194504, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05864296481013298, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.058195117861032486, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03489886224269867, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.03006782941520214, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02985457517206669, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027479682117700577, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027197113260626793, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01830809749662876, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018043097108602524, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017542680725455284, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011708474718034267, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22470517456531525, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19985967874526978, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18915267288684845, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16831502318382263, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.1024053543806076, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0921081081032753, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12268243730068207, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11245165020227432, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10578833520412445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08768218010663986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08337170630693436, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06249650940299034, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05388689041137695, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04922695457935333, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04809552803635597, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03144201263785362, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025749772787094116, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025291355326771736, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02285996824502945, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02212986908853054, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01688787341117859, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01671028323471546, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015276765450835228, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011476625688374043, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11506880074739456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1047908142209053, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09727826714515686, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08774496614933014, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0532032772898674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04722939804196358, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06864962726831436, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0625542551279068, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05479979142546654, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.047079943120479584, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.045458171516656876, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.034987710416316986, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029976554214954376, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025737926363945007, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024650264531373978, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0175176914781332, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013599058613181114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013045085594058037, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012404345907270908, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01172975730150938, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009247560054063797, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009298352524638176, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007694656029343605, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006379768718034029, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10591372847557068, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09568781405687332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08559510856866837, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07725327461957932, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04833386465907097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04117719084024429, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0671609416604042, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06121590733528137, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.05011782795190811, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0428633876144886, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.041765548288822174, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.034119218587875366, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.029219428077340126, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02337982878088951, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02180302143096924, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.017027683556079865, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012265010736882687, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.011423708871006966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011185468174517155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01017798762768507, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008915779180824757, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008654256351292133, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0068337079137563705, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005549912340939045, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.25177228450775146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22790008783340454, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.21783538162708282, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1954384297132492, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11630088835954666, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.1062699630856514, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14086510241031647, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12698812782764435, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11999452114105225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.1018645241856575, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09696852415800095, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07169375568628311, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.060673780739307404, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.055750276893377304, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05452797934412956, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.035855088382959366, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0284816175699234, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027890417724847794, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02550680935382843, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02470562793314457, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018399393185973167, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01751648634672165, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.016099723055958748, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.01074402965605259, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2182374894618988, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19003166258335114, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1782897263765335, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15049158036708832, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10061300545930862, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08863139152526855, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12084223330020905, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11043620109558105, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10348598659038544, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08090060204267502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07388876378536224, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06173795834183693, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05297141522169113, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04846414551138878, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.047358207404613495, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030906984582543373, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025284254923462868, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.024722037836909294, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021375346928834915, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020615648478269577, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016351312398910522, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016302457079291344, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014576860703527927, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011061557568609715, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16697543859481812, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15623103082180023, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15226376056671143, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13742560148239136, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07860790938138962, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07437162101268768, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08866182714700699, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08189330995082855, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0798490047454834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0703415647149086, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06657259166240692, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.045199837535619736, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.039259862154722214, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03777562081813812, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03742165118455887, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022626837715506554, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01958025060594082, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019418461248278618, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01791660115122795, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017697710543870926, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011982264928519726, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01211473811417818, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011415814980864525, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00825535785406828, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2622375190258026, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24590179324150085, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.24032838642597198, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21711093187332153, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1235501766204834, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11735615879297256, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13871394097805023, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1277432143688202, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12537555396556854, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11073023080825806, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10485901683568954, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07067796587944031, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.061156682670116425, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05926789715886116, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.058824632316827774, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03541522100567818, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030476488173007965, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.03026880882680416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027858080342411995, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02757168561220169, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01878434419631958, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.0184229277074337, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.018032867461442947, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01212524063885212, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2267468124628067, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20145246386528015, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19046024978160858, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17036892473697662, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10308866202831268, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09261950850486755, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12422934174537659, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1135135143995285, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10658351331949234, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08846940100193024, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08441833406686783, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06290768086910248, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05425737425684929, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.049531109631061554, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.048361338675022125, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03168439865112305, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02582532726228237, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025341248139739037, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022943435236811638, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022187435999512672, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01707475259900093, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01668396405875683, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015474071726202965, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011310530826449394, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12034586071968079, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10987352579832077, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10228307545185089, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09226810932159424, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.055730514228343964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04969993978738785, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07152087241411209, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06513748317956924, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05733850225806236, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04937439411878586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04758580029010773, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.036417700350284576, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0311916284263134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026902874931693077, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0258173905313015, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01824694499373436, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014096946455538273, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013528427109122276, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012837103568017483, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012146483175456524, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009580492973327637, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009475324302911758, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.00798816792666912, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006324633955955505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10652542859315872, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09664323925971985, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08686123043298721, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07840367406606674, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04881307855248451, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04181428998708725, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06679240614175797, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.061337776482105255, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.05043098330497742, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04330621287226677, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04209325462579727, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03383249416947365, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02928820066154003, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.023579489439725876, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02205110527575016, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01690560393035412, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012337940745055676, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.011535362340509892, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011245488189160824, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010289319790899754, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008800219744443893, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008631553500890732, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006832864601165056, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005533932708203793, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2511608898639679, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.228053480386734, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2177199274301529, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1955273449420929, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11612720042467117, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10630348324775696, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.14129751920700073, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12697061896324158, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11971767991781235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10193823277950287, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09702982008457184, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0722113698720932, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06063264608383179, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05567837879061699, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05447738617658615, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0359949953854084, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.028432125225663185, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027850646525621414, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025507209822535515, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0247016754001379, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01849721372127533, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01749543286859989, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.016131090000271797, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010742837563157082, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23338764905929565, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19730332493782043, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18470162153244019, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15609079599380493, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10701139271259308, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09191561490297318, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12778332829475403, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11576775461435318, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.11137150973081589, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08374863862991333, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07656052708625793, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0650717169046402, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05560821667313576, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.051570355892181396, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05055925250053406, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.032766010612249374, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026807811111211777, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026366839185357094, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022159816697239876, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02146257273852825, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01764598861336708, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01706630550324917, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01594626158475876, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011547278612852097, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16630913317203522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1554914265871048, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15154147148132324, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13668814301490784, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07837861776351929, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07409675419330597, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08852097392082214, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08168913424015045, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07964184880256653, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0700603723526001, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06628510355949402, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.045194532722234726, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03918144106864929, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03768271952867508, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0373280830681324, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022654559463262558, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019568277522921562, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01940188743174076, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017899412661790848, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017674190923571587, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012089978903532028, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012155415490269661, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011521516367793083, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008330915123224258, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.262803316116333, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24621759355068207, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2407260537147522, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21732403337955475, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1239081397652626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11767838150262833, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1385532170534134, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12811154127120972, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12574756145477295, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.11092781275510788, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10478149354457855, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.07058562338352203, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.061299946159124374, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05938027799129486, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05892793834209442, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0352325439453125, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030388403683900833, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030175570398569107, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.027715520933270454, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027426304295659065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01829480193555355, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018148919567465782, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017511973157525063, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011649630963802338, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22795990109443665, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20205888152122498, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19047945737838745, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17106786370277405, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10343992710113525, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0925723984837532, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12489299476146698, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11469625681638718, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10711552202701569, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08899013698101044, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08512155711650848, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06345754861831665, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05482933670282364, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04970695823431015, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04843364655971527, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.031805526465177536, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025921545922756195, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025394851341843605, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02307489700615406, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022257665172219276, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01684560626745224, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016813531517982483, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01506488025188446, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011383021250367165, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.113357312977314, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10288307815790176, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09493958950042725, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08552917838096619, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05232799053192139, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04608755186200142, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06835773587226868, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06229005753993988, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.054027240723371506, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04623990133404732, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04472893476486206, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.034846942871809006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029881060123443604, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025314636528491974, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02411961928009987, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017437992617487907, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013329806737601757, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012726997956633568, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01214070338755846, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011398023925721645, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009107168763875961, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009147065691649914, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007406111340969801, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006169023923575878, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10154606401920319, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09135876595973969, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08048218488693237, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07256823033094406, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.046153582632541656, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.038615137338638306, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06588034331798553, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.060103029012680054, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04796554520726204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04088454693555832, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04005281999707222, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.033332932740449905, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.028604188933968544, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022376686334609985, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020668024197220802, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016655413433909416, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011818995699286461, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010910171084105968, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010791408829391003, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009702268056571484, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008795625530183315, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008532016538083553, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006615372374653816, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005521473940461874, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24391941726207733, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21730060875415802, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20430965721607208, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.18317227065563202, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1113889068365097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0994197428226471, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1406850963830948, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12567733228206635, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11590466648340225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09677416831254959, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09269075095653534, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07212638854980469, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06002766266465187, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.053483784198760986, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05184318870306015, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03609909117221832, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02742764726281166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.026659680530428886, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02444143407046795, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023365559056401253, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018556172028183937, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017377108335494995, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015420347452163696, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.01078313309699297, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2019636631011963, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1752595454454422, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16735410690307617, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14252924919128418, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09325975179672241, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08314915746450424, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10639803111553192, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09800498187541962, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09614169597625732, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07582256197929382, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0685233473777771, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05438525602221489, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0469660609960556, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.044811785221099854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04430004209280014, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02721424587070942, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023170210421085358, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.022955067455768585, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019658848643302917, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01930733397603035, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014335683546960354, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014312241226434708, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013415693305432796, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009563080966472626, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16665813326835632, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.155771866440773, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15180747210979462, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13692925870418549, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07856863737106323, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07425478845834732, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08871101588010788, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08191698789596558, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07981275022029877, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07018261402845383, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06634769588708878, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.045236118137836456, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03927348926663399, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.037755485624074936, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03739742562174797, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022627675905823708, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01956762745976448, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01939677819609642, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017883172258734703, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01765475422143936, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011951087974011898, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012103540822863579, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011358192190527916, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00822412595152855, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2592669129371643, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.24287956953048706, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.23734238743782043, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.21415884792804718, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.12237776070833206, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11609233915805817, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13718537986278534, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1266418993473053, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.12419014424085617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10948991775512695, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.10349506884813309, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06997556239366531, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.06064430624246597, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05872279778122902, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05826960504055023, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.034977369010448456, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.030221175402402878, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.030016198754310608, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02760501764714718, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.027315223589539528, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01841798424720764, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.018310653045773506, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017637979239225388, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012100056745111942, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.219908207654953, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1939179003238678, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18167820572853088, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1635185182094574, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09978371858596802, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08844079822301865, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12243061512708664, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11196634918451309, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10360479354858398, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08564632385969162, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08223509788513184, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06225590407848358, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.053742922842502594, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0481206476688385, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04669762775301933, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.031488172709941864, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02531936764717102, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024730373173952103, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02256711572408676, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02167695015668869, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017074810341000557, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016824377700686455, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015188731253147125, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011689983308315277, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11566725373268127, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10517524182796478, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0983230322599411, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08863865584135056, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05347678065299988, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.047854237258434296, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06779473274946213, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06179483234882355, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05513398349285126, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04725474491715431, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.045385416597127914, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03463054075837135, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02964331954717636, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025853589177131653, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02489331364631653, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01735844649374485, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013595967553555965, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013084790669381618, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012368212454020977, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011758875101804733, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009130187332630157, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009143242612481117, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007665663026273251, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006216969806700945, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10258647054433823, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09348150342702866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08635962009429932, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07771174609661102, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04725227132439613, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04181288927793503, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06160664185881615, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05624667927622795, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.048659540712833405, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0418391153216362, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04028701037168503, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.031191743910312653, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.026874389499425888, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022798238322138786, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.021756310015916824, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015634771436452866, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011948535218834877, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.011406460776925087, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01087804976850748, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010223880410194397, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008193561807274818, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00810808502137661, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0067252847366034985, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005374417640268803, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22126634418964386, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.200826957821846, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19151705503463745, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17189568281173706, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10237577557563782, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09357098489999771, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12317218631505966, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11204346269369125, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10545293241739273, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08972785621881485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08531586825847626, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06272704154253006, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.053577862679958344, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.049075983464717865, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.047991346567869186, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.031307630240917206, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02503710240125656, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024504289031028748, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022441163659095764, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021724706515669823, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016083959490060806, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015391666442155838, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014143969863653183, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00938747264444828, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.18620330095291138, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1653110831975937, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15841127932071686, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.13565312325954437, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08614174276590347, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0783245638012886, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10002999007701874, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09149785339832306, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0889444351196289, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07218757271766663, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06485670804977417, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05138969421386719, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04405824840068817, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04158481955528259, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.041002411395311356, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.025843074545264244, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.021830681711435318, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.021580127999186516, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019083084538578987, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01869942434132099, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014007342047989368, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014026558957993984, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01302382256835699, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009917073883116245, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15894393622875214, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.148336261510849, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.144348606467247, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1301756650209427, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07489058375358582, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07068706303834915, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08481982350349426, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07834342122077942, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07618008553981781, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06685961037874222, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06318403035402298, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04332268610596657, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03759140893816948, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03604205325245857, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03567509353160858, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02168399468064308, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01872747391462326, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018551917746663094, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01710563339293003, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01687716692686081, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011499973945319653, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01166987419128418, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010902724228799343, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008012726902961731, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24626260995864868, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23054230213165283, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2250489741563797, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2029789835214615, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11615406721830368, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11011189967393875, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1302601397037506, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12036988139152527, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11800564080476761, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10383717715740204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0980256050825119, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06641410291194916, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05766301229596138, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.055746063590049744, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05529123172163963, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03320707753300667, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028668833896517754, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02845669351518154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026153696700930595, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025860808789730072, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01739571802318096, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017348358407616615, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01662110537290573, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011412587016820908, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21926440298557281, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19173920154571533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17899172008037567, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16118262708187103, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09916632622480392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08724918961524963, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12255728989839554, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1116316169500351, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10322815179824829, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08451315015554428, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08136296272277832, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.062235310673713684, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05344650149345398, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04778875410556793, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.046382222324609756, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03138255700469017, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02514064311981201, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02455589361488819, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02228929102420807, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02138417586684227, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017022881656885147, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016693688929080963, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015085755847394466, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011552616022527218, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10755029320716858, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0976623147726059, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08886714279651642, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08018578588962555, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0495770201086998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04304835572838783, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06725284457206726, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06084119901061058, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05122172087430954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04393092915415764, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04284226894378662, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.034340936690568924, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029143625870347023, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024061091244220734, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02270631305873394, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01720157451927662, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01276921946555376, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012069852091372013, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011674508452415466, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010832477360963821, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009061679244041443, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008998777717351913, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007139190565794706, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006117700133472681, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09410324692726135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08456689119338989, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0725243017077446, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06572984158992767, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04272708669304848, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.034563079476356506, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06414905190467834, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.058036480098962784, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04447053745388985, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03800532594323158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03758743032813072, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0323597677052021, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.027719972655177116, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020746346563100815, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018742671236395836, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016208291053771973, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011003789491951466, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009948669001460075, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010096057318150997, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008852398954331875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008438886143267155, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008207647129893303, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005928113125264645, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005270502530038357, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23469804227352142, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20843546092510223, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19442135095596313, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17429190874099731, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10709373652935028, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09470231831073761, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13732315599918365, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12300257384777069, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.111642025411129, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09290381520986557, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08932308107614517, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07059017568826675, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05887717381119728, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05152440071105957, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04966803267598152, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03541342169046402, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026629840955138206, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.025750942528247833, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02377721108496189, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.022573843598365784, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018228495493531227, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01730748638510704, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014841475524008274, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.01102153304964304, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21666377782821655, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18941941857337952, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18061821162700653, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1493140310049057, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09967105835676193, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08895421773195267, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11534538120031357, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10562722384929657, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10269781202077866, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0802387148141861, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07176459580659866, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05912821739912033, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.050726015120744705, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04802127555012703, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04739350825548172, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.029589137062430382, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025149600580334663, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.024842089042067528, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021330498158931732, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02090054377913475, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015861080959439278, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016067180782556534, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014746599830687046, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011293268762528896, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14495757222175598, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1349981278181076, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13105280697345734, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11812108755111694, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06824877858161926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06420060992240906, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0778413712978363, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07184398174285889, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06949461251497269, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.060830552130937576, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05757257342338562, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0397295206785202, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.034488849341869354, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03287167474627495, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.032486241310834885, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019893141463398933, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.017109887674450874, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.016926731914281845, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015624403022229671, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015384241007268429, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01054272148758173, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010742682963609695, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009922460652887821, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007408487144857645, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22349849343299866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2086833268404007, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20341183245182037, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18342866003513336, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10540299862623215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09962332248687744, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11894781142473221, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10974740982055664, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10714282840490341, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09410672634840012, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08887927979230881, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06076090410351753, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05260045826435089, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05064249038696289, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05017579719424248, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030409859493374825, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026208750903606415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025995297357439995, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023930532857775688, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.023634033277630806, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016106294468045235, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01614178530871868, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015322406776249409, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010904481634497643, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.19828279316425323, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.172440767288208, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16021184623241425, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1431446671485901, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.08982633054256439, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.07840666174888611, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.111396424472332, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10170295089483261, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09355657547712326, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07573641836643219, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07260031998157501, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.056988805532455444, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.048918627202510834, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04341313987970352, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04203324019908905, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0289412010461092, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02305697649717331, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02248617447912693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02035181224346161, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.019475020468235016, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016040734946727753, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015627780929207802, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014212478883564472, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011107469908893108, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10622388124465942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0964430719614029, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08841642737388611, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07976982742547989, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04878826066851616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.042769692838191986, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06640787422657013, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.059171680361032486, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05054723843932152, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04337875545024872, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.042406681925058365, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03389744460582733, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028341593220829964, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.023607581853866577, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.022365398705005646, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01699078641831875, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012454005889594555, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011823005974292755, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01139844674617052, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010625499300658703, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008903900161385536, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008645993657410145, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007009850349277258, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005807584151625633, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09215371310710907, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0841929242014885, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07540691643953323, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06807128340005875, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.042216960340738297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.036179061979055405, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05937441810965538, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05383748933672905, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04359647259116173, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03780341148376465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03684953600168228, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0301395021378994, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.025646105408668518, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020451026037335396, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019041607156395912, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015070181339979172, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010768463835120201, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010025418363511562, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009896907024085522, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00902045238763094, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007876317016780376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00765796285122633, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005970261991024017, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004990661982446909, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.24839548766613007, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.22685697674751282, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.21679215133190155, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.19539132714271545, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11552663147449493, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10595044493675232, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1400473564863205, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12652631103992462, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1188843622803688, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10212378948926926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09743072837591171, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07168642431497574, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.06057228893041611, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.055461399257183075, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05418656766414642, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03596067428588867, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.028456250205636024, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0278488602489233, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025697652250528336, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.024880483746528625, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01847752183675766, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017735064029693604, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.016088372096419334, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011152202263474464, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.17767447233200073, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.15584182739257812, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14883702993392944, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.12518182396888733, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08267804980278015, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07427924871444702, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09557522088289261, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0863979160785675, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0845661610364914, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06585388630628586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06056860089302063, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.049039326608181, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04150582477450371, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.03982069343328476, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.03942037373781204, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.024572942405939102, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.0207817479968071, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02060331031680107, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01744501292705536, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.017181549221277237, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013073320500552654, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.013079751282930374, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01236504502594471, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009142585098743439, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.13989295065402985, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13012894988059998, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12617041170597076, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11370158940553665, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06581268459558487, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.061803363263607025, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07525404542684555, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06944382935762405, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06705821305513382, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05859307944774628, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05547142028808594, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.038477737456560135, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.033372923731803894, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03173782676458359, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.031346019357442856, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019268561154603958, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.0166116114705801, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.016424020752310753, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015183140523731709, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.014934436418116093, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010292137041687965, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010576747357845306, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009672770276665688, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007431812584400177, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19171138107776642, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1788739711046219, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17406263947486877, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15689149498939514, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09102136641740799, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08593463897705078, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10296768695116043, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09506969153881073, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09255000948905945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08130539208650589, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07691989839076996, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05317818000912666, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.046483706682920456, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.044695839285850525, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04427046701312065, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02679688110947609, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.024729864671826363, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.024542711675167084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02295958250761032, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.022707145661115646, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015127881430089474, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01732509769499302, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014480829238891602, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0139829246327281, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.1941763013601303, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16970476508140564, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.15878826379776, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.14094726741313934, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.08849406242370605, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.07798147946596146, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10854038596153259, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09855976700782776, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09161312878131866, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07422184944152832, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07056449353694916, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05498504638671875, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04721105471253395, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04274091497063637, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04162801802158356, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.027660388499498367, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.022650927305221558, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02218356728553772, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.019904447719454765, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.01918126456439495, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.014894338324666023, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01513774786144495, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.013317419216036797, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.010771756991744041, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10229138284921646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09266137331724167, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08298857510089874, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0748482346534729, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04701241850852966, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04001303017139435, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06646805256605148, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05943142995238304, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04865235462784767, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.041643742471933365, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0407802052795887, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0340682789683342, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0285031795501709, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0228537917137146, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02132333815097809, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01710044965147972, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012195638380944729, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011399529874324799, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01116457860916853, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010218619368970394, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009023599326610565, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008806310594081879, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006854228675365448, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006008213385939598, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09242072701454163, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08317166566848755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07093584537506104, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06409300118684769, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04187065362930298, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03366008773446083, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06396082043647766, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.057595789432525635, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.043663449585437775, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03736129403114319, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03709157183766365, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.032451264560222626, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02750035561621189, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020387478172779083, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01832122541964054, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016299815848469734, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010851302184164524, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009776209481060505, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00997735746204853, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008717305958271027, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008482545614242554, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008189016953110695, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005844332743436098, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005327501334249973, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23038607835769653, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20351360738277435, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18832816183567047, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16897520422935486, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10463175922632217, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09158311039209366, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1369885802268982, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12238563597202301, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10934679955244064, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09074045717716217, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08767065405845642, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07075619697570801, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05854718014597893, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05040236935019493, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04827166721224785, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.035643626004457474, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026149345561861992, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.025098497048020363, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0233750157058239, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02198242023587227, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018419666215777397, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017324641346931458, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014557396993041039, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.011041156947612762, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.17568247020244598, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.15537308156490326, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14896798133850098, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1271154135465622, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08120148628950119, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07381932437419891, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09348965436220169, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08522216975688934, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08324091881513596, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06622032821178436, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06187685951590538, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04798422381281853, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04110221192240715, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.039291828870773315, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.03886299580335617, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.024061542004346848, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.020896680653095245, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02069927752017975, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.018024159595370293, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.017743203788995743, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01304757222533226, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01369171217083931, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012339254841208458, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010139242745935917, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1352289766073227, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12604835629463196, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12239307165145874, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11040738224983215, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06417612731456757, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.060382261872291565, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07313144207000732, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06739860773086548, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06525041908025742, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05725349113345146, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.054239727556705475, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03778266906738281, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0329633466899395, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03153007850050926, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.031184108927845955, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019094226881861687, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01741228625178337, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.017259296029806137, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016146734356880188, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015945641323924065, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010802773758769035, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012195530347526073, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010281804949045181, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00980108417570591, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1379607617855072, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1287311315536499, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12528152763843536, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11289409548044205, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06545328348875046, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06175070255994797, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07409466058015823, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06829538941383362, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06650522351264954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05838746577501297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0552593357861042, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.038271673023700714, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03327221795916557, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03198331594467163, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03168449178338051, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01930483803153038, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.017484385520219803, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01734752207994461, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016173649579286575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015988437458872795, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010852287523448467, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012000479735434055, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010369371622800827, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00947942677885294, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.1561465859413147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.137118399143219, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.12874087691307068, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.11215908080339432, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.07192478328943253, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.06392403692007065, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.0887330174446106, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.0789029449224472, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.07379864156246185, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.05968324467539787, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.05630314722657204, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.044651370495557785, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.03851735591888428, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.03536607325077057, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.034567940980196, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.022907260805368423, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.019683837890625, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.01934669353067875, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.017488939687609673, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.017001062631607056, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.013297053053975105, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.014142969623208046, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.012341330759227276, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011260007508099079, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09765104949474335, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08833387494087219, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07940831035375595, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07151730358600616, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04478808864951134, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03828791156411171, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06226005032658577, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05627072975039482, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04638752341270447, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.039617400616407394, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03875594586133957, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.031798508018255234, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.026960868388414383, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02175038494169712, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.020360896363854408, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01592499203979969, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011608592234551907, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.010889067314565182, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010613477788865566, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.00975162535905838, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008434112183749676, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008342966437339783, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006523555610328913, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005706045310944319, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08900502324104309, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08004441857337952, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06883472204208374, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06214643642306328, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0403338298201561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.032811567187309265, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.061430923640728, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.054728664457798004, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.042019110172986984, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03586968034505844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03552760183811188, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.031139161437749863, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02606119215488434, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.019687801599502563, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01782369427382946, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015613851137459278, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010499468073248863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009562757797539234, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009641694836318493, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008533747866749763, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008204236626625061, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007876729592680931, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0058186543174088, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00524193374440074, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23135215044021606, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20228785276412964, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1856437474489212, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1663486361503601, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10463038086891174, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09033692628145218, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13805264234542847, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12375915050506592, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10997463017702103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08998245745897293, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08715833723545074, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0710843876004219, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.059152472764253616, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.050405677407979965, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04813919961452484, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03575148805975914, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026065383106470108, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02496534213423729, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023123640567064285, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021630365401506424, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018408091738820076, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.017271310091018677, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014483071863651276, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010832591913640499, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11565419286489487, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10216823220252991, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09827463328838348, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08279892057180405, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05156710371375084, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04648619890213013, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.062066663056612015, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.054043568670749664, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.052957355976104736, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04235302284359932, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03960967808961868, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.031020332127809525, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028043191879987717, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026965206488966942, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02669152058660984, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016415420919656754, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.016991347074508667, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.016885051503777504, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.015587921254336834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.015438534319400787, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010234599933028221, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.013825922273099422, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.009857198223471642, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012545330449938774, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.11934211105108261, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.11175167560577393, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.10889452695846558, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.09816712886095047, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.05629142001271248, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.05322703719139099, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06368917971849442, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0586639903485775, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.05711691081523895, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.050275254994630814, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.047601617872714996, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03251353278756142, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.028119811788201332, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.027053827419877052, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.026798250153660774, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.016262473538517952, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.014008469879627228, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0138839827850461, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01279318705201149, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.012632213532924652, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.00858017522841692, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.008650974370539188, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.008155722171068192, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.005866402294486761, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.07955671101808548, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.07429911196231842, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.07234983891248703, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.0651712641119957, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.03746860846877098, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.03539015352725983, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.04247625544667244, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.03907465934753418, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.03804676979780197, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.03342754393815994, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0316203311085701, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.021752165630459785, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.01880425401031971, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.018084486946463585, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.017912426963448524, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.010923275724053383, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.009536359459161758, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.009455776773393154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.00874672457575798, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.008641601540148258, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.005942875519394875, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.006131041329354048, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.005672476254403591, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.004441776312887669, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.09678564220666885, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.08731096237897873, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.08174601942300797, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.07009902596473694, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.04472127929329872, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.040337760001420975, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.05786314979195595, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.05113497003912926, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.04607391357421875, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.03832106292247772, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.03635025769472122, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.029755841940641403, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.02504352107644081, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.022249514237046242, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.021519536152482033, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.01545150950551033, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.012739941477775574, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.012435896322131157, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.011642901226878166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.011232857592403889, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.009208755567669868, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.009621408767998219, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.008312730118632317, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.007891933433711529, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + } + ], + "last_module_idx": 66, + "base_perplexity": 3.3143032987862084 +} \ No newline at end of file