diff --git "a/quant_strategy.json" "b/quant_strategy.json" new file mode 100644--- /dev/null +++ "b/quant_strategy.json" @@ -0,0 +1,4352 @@ +{ + "measurement": { + "model.layers.0": { + "accuracy": 0.8929605484008789, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.1": { + "accuracy": 0.9168710708618164, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.2": { + "accuracy": 0.9242000579833984, + "total_bits": 657821968, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.3": { + "accuracy": 0.9581476449966431, + "total_bits": 656511248, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.4": { + "accuracy": 0.9504798650741577, + "total_bits": 657821968, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.5": { + "accuracy": 0.9427821636199951, + "total_bits": 656511248, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.6": { + "accuracy": 0.934929609298706, + "total_bits": 657821968, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.7": { + "accuracy": 0.9278604984283447, + "total_bits": 657821968, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.8": { + "accuracy": 0.9180412292480469, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.9": { + "accuracy": 0.9271588325500488, + "total_bits": 657821968, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.10": { + "accuracy": 0.9456520080566406, + "total_bits": 657821968, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.11": { + "accuracy": 0.9409334659576416, + "total_bits": 657821968, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.12": { + "accuracy": 0.9337412118911743, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.13": { + "accuracy": 0.9283764362335205, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.14": { + "accuracy": 0.9217686653137207, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.15": { + "accuracy": 0.918848991394043, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.16": { + "accuracy": 0.9184324741363525, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.17": { + "accuracy": 0.9163417816162109, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.18": { + "accuracy": 0.9149003028869629, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.19": { + "accuracy": 0.9117274284362793, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.20": { + "accuracy": 0.9072480201721191, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.21": { + "accuracy": 0.9018845558166504, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.22": { + "accuracy": 0.898529052734375, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.23": { + "accuracy": 0.8957295417785645, + "total_bits": 664375568, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.24": { + "accuracy": 0.8970789909362793, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.25": { + "accuracy": 0.895576000213623, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.26": { + "accuracy": 0.8984298706054688, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.27": { + "accuracy": 0.9007019996643066, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.28": { + "accuracy": 0.9044156074523926, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.29": { + "accuracy": 0.9065403938293457, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.30": { + "accuracy": 0.9087052345275879, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.31": { + "accuracy": 0.9106259346008301, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.32": { + "accuracy": 0.9121687412261963, + "total_bits": 661754128, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.33": { + "accuracy": 0.9193267822265625, + "total_bits": 723423504, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "model.layers.34": { + "accuracy": 0.9250683784484863, + "total_bits": 739545360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.35": { + "accuracy": 0.9275093078613281, + "total_bits": 739545360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.36": { + "accuracy": 0.9365782737731934, + "total_bits": 739545360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.37": { + "accuracy": 0.9481668472290039, + "total_bits": 890540304, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.35, + 0.65 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.35, + 0.65 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.35, + 0.65 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.35, + 0.65 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.35, + 0.65 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + } + }, + "model.layers.38": { + "accuracy": 0.953655481338501, + "total_bits": 942444816, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.39": { + "accuracy": 0.9651250839233398, + "total_bits": 1161335056, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + } +} \ No newline at end of file