diff --git "a/quant_strategy.json" "b/quant_strategy.json" new file mode 100644--- /dev/null +++ "b/quant_strategy.json" @@ -0,0 +1,5083 @@ +{ + "measurement": { + "model.layers.0": { + "accuracy": 0.9299263954162598, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.1": { + "accuracy": 0.9162859916687012, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.2": { + "accuracy": 0.87398362159729, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.3": { + "accuracy": 0.8817203044891357, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.4": { + "accuracy": 0.866055965423584, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.5": { + "accuracy": 0.8579413890838623, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.6": { + "accuracy": 0.854529857635498, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.7": { + "accuracy": 0.839653491973877, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.8": { + "accuracy": 0.9200255870819092, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.9": { + "accuracy": 0.958084225654602, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.10": { + "accuracy": 0.9555337429046631, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.11": { + "accuracy": 0.948845386505127, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.12": { + "accuracy": 0.9455077648162842, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.13": { + "accuracy": 0.94464111328125, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.14": { + "accuracy": 0.9431122541427612, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.15": { + "accuracy": 0.941770076751709, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.16": { + "accuracy": 0.9416558742523193, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.17": { + "accuracy": 0.9400827884674072, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.18": { + "accuracy": 0.9372212886810303, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.19": { + "accuracy": 0.9422736167907715, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.20": { + "accuracy": 0.9491322040557861, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.21": { + "accuracy": 0.9568543434143066, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.22": { + "accuracy": 0.9596590995788574, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.23": { + "accuracy": 0.9590309858322144, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.24": { + "accuracy": 0.9619624614715576, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.25": { + "accuracy": 0.9607523679733276, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.26": { + "accuracy": 0.9579014778137207, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.27": { + "accuracy": 0.9534207582473755, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.28": { + "accuracy": 0.9508507251739502, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.29": { + "accuracy": 0.9470303058624268, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.30": { + "accuracy": 0.9453625679016113, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.31": { + "accuracy": 0.9427030086517334, + "total_bits": 361673760, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + } + }, + "model.layers.32": { + "accuracy": 0.9438602924346924, + "total_bits": 373601312, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + "model.layers.33": { + "accuracy": 0.9368832111358643, + "total_bits": 379106336, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "model.layers.34": { + "accuracy": 0.9361872673034668, + "total_bits": 405582880, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.35": { + "accuracy": 0.9489033222198486, + "total_bits": 477279264, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.36": { + "accuracy": 0.9453229904174805, + "total_bits": 477279264, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.37": { + "accuracy": 0.9442036151885986, + "total_bits": 477279264, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.38": { + "accuracy": 0.9535946846008301, + "total_bits": 571651104, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.39": { + "accuracy": 0.9655042886734009, + "total_bits": 634565664, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.40": { + "accuracy": 0.9649480581283569, + "total_bits": 634565664, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.41": { + "accuracy": 0.966320276260376, + "total_bits": 634565664, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.42": { + "accuracy": 0.9660360813140869, + "total_bits": 634565664, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.75, + 0.25 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.43": { + "accuracy": 0.9821763634681702, + "total_bits": 697480224, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.44": { + "accuracy": 0.9825348258018494, + "total_bits": 697480224, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.45": { + "accuracy": 0.9833475351333618, + "total_bits": 697480224, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.46": { + "accuracy": 0.9827241897583008, + "total_bits": 697480224, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.47": { + "accuracy": 0.926713228225708, + "total_bits": 697480224, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + } +} \ No newline at end of file