weiweiz1 committed on
Commit
54a2d37
1 Parent(s): f4a8e35

auto_round format

Browse files

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>

config.json CHANGED
@@ -11,12 +11,11 @@
11
  "quantization_config": {
12
  "amp": true,
13
  "autoround_version": "0.4.2.dev",
 
14
  "batch_size": 8,
15
  "bits": 4,
16
- "block_name_to_quantize": "language_model.model.layers",
17
- "damp_percent": 0.01,
18
  "data_type": "int",
19
- "desc_act": false,
20
  "enable_minmax_tuning": true,
21
  "enable_norm_bias_tuning": false,
22
  "enable_quanted_input": true,
@@ -27,12 +26,11 @@
27
  "lr": 0.001,
28
  "minmax_lr": 0.001,
29
  "nsamples": 512,
30
- "quant_method": "gptq",
31
  "scale_dtype": "torch.float16",
32
  "seqlen": 2048,
33
  "sym": true,
34
- "to_quant_block_names": "language_model.model.layers",
35
- "true_sequential": false
36
  },
37
  "text_config": {
38
  "_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct",
 
11
  "quantization_config": {
12
  "amp": true,
13
  "autoround_version": "0.4.2.dev",
14
+ "backend": "auto_round:gptq:exllamav2",
15
  "batch_size": 8,
16
  "bits": 4,
 
 
17
  "data_type": "int",
18
+ "dataset": "NeelNanda/pile-10k",
19
  "enable_minmax_tuning": true,
20
  "enable_norm_bias_tuning": false,
21
  "enable_quanted_input": true,
 
26
  "lr": 0.001,
27
  "minmax_lr": 0.001,
28
  "nsamples": 512,
29
+ "quant_method": "intel/auto-round",
30
  "scale_dtype": "torch.float16",
31
  "seqlen": 2048,
32
  "sym": true,
33
+ "to_quant_block_names": "language_model.model.layers"
 
34
  },
35
  "text_config": {
36
  "_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct",
quantize_config.json → quantization_config.json RENAMED
@@ -17,10 +17,8 @@
17
  "low_gpu_mem_usage": false,
18
  "to_quant_block_names": "language_model.model.layers",
19
  "enable_norm_bias_tuning": false,
 
20
  "autoround_version": "0.4.2.dev",
21
- "block_name_to_quantize": "language_model.model.layers",
22
- "quant_method": "gptq",
23
- "desc_act": false,
24
- "true_sequential": false,
25
- "damp_percent": 0.01
26
  }
 
17
  "low_gpu_mem_usage": false,
18
  "to_quant_block_names": "language_model.model.layers",
19
  "enable_norm_bias_tuning": false,
20
+ "dataset": "NeelNanda/pile-10k",
21
  "autoround_version": "0.4.2.dev",
22
+ "quant_method": "intel/auto-round",
23
+ "backend": "auto_round:gptq:exllamav2"
 
 
 
24
  }