nm-research committed
Commit 3d860fc · verified · 1 Parent(s): 667fb88

Update README.md

Files changed (1): README.md (+22 -13)
README.md CHANGED
```diff
@@ -76,6 +76,7 @@ python quantize.py --model_path ibm-granite/granite-3.1-8b-instruct --quant_path
 from datasets import load_dataset
 from transformers import AutoTokenizer
 from llmcompressor.modifiers.quantization import GPTQModifier
+from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot, apply
 import argparse
 from compressed_tensors.quantization import QuantizationScheme, QuantizationArgs, QuantizationType, QuantizationStrategy
```
```diff
@@ -98,9 +99,8 @@ model = SparseAutoModelForCausalLM.from_pretrained(
 )
 tokenizer = AutoTokenizer.from_pretrained(args.model_path)
 
-
 NUM_CALIBRATION_SAMPLES = args.calib_size
-DATASET_ID = "garage-bAInd/Open-Platypus"
+DATASET_ID = "neuralmagic/LLM_compression_calibration"
 DATASET_SPLIT = "train"
 ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
 ds = ds.shuffle(seed=42).select(range(NUM_CALIBRATION_SAMPLES))
```
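This hunk swaps the calibration source from `garage-bAInd/Open-Platypus` to `neuralmagic/LLM_compression_calibration`. A quick smoke test, not part of the README, for confirming the new dataset resolves and holds enough rows before running the full script:

```python
from datasets import load_dataset

# Hypothetical check: verify the swapped-in calibration dataset loads, and
# inspect its schema before the script's tokenize() step consumes it.
ds = load_dataset("neuralmagic/LLM_compression_calibration", split="train")
print(len(ds), ds.column_names)  # row count must cover the requested calib_size
```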
```diff
@@ -122,7 +122,15 @@ def tokenize(sample):
 
 ds = ds.map(tokenize, remove_columns=ds.column_names)
 
+ignore=["lm_head"]
+mappings=[
+    [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
+    [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"],
+    [["re:.*down_proj"], "re:.*up_proj"]
+]
+
 recipe = [
+    SmoothQuantModifier(smoothing_strength=0.8, ignore=ignore, mappings=mappings),
     GPTQModifier(
         targets=["Linear"],
         ignore=["lm_head"],
```
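Pieced together, the updated recipe runs SmoothQuant ahead of GPTQ: each mapping pairs a regex-matched group of Linear layers with the preceding layer whose output feeds them, so activation outliers can be migrated into the weights before quantization. The hunk truncates `GPTQModifier`'s remaining arguments (the imports suggest an explicit `QuantizationScheme` is built), so the `scheme="W8A8"` shorthand in this sketch is an assumption for illustration, not the commit's verbatim configuration:

```python
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

ignore = ["lm_head"]
mappings = [
    [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
    [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"],
    [["re:.*down_proj"], "re:.*up_proj"],
]

recipe = [
    # Rescale activation outliers into the weights before quantizing.
    SmoothQuantModifier(smoothing_strength=0.8, ignore=ignore, mappings=mappings),
    # Quantize every Linear layer except the output head; scheme="W8A8" is an
    # assumed stand-in for the arguments truncated in the hunk.
    GPTQModifier(targets=["Linear"], ignore=["lm_head"], scheme="W8A8"),
]
```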
```diff
@@ -136,7 +144,7 @@ oneshot(
     dataset=ds,
     recipe=recipe,
     num_calibration_samples=args.calib_size,
-    max_seq_length=8192,
+    max_seq_length=8196,
 )
 
 # Save to disk compressed.
```
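For context, this hunk sits inside the `oneshot(` call; a minimal sketch of the full invocation, assuming the call passes the model loaded earlier and only the keyword arguments visible here:

```python
# Sketch only: the hunk starts mid-call, so model= is assumed from the
# earlier SparseAutoModelForCausalLM.from_pretrained(...) load.
oneshot(
    model=model,
    dataset=ds,                               # tokenized calibration set
    recipe=recipe,                            # SmoothQuant + GPTQ recipe
    num_calibration_samples=args.calib_size,
    max_seq_length=8196,                      # changed from 8192 by this commit
)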
```diff
@@ -189,16 +197,17 @@ evalplus.evaluate \
 
 | Metric | ibm-granite/granite-3.1-8b-instruct | neuralmagic-ent/granite-3.1-8b-instruct-quantized.w8a8 |
 |-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
-| ARC-Challenge (Acc-Norm, 25-shot) | | |
-| GSM8K (Strict-Match, 5-shot) | | |
-| HellaSwag (Acc-Norm, 10-shot) | | |
-| MMLU (Acc, 5-shot) | | |
-| TruthfulQA (MC2, 0-shot) | | |
-| Winogrande (Acc, 5-shot) | | |
-| **Average Score** | **** | **** |
-| **Recovery** | **100.00** | **** |
+| ARC-Challenge (Acc-Norm, 25-shot) | 66.81 | 66.81 |
+| GSM8K (Strict-Match, 5-shot) | 64.52 | 64.37 |
+| HellaSwag (Acc-Norm, 10-shot) | 84.18 | 83.91 |
+| MMLU (Acc, 5-shot) | 65.52 | 65.00 |
+| TruthfulQA (MC2, 0-shot) | 60.57 | 60.29 |
+| Winogrande (Acc, 5-shot) | 80.19 | 79.87 |
+| **Average Score** | **70.30** | **70.04** |
+| **Recovery** | **100.00** | **99.64** |
 
 #### HumanEval pass@1 scores
-
-
+| Metric | ibm-granite/granite-3.1-8b-instruct | neuralmagic-ent/granite-3.1-8b-instruct-quantized.w8a8 |
+|-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
+| HumanEval Pass@1 | 71.00 | 72.00 |
 
```
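The new **Average Score** and **Recovery** rows follow from the per-task numbers; a quick recomputation from the table's own values (small rounding differences aside):

```python
# Recompute the table's summary rows from its per-task scores.
baseline  = [66.81, 64.52, 84.18, 65.52, 60.57, 80.19]  # ibm-granite/granite-3.1-8b-instruct
quantized = [66.81, 64.37, 83.91, 65.00, 60.29, 79.87]  # quantized.w8a8

avg_base  = sum(baseline) / len(baseline)    # -> 70.30 (2 d.p.)
avg_quant = sum(quantized) / len(quantized)  # -> 70.04 (2 d.p.)
recovery  = 100 * avg_quant / avg_base       # -> 99.63 here; the table's 99.64
                                             #    differs only in rounding
print(f"{avg_base:.2f} {avg_quant:.2f} {recovery:.2f}")
```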