ProphetOfBostrom committed on
Commit f37cbe5
1 Parent(s): 87a15f8

Update HQQbagelmix_def.py

Files changed (1)
  1. HQQbagelmix_def.py +6 -6
HQQbagelmix_def.py CHANGED
@@ -1,13 +1,13 @@
 from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
-model_path = "/mnt/ancient240/Undi95_BagelMix-8x7B"
+model_path = "Undi95/BagelMix-8x7B" #i used a directory here not hf's identifiers but in principle you can do that if your internet's fast and you trust the cache...
 model = HQQModelForCausalLM.from_pretrained(model_path)
 
 #Quantize params
 from hqq.core.quantize import *
-#attn_prams = BaseQuantizeConfig(nbits=4, group_size=64, quant_zero=True, quant_scale=True)
-attn_prams = BaseQuantizeConfig(nbits=4, group_size=64, quant_zero=True, quant_scale=True)
+
+attn_prams = BaseQuantizeConfig(nbits=4, group_size=64, quant_zero=True, quant_scale=True) #4g64
 attn_prams['scale_quant_params']['group_size'] = 256
-experts_params = BaseQuantizeConfig(nbits=2, group_size=16, quant_zero=True, quant_scale=True)
+experts_params = BaseQuantizeConfig(nbits=2, group_size=16, quant_zero=True, quant_scale=True) #2g16
 
 quant_config = {}
 #Attention
@@ -19,10 +19,10 @@ quant_config['self_attn.o_proj'] = attn_prams
 quant_config['block_sparse_moe.experts.w1'] = experts_params
 quant_config['block_sparse_moe.experts.w2'] = experts_params
 quant_config['block_sparse_moe.experts.w3'] = experts_params
-#print("configured")
+
 #Quantize
 save_path="models/BagelMix-8x7B-2g16-4g64-HQQ/"
 model.quantize_model(quant_config=quant_config)
 model.save_quantized(save_path)
 
-#brought to you by python -i
+#brought to you by python -i and a 200 GB swap device, code adapted from last upload
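For readers skimming the hunks: the recipe is mixed-precision HQQ, with the attention projections at 4-bit/group-size-64 and the MoE expert weights (the vast bulk of an 8x7B's parameters) at 2-bit/group-size-16. The diff elides file lines 14-18, so only `self_attn.o_proj` is visible in the second hunk's header; below is a minimal sketch of the assembled config, where the `q_proj`/`k_proj`/`v_proj` keys are an assumption by analogy with the `o_proj` line, not something this diff shows.

```python
from hqq.core.quantize import BaseQuantizeConfig

# 4-bit attention, 2-bit experts, matching the script above
attn_prams = BaseQuantizeConfig(nbits=4, group_size=64, quant_zero=True, quant_scale=True)      # 4g64
attn_prams['scale_quant_params']['group_size'] = 256  # coarser groups for the quantized scales
experts_params = BaseQuantizeConfig(nbits=2, group_size=16, quant_zero=True, quant_scale=True)  # 2g16

quant_config = {}
# Attention projections -- q/k/v are assumed by analogy; the diff only shows o_proj
quant_config['self_attn.q_proj'] = attn_prams
quant_config['self_attn.k_proj'] = attn_prams
quant_config['self_attn.v_proj'] = attn_prams
quant_config['self_attn.o_proj'] = attn_prams  # visible in the second hunk's header
# MoE experts -- where almost all of the memory goes
quant_config['block_sparse_moe.experts.w1'] = experts_params
quant_config['block_sparse_moe.experts.w2'] = experts_params
quant_config['block_sparse_moe.experts.w3'] = experts_params
```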
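The closing comment also hints at why you only want to pay the quantization cost once: it is loading the full-precision Mixtral that needed the 200 GB swap device. A minimal sketch of reloading the saved checkpoint afterwards, assuming the same hqq version this script targets; `from_quantized` is the hqq engine's counterpart to `from_pretrained`, and the generation call is a standard transformers workflow rather than anything in this commit.

```python
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer

save_path = "models/BagelMix-8x7B-2g16-4g64-HQQ/"      # directory written by the script above
model = HQQModelForCausalLM.from_quantized(save_path)  # loads HQQ weights; no re-quantization pass
tokenizer = AutoTokenizer.from_pretrained("Undi95/BagelMix-8x7B")

prompt = "Q: Why quantize the experts more aggressively than attention?\nA:"
inputs = tokenizer(prompt, return_tensors="pt").to(next(model.parameters()).device)
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```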