Tags: Text Generation • Transformers • mixtral • Not-For-All-Audiences • nsfw • mergekit • Merge • HQQ • 2bit • conversational • Inference Endpoints
ProphetOfBostrom committed • f37cbe5 • 1 parent: 87a15f8
Update HQQbagelmix_def.py
Files changed: HQQbagelmix_def.py (+6 -6)
HQQbagelmix_def.py CHANGED

@@ -1,13 +1,13 @@
 from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
-model_path = "/
+model_path = "Undi95/BagelMix-8x7B" #i used a directory here not hf's identifiers but in principle you can do that if your internet's fast and you trust the cache...
 model = HQQModelForCausalLM.from_pretrained(model_path)
 
 #Quantize params
 from hqq.core.quantize import *
-
-attn_prams = BaseQuantizeConfig(nbits=4, group_size=64, quant_zero=True, quant_scale=True)
+
+attn_prams = BaseQuantizeConfig(nbits=4, group_size=64, quant_zero=True, quant_scale=True) #4g64
 attn_prams['scale_quant_params']['group_size'] = 256
-experts_params = BaseQuantizeConfig(nbits=2, group_size=16, quant_zero=True, quant_scale=True)
+experts_params = BaseQuantizeConfig(nbits=2, group_size=16, quant_zero=True, quant_scale=True) #2g16
 
 quant_config = {}
 #Attention
@@ -19,10 +19,10 @@ quant_config['self_attn.o_proj'] = attn_prams
 quant_config['block_sparse_moe.experts.w1'] = experts_params
 quant_config['block_sparse_moe.experts.w2'] = experts_params
 quant_config['block_sparse_moe.experts.w3'] = experts_params
-
+
 #Quantize
 save_path="models/BagelMix-8x7B-2g16-4g64-HQQ/"
 model.quantize_model(quant_config=quant_config)
 model.save_quantized(save_path)
 
-#brought to you by python -i
+#brought to you by python -i and a 200 GB swap device, code adapted from last upload
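For reference, here is the script as it stands after this commit, consolidated into a single runnable sketch. The diff elides lines 14-18 of the file; the second hunk's context shows quant_config['self_attn.o_proj'] = attn_prams, so the q_proj/k_proj/v_proj assignments below are an assumption inferred from that context, not text visible in the diff. The split itself is sensible for Mixtral: the expert MLPs (w1/w2/w3) hold the large majority of the parameters, so quantizing them to 2-bit with group size 16 gives most of the size reduction, while the much smaller attention projections stay at 4-bit with group size 64.

from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.core.quantize import BaseQuantizeConfig

model_path = "Undi95/BagelMix-8x7B"  # or a local directory, as the author used
model = HQQModelForCausalLM.from_pretrained(model_path)

# Quantize params: 4-bit weights, group size 64, with zeros and scales also
# quantized; the scale quantizer's own group size is widened to 256.
attn_prams = BaseQuantizeConfig(nbits=4, group_size=64, quant_zero=True, quant_scale=True)  # 4g64
attn_prams['scale_quant_params']['group_size'] = 256
# 2-bit weights, group size 16, for the expert MLPs
experts_params = BaseQuantizeConfig(nbits=2, group_size=16, quant_zero=True, quant_scale=True)  # 2g16

quant_config = {}
# Attention (q/k/v assignments assumed; only o_proj is visible in the hunk context)
quant_config['self_attn.q_proj'] = attn_prams
quant_config['self_attn.k_proj'] = attn_prams
quant_config['self_attn.v_proj'] = attn_prams
quant_config['self_attn.o_proj'] = attn_prams
# Experts
quant_config['block_sparse_moe.experts.w1'] = experts_params
quant_config['block_sparse_moe.experts.w2'] = experts_params
quant_config['block_sparse_moe.experts.w3'] = experts_params

# Quantize and save
save_path = "models/BagelMix-8x7B-2g16-4g64-HQQ/"
model.quantize_model(quant_config=quant_config)
model.save_quantized(save_path)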
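The saved folder can then be reloaded without re-quantizing. A minimal sketch, assuming hqq's from_quantized loader (it lives in hqq.engine.hf alongside from_pretrained) and taking the tokenizer from the base repo, since the quantized folder may not include one:

from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer

save_path = "models/BagelMix-8x7B-2g16-4g64-HQQ/"
model = HQQModelForCausalLM.from_quantized(save_path)  # assumes hqq's from_quantized API
tokenizer = AutoTokenizer.from_pretrained("Undi95/BagelMix-8x7B")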