TheBloke committed on
Commit
13f6b45
1 Parent(s): 7781280

Set main branch to 4bit-128g-True, sharded

Browse files
ACCEPTABLE_USE_POLICY.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FALCON 180B TII LICENSE VERSION 1.0
2
+ September 2023
3
+ falconllm.tii.ae
4
+
5
+
6
+ ACCEPTABLE USE POLICY
7
+ You agree not to use Falcon 180B or any Work or Derivative Work (as such terms are defined in the Falcon 180B TII License Version 1.0):
8
+ 1. In any way that violates any applicable national, federal, state, local or international law or regulation;
9
+ 2. For the purpose of exploiting, harming or attempting to exploit or harm minors and/or living beings in any way;
10
+ 3. To generate or disseminate verifiably false information with the purpose of harming others; and/or
11
+ 4. To defame, disparage or otherwise harass others.
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/workspace/falcon-180b-chat-gptq-main",
3
  "alibi": false,
4
  "architectures": [
5
  "FalconForCausalLM"
@@ -19,9 +19,7 @@
19
  "num_attention_heads": 232,
20
  "num_hidden_layers": 80,
21
  "num_kv_heads": 8,
22
- "pad_token_id": 0,
23
  "parallel_attn": true,
24
- "pretraining_tp": 1,
25
  "quantization_config": {
26
  "batch_size": 1,
27
  "bits": 4,
@@ -30,7 +28,7 @@
30
  "dataset": null,
31
  "desc_act": true,
32
  "disable_exllama": true,
33
- "group_size": -1,
34
  "model_seqlen": null,
35
  "module_name_preceding_first_block": null,
36
  "pad_token_id": null,
 
1
  {
2
+ "_name_or_path": "/workspace/process/tiiuae_falcon-180b-chat/gptq/gptq-4bit-128g-actorder_True/",
3
  "alibi": false,
4
  "architectures": [
5
  "FalconForCausalLM"
 
19
  "num_attention_heads": 232,
20
  "num_hidden_layers": 80,
21
  "num_kv_heads": 8,
 
22
  "parallel_attn": true,
 
23
  "quantization_config": {
24
  "batch_size": 1,
25
  "bits": 4,
 
28
  "dataset": null,
29
  "desc_act": true,
30
  "disable_exllama": true,
31
+ "group_size": 128,
32
  "model_seqlen": null,
33
  "module_name_preceding_first_block": null,
34
  "pad_token_id": null,
model-00001-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5e8bf5c1009b25e46f741620de1b8c8a73602142d5d3aa4ec24d576543155cc
3
- size 9935767728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0710855b0308e1a37410e6b67253974815c0f97d5c303d03e1132ab7ad7c55f7
3
+ size 9995023968
model-00002-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:462411c384834e128a4f1fbd438affe13809414a8e89c0bb718e80a16801af9b
3
- size 9998346208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ecbfc54c8304049dd69afd6280886fad89bc7e566f5b9570749e4be935448f6
3
+ size 9941818992
model-00003-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f691d9791f30c094520c5992c075c16dd6c520c4aedc515acda6e717cdce9037
3
- size 9998346272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3568ba7cefbf02ad057e9a32e1f883b13ec912ff79ef511b3f589451cd2106f
3
+ size 9927772520
model-00004-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0efec866ce1e79013384d7d29d0024aa057e0b39a7f93654fd6c25f15ab53a54
3
- size 9998346272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d8e65ea85221c1b8ce251bec6ea9e5b498ccf2f4677f59faa8a6b54869be4a
3
+ size 9690586856
model-00005-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5b6e177ab2f0ed6fbc9f1cdd57a15d605922a485d689400df86095c6441fb39
3
- size 9998346272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca596b681e4541b2d1e168ecceb3f2fa2b675ee9ace856032ac6c26f104e795
3
+ size 9927772520
model-00006-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f81daa26794d5fdcdcff501bc79a90395098343b38eab7311f148a044a217fbe
3
- size 9998346272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d73cd57d08723ae9efc442bae774702b518ab65c6fcbdfc35637dd2f9d791538
3
+ size 9690586856
model-00007-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b9275ba2740935b4c633508616cce85491987108ded36bcf2268facb53d2189
3
- size 9998346272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cf2e1595288202bdf9abcf4bfc14cb8f6142466adf1a1e9c64df19cf1367dfe
3
+ size 9927772520
model-00008-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e80780d21e54a97cea05312881287afa1b3bc1ed9656de691abf2e07861a3e27
3
- size 9998346272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b60dff7f3adf1fb7f49aa484ffc6d13f797acd61865bf9a1270e6f368f354f6
3
+ size 9690586856
model-00009-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ef0141399c5e1811aa5f0c30b63648fa93ab0de21481cf8533511c307b2f3b2
3
- size 9998346272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09092c16c2675c5d1621dda898f57aa9b6a6fb10c39a7baa9a3db35a5c1d8cdd
3
+ size 9927772520
model-00010-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10573cdffc16b8c5bcbcec75d45959b6e3d68af756aa76d7c70d1fb06619ef55
3
- size 882663400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5724bbe7696a4281087fd85831fc8fa6489117332176c9243145d4b424aab3a7
3
+ size 5533009736
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 90804975616
4
  },
5
  "weight_map": {
6
  "transformer.h.0.ln_attn.bias": "model-00001-of-00010.safetensors",
@@ -171,20 +171,20 @@
171
  "transformer.h.14.self_attention.query_key_value.qweight": "model-00002-of-00010.safetensors",
172
  "transformer.h.14.self_attention.query_key_value.qzeros": "model-00002-of-00010.safetensors",
173
  "transformer.h.14.self_attention.query_key_value.scales": "model-00002-of-00010.safetensors",
174
- "transformer.h.15.ln_attn.bias": "model-00002-of-00010.safetensors",
175
- "transformer.h.15.ln_attn.weight": "model-00002-of-00010.safetensors",
176
- "transformer.h.15.ln_mlp.bias": "model-00002-of-00010.safetensors",
177
- "transformer.h.15.ln_mlp.weight": "model-00002-of-00010.safetensors",
178
  "transformer.h.15.mlp.dense_4h_to_h.bias": "model-00002-of-00010.safetensors",
179
  "transformer.h.15.mlp.dense_4h_to_h.g_idx": "model-00002-of-00010.safetensors",
180
  "transformer.h.15.mlp.dense_4h_to_h.qweight": "model-00002-of-00010.safetensors",
181
  "transformer.h.15.mlp.dense_4h_to_h.qzeros": "model-00002-of-00010.safetensors",
182
  "transformer.h.15.mlp.dense_4h_to_h.scales": "model-00002-of-00010.safetensors",
183
- "transformer.h.15.mlp.dense_h_to_4h.bias": "model-00002-of-00010.safetensors",
184
- "transformer.h.15.mlp.dense_h_to_4h.g_idx": "model-00002-of-00010.safetensors",
185
- "transformer.h.15.mlp.dense_h_to_4h.qweight": "model-00002-of-00010.safetensors",
186
- "transformer.h.15.mlp.dense_h_to_4h.qzeros": "model-00002-of-00010.safetensors",
187
- "transformer.h.15.mlp.dense_h_to_4h.scales": "model-00002-of-00010.safetensors",
188
  "transformer.h.15.self_attention.dense.bias": "model-00002-of-00010.safetensors",
189
  "transformer.h.15.self_attention.dense.g_idx": "model-00002-of-00010.safetensors",
190
  "transformer.h.15.self_attention.dense.qweight": "model-00002-of-00010.safetensors",
@@ -209,16 +209,16 @@
209
  "transformer.h.16.mlp.dense_h_to_4h.qweight": "model-00003-of-00010.safetensors",
210
  "transformer.h.16.mlp.dense_h_to_4h.qzeros": "model-00003-of-00010.safetensors",
211
  "transformer.h.16.mlp.dense_h_to_4h.scales": "model-00003-of-00010.safetensors",
212
- "transformer.h.16.self_attention.dense.bias": "model-00002-of-00010.safetensors",
213
- "transformer.h.16.self_attention.dense.g_idx": "model-00002-of-00010.safetensors",
214
- "transformer.h.16.self_attention.dense.qweight": "model-00002-of-00010.safetensors",
215
- "transformer.h.16.self_attention.dense.qzeros": "model-00002-of-00010.safetensors",
216
- "transformer.h.16.self_attention.dense.scales": "model-00002-of-00010.safetensors",
217
- "transformer.h.16.self_attention.query_key_value.bias": "model-00002-of-00010.safetensors",
218
- "transformer.h.16.self_attention.query_key_value.g_idx": "model-00002-of-00010.safetensors",
219
- "transformer.h.16.self_attention.query_key_value.qweight": "model-00002-of-00010.safetensors",
220
- "transformer.h.16.self_attention.query_key_value.qzeros": "model-00002-of-00010.safetensors",
221
- "transformer.h.16.self_attention.query_key_value.scales": "model-00002-of-00010.safetensors",
222
  "transformer.h.17.ln_attn.bias": "model-00003-of-00010.safetensors",
223
  "transformer.h.17.ln_attn.weight": "model-00003-of-00010.safetensors",
224
  "transformer.h.17.ln_mlp.bias": "model-00003-of-00010.safetensors",
@@ -411,20 +411,20 @@
411
  "transformer.h.23.self_attention.query_key_value.qweight": "model-00003-of-00010.safetensors",
412
  "transformer.h.23.self_attention.query_key_value.qzeros": "model-00003-of-00010.safetensors",
413
  "transformer.h.23.self_attention.query_key_value.scales": "model-00003-of-00010.safetensors",
414
- "transformer.h.24.ln_attn.bias": "model-00003-of-00010.safetensors",
415
- "transformer.h.24.ln_attn.weight": "model-00003-of-00010.safetensors",
416
- "transformer.h.24.ln_mlp.bias": "model-00003-of-00010.safetensors",
417
- "transformer.h.24.ln_mlp.weight": "model-00003-of-00010.safetensors",
418
- "transformer.h.24.mlp.dense_4h_to_h.bias": "model-00003-of-00010.safetensors",
419
- "transformer.h.24.mlp.dense_4h_to_h.g_idx": "model-00003-of-00010.safetensors",
420
- "transformer.h.24.mlp.dense_4h_to_h.qweight": "model-00003-of-00010.safetensors",
421
- "transformer.h.24.mlp.dense_4h_to_h.qzeros": "model-00003-of-00010.safetensors",
422
- "transformer.h.24.mlp.dense_4h_to_h.scales": "model-00003-of-00010.safetensors",
423
- "transformer.h.24.mlp.dense_h_to_4h.bias": "model-00003-of-00010.safetensors",
424
- "transformer.h.24.mlp.dense_h_to_4h.g_idx": "model-00003-of-00010.safetensors",
425
- "transformer.h.24.mlp.dense_h_to_4h.qweight": "model-00003-of-00010.safetensors",
426
- "transformer.h.24.mlp.dense_h_to_4h.qzeros": "model-00003-of-00010.safetensors",
427
- "transformer.h.24.mlp.dense_h_to_4h.scales": "model-00003-of-00010.safetensors",
428
  "transformer.h.24.self_attention.dense.bias": "model-00003-of-00010.safetensors",
429
  "transformer.h.24.self_attention.dense.g_idx": "model-00003-of-00010.safetensors",
430
  "transformer.h.24.self_attention.dense.qweight": "model-00003-of-00010.safetensors",
@@ -449,16 +449,16 @@
449
  "transformer.h.25.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
450
  "transformer.h.25.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
451
  "transformer.h.25.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
452
- "transformer.h.25.self_attention.dense.bias": "model-00003-of-00010.safetensors",
453
- "transformer.h.25.self_attention.dense.g_idx": "model-00003-of-00010.safetensors",
454
- "transformer.h.25.self_attention.dense.qweight": "model-00003-of-00010.safetensors",
455
- "transformer.h.25.self_attention.dense.qzeros": "model-00003-of-00010.safetensors",
456
- "transformer.h.25.self_attention.dense.scales": "model-00003-of-00010.safetensors",
457
- "transformer.h.25.self_attention.query_key_value.bias": "model-00003-of-00010.safetensors",
458
- "transformer.h.25.self_attention.query_key_value.g_idx": "model-00003-of-00010.safetensors",
459
- "transformer.h.25.self_attention.query_key_value.qweight": "model-00003-of-00010.safetensors",
460
- "transformer.h.25.self_attention.query_key_value.qzeros": "model-00003-of-00010.safetensors",
461
- "transformer.h.25.self_attention.query_key_value.scales": "model-00003-of-00010.safetensors",
462
  "transformer.h.26.ln_attn.bias": "model-00004-of-00010.safetensors",
463
  "transformer.h.26.ln_attn.weight": "model-00004-of-00010.safetensors",
464
  "transformer.h.26.ln_mlp.bias": "model-00004-of-00010.safetensors",
@@ -627,20 +627,20 @@
627
  "transformer.h.31.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
628
  "transformer.h.31.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
629
  "transformer.h.31.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
630
- "transformer.h.32.ln_attn.bias": "model-00004-of-00010.safetensors",
631
- "transformer.h.32.ln_attn.weight": "model-00004-of-00010.safetensors",
632
- "transformer.h.32.ln_mlp.bias": "model-00004-of-00010.safetensors",
633
- "transformer.h.32.ln_mlp.weight": "model-00004-of-00010.safetensors",
634
  "transformer.h.32.mlp.dense_4h_to_h.bias": "model-00004-of-00010.safetensors",
635
  "transformer.h.32.mlp.dense_4h_to_h.g_idx": "model-00004-of-00010.safetensors",
636
  "transformer.h.32.mlp.dense_4h_to_h.qweight": "model-00004-of-00010.safetensors",
637
  "transformer.h.32.mlp.dense_4h_to_h.qzeros": "model-00004-of-00010.safetensors",
638
  "transformer.h.32.mlp.dense_4h_to_h.scales": "model-00004-of-00010.safetensors",
639
- "transformer.h.32.mlp.dense_h_to_4h.bias": "model-00004-of-00010.safetensors",
640
- "transformer.h.32.mlp.dense_h_to_4h.g_idx": "model-00004-of-00010.safetensors",
641
- "transformer.h.32.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
642
- "transformer.h.32.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
643
- "transformer.h.32.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
644
  "transformer.h.32.self_attention.dense.bias": "model-00004-of-00010.safetensors",
645
  "transformer.h.32.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
646
  "transformer.h.32.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
@@ -651,30 +651,30 @@
651
  "transformer.h.32.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
652
  "transformer.h.32.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
653
  "transformer.h.32.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
654
- "transformer.h.33.ln_attn.bias": "model-00004-of-00010.safetensors",
655
- "transformer.h.33.ln_attn.weight": "model-00004-of-00010.safetensors",
656
- "transformer.h.33.ln_mlp.bias": "model-00004-of-00010.safetensors",
657
- "transformer.h.33.ln_mlp.weight": "model-00004-of-00010.safetensors",
658
- "transformer.h.33.mlp.dense_4h_to_h.bias": "model-00004-of-00010.safetensors",
659
- "transformer.h.33.mlp.dense_4h_to_h.g_idx": "model-00004-of-00010.safetensors",
660
- "transformer.h.33.mlp.dense_4h_to_h.qweight": "model-00004-of-00010.safetensors",
661
- "transformer.h.33.mlp.dense_4h_to_h.qzeros": "model-00004-of-00010.safetensors",
662
- "transformer.h.33.mlp.dense_4h_to_h.scales": "model-00004-of-00010.safetensors",
663
- "transformer.h.33.mlp.dense_h_to_4h.bias": "model-00004-of-00010.safetensors",
664
- "transformer.h.33.mlp.dense_h_to_4h.g_idx": "model-00004-of-00010.safetensors",
665
- "transformer.h.33.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
666
- "transformer.h.33.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
667
- "transformer.h.33.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
668
- "transformer.h.33.self_attention.dense.bias": "model-00004-of-00010.safetensors",
669
- "transformer.h.33.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
670
- "transformer.h.33.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
671
- "transformer.h.33.self_attention.dense.qzeros": "model-00004-of-00010.safetensors",
672
- "transformer.h.33.self_attention.dense.scales": "model-00004-of-00010.safetensors",
673
- "transformer.h.33.self_attention.query_key_value.bias": "model-00004-of-00010.safetensors",
674
- "transformer.h.33.self_attention.query_key_value.g_idx": "model-00004-of-00010.safetensors",
675
- "transformer.h.33.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
676
- "transformer.h.33.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
677
- "transformer.h.33.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
678
  "transformer.h.34.ln_attn.bias": "model-00005-of-00010.safetensors",
679
  "transformer.h.34.ln_attn.weight": "model-00005-of-00010.safetensors",
680
  "transformer.h.34.ln_mlp.bias": "model-00005-of-00010.safetensors",
@@ -689,16 +689,16 @@
689
  "transformer.h.34.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
690
  "transformer.h.34.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
691
  "transformer.h.34.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
692
- "transformer.h.34.self_attention.dense.bias": "model-00004-of-00010.safetensors",
693
- "transformer.h.34.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
694
- "transformer.h.34.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
695
- "transformer.h.34.self_attention.dense.qzeros": "model-00004-of-00010.safetensors",
696
- "transformer.h.34.self_attention.dense.scales": "model-00004-of-00010.safetensors",
697
- "transformer.h.34.self_attention.query_key_value.bias": "model-00004-of-00010.safetensors",
698
- "transformer.h.34.self_attention.query_key_value.g_idx": "model-00004-of-00010.safetensors",
699
- "transformer.h.34.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
700
- "transformer.h.34.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
701
- "transformer.h.34.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
702
  "transformer.h.35.ln_attn.bias": "model-00005-of-00010.safetensors",
703
  "transformer.h.35.ln_attn.weight": "model-00005-of-00010.safetensors",
704
  "transformer.h.35.ln_mlp.bias": "model-00005-of-00010.safetensors",
@@ -867,20 +867,20 @@
867
  "transformer.h.40.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
868
  "transformer.h.40.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
869
  "transformer.h.40.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
870
- "transformer.h.41.ln_attn.bias": "model-00005-of-00010.safetensors",
871
- "transformer.h.41.ln_attn.weight": "model-00005-of-00010.safetensors",
872
- "transformer.h.41.ln_mlp.bias": "model-00005-of-00010.safetensors",
873
- "transformer.h.41.ln_mlp.weight": "model-00005-of-00010.safetensors",
874
- "transformer.h.41.mlp.dense_4h_to_h.bias": "model-00005-of-00010.safetensors",
875
- "transformer.h.41.mlp.dense_4h_to_h.g_idx": "model-00005-of-00010.safetensors",
876
- "transformer.h.41.mlp.dense_4h_to_h.qweight": "model-00005-of-00010.safetensors",
877
- "transformer.h.41.mlp.dense_4h_to_h.qzeros": "model-00005-of-00010.safetensors",
878
- "transformer.h.41.mlp.dense_4h_to_h.scales": "model-00005-of-00010.safetensors",
879
- "transformer.h.41.mlp.dense_h_to_4h.bias": "model-00005-of-00010.safetensors",
880
- "transformer.h.41.mlp.dense_h_to_4h.g_idx": "model-00005-of-00010.safetensors",
881
- "transformer.h.41.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
882
- "transformer.h.41.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
883
- "transformer.h.41.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
884
  "transformer.h.41.self_attention.dense.bias": "model-00005-of-00010.safetensors",
885
  "transformer.h.41.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
886
  "transformer.h.41.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
@@ -891,30 +891,30 @@
891
  "transformer.h.41.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
892
  "transformer.h.41.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
893
  "transformer.h.41.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
894
- "transformer.h.42.ln_attn.bias": "model-00005-of-00010.safetensors",
895
- "transformer.h.42.ln_attn.weight": "model-00005-of-00010.safetensors",
896
- "transformer.h.42.ln_mlp.bias": "model-00005-of-00010.safetensors",
897
- "transformer.h.42.ln_mlp.weight": "model-00005-of-00010.safetensors",
898
- "transformer.h.42.mlp.dense_4h_to_h.bias": "model-00005-of-00010.safetensors",
899
- "transformer.h.42.mlp.dense_4h_to_h.g_idx": "model-00005-of-00010.safetensors",
900
- "transformer.h.42.mlp.dense_4h_to_h.qweight": "model-00005-of-00010.safetensors",
901
- "transformer.h.42.mlp.dense_4h_to_h.qzeros": "model-00005-of-00010.safetensors",
902
- "transformer.h.42.mlp.dense_4h_to_h.scales": "model-00005-of-00010.safetensors",
903
- "transformer.h.42.mlp.dense_h_to_4h.bias": "model-00005-of-00010.safetensors",
904
- "transformer.h.42.mlp.dense_h_to_4h.g_idx": "model-00005-of-00010.safetensors",
905
- "transformer.h.42.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
906
- "transformer.h.42.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
907
- "transformer.h.42.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
908
- "transformer.h.42.self_attention.dense.bias": "model-00005-of-00010.safetensors",
909
- "transformer.h.42.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
910
- "transformer.h.42.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
911
- "transformer.h.42.self_attention.dense.qzeros": "model-00005-of-00010.safetensors",
912
- "transformer.h.42.self_attention.dense.scales": "model-00005-of-00010.safetensors",
913
- "transformer.h.42.self_attention.query_key_value.bias": "model-00005-of-00010.safetensors",
914
- "transformer.h.42.self_attention.query_key_value.g_idx": "model-00005-of-00010.safetensors",
915
- "transformer.h.42.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
916
- "transformer.h.42.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
917
- "transformer.h.42.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
918
  "transformer.h.43.ln_attn.bias": "model-00006-of-00010.safetensors",
919
  "transformer.h.43.ln_attn.weight": "model-00006-of-00010.safetensors",
920
  "transformer.h.43.ln_mlp.bias": "model-00006-of-00010.safetensors",
@@ -929,16 +929,16 @@
929
  "transformer.h.43.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
930
  "transformer.h.43.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
931
  "transformer.h.43.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
932
- "transformer.h.43.self_attention.dense.bias": "model-00005-of-00010.safetensors",
933
- "transformer.h.43.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
934
- "transformer.h.43.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
935
- "transformer.h.43.self_attention.dense.qzeros": "model-00005-of-00010.safetensors",
936
- "transformer.h.43.self_attention.dense.scales": "model-00005-of-00010.safetensors",
937
- "transformer.h.43.self_attention.query_key_value.bias": "model-00005-of-00010.safetensors",
938
- "transformer.h.43.self_attention.query_key_value.g_idx": "model-00005-of-00010.safetensors",
939
- "transformer.h.43.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
940
- "transformer.h.43.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
941
- "transformer.h.43.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
942
  "transformer.h.44.ln_attn.bias": "model-00006-of-00010.safetensors",
943
  "transformer.h.44.ln_attn.weight": "model-00006-of-00010.safetensors",
944
  "transformer.h.44.ln_mlp.bias": "model-00006-of-00010.safetensors",
@@ -1059,20 +1059,20 @@
1059
  "transformer.h.48.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
1060
  "transformer.h.48.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
1061
  "transformer.h.48.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
1062
- "transformer.h.49.ln_attn.bias": "model-00006-of-00010.safetensors",
1063
- "transformer.h.49.ln_attn.weight": "model-00006-of-00010.safetensors",
1064
- "transformer.h.49.ln_mlp.bias": "model-00006-of-00010.safetensors",
1065
- "transformer.h.49.ln_mlp.weight": "model-00006-of-00010.safetensors",
1066
  "transformer.h.49.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
1067
  "transformer.h.49.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
1068
  "transformer.h.49.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
1069
  "transformer.h.49.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
1070
  "transformer.h.49.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
1071
- "transformer.h.49.mlp.dense_h_to_4h.bias": "model-00006-of-00010.safetensors",
1072
- "transformer.h.49.mlp.dense_h_to_4h.g_idx": "model-00006-of-00010.safetensors",
1073
- "transformer.h.49.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
1074
- "transformer.h.49.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
1075
- "transformer.h.49.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
1076
  "transformer.h.49.self_attention.dense.bias": "model-00006-of-00010.safetensors",
1077
  "transformer.h.49.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
1078
  "transformer.h.49.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
@@ -1107,54 +1107,54 @@
1107
  "transformer.h.5.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
1108
  "transformer.h.5.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
1109
  "transformer.h.5.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
1110
- "transformer.h.50.ln_attn.bias": "model-00006-of-00010.safetensors",
1111
- "transformer.h.50.ln_attn.weight": "model-00006-of-00010.safetensors",
1112
- "transformer.h.50.ln_mlp.bias": "model-00006-of-00010.safetensors",
1113
- "transformer.h.50.ln_mlp.weight": "model-00006-of-00010.safetensors",
1114
- "transformer.h.50.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
1115
- "transformer.h.50.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
1116
- "transformer.h.50.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
1117
- "transformer.h.50.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
1118
- "transformer.h.50.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
1119
- "transformer.h.50.mlp.dense_h_to_4h.bias": "model-00006-of-00010.safetensors",
1120
- "transformer.h.50.mlp.dense_h_to_4h.g_idx": "model-00006-of-00010.safetensors",
1121
- "transformer.h.50.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
1122
- "transformer.h.50.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
1123
- "transformer.h.50.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
1124
- "transformer.h.50.self_attention.dense.bias": "model-00006-of-00010.safetensors",
1125
- "transformer.h.50.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
1126
- "transformer.h.50.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
1127
- "transformer.h.50.self_attention.dense.qzeros": "model-00006-of-00010.safetensors",
1128
- "transformer.h.50.self_attention.dense.scales": "model-00006-of-00010.safetensors",
1129
- "transformer.h.50.self_attention.query_key_value.bias": "model-00006-of-00010.safetensors",
1130
- "transformer.h.50.self_attention.query_key_value.g_idx": "model-00006-of-00010.safetensors",
1131
- "transformer.h.50.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
1132
- "transformer.h.50.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
1133
- "transformer.h.50.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
1134
- "transformer.h.51.ln_attn.bias": "model-00006-of-00010.safetensors",
1135
- "transformer.h.51.ln_attn.weight": "model-00006-of-00010.safetensors",
1136
- "transformer.h.51.ln_mlp.bias": "model-00006-of-00010.safetensors",
1137
- "transformer.h.51.ln_mlp.weight": "model-00006-of-00010.safetensors",
1138
- "transformer.h.51.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
1139
- "transformer.h.51.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
1140
- "transformer.h.51.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
1141
- "transformer.h.51.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
1142
- "transformer.h.51.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
1143
- "transformer.h.51.mlp.dense_h_to_4h.bias": "model-00006-of-00010.safetensors",
1144
- "transformer.h.51.mlp.dense_h_to_4h.g_idx": "model-00006-of-00010.safetensors",
1145
- "transformer.h.51.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
1146
- "transformer.h.51.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
1147
- "transformer.h.51.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
1148
- "transformer.h.51.self_attention.dense.bias": "model-00006-of-00010.safetensors",
1149
- "transformer.h.51.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
1150
- "transformer.h.51.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
1151
- "transformer.h.51.self_attention.dense.qzeros": "model-00006-of-00010.safetensors",
1152
- "transformer.h.51.self_attention.dense.scales": "model-00006-of-00010.safetensors",
1153
- "transformer.h.51.self_attention.query_key_value.bias": "model-00006-of-00010.safetensors",
1154
- "transformer.h.51.self_attention.query_key_value.g_idx": "model-00006-of-00010.safetensors",
1155
- "transformer.h.51.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
1156
- "transformer.h.51.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
1157
- "transformer.h.51.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
1158
  "transformer.h.52.ln_attn.bias": "model-00007-of-00010.safetensors",
1159
  "transformer.h.52.ln_attn.weight": "model-00007-of-00010.safetensors",
1160
  "transformer.h.52.ln_mlp.bias": "model-00007-of-00010.safetensors",
@@ -1169,16 +1169,16 @@
1169
  "transformer.h.52.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1170
  "transformer.h.52.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1171
  "transformer.h.52.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1172
- "transformer.h.52.self_attention.dense.bias": "model-00006-of-00010.safetensors",
1173
- "transformer.h.52.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
1174
- "transformer.h.52.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
1175
- "transformer.h.52.self_attention.dense.qzeros": "model-00006-of-00010.safetensors",
1176
- "transformer.h.52.self_attention.dense.scales": "model-00006-of-00010.safetensors",
1177
- "transformer.h.52.self_attention.query_key_value.bias": "model-00006-of-00010.safetensors",
1178
- "transformer.h.52.self_attention.query_key_value.g_idx": "model-00006-of-00010.safetensors",
1179
- "transformer.h.52.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
1180
- "transformer.h.52.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
1181
- "transformer.h.52.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
1182
  "transformer.h.53.ln_attn.bias": "model-00007-of-00010.safetensors",
1183
  "transformer.h.53.ln_attn.weight": "model-00007-of-00010.safetensors",
1184
  "transformer.h.53.ln_mlp.bias": "model-00007-of-00010.safetensors",
@@ -1299,20 +1299,20 @@
1299
  "transformer.h.57.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1300
  "transformer.h.57.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1301
  "transformer.h.57.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1302
- "transformer.h.58.ln_attn.bias": "model-00007-of-00010.safetensors",
1303
- "transformer.h.58.ln_attn.weight": "model-00007-of-00010.safetensors",
1304
- "transformer.h.58.ln_mlp.bias": "model-00007-of-00010.safetensors",
1305
- "transformer.h.58.ln_mlp.weight": "model-00007-of-00010.safetensors",
1306
- "transformer.h.58.mlp.dense_4h_to_h.bias": "model-00007-of-00010.safetensors",
1307
- "transformer.h.58.mlp.dense_4h_to_h.g_idx": "model-00007-of-00010.safetensors",
1308
- "transformer.h.58.mlp.dense_4h_to_h.qweight": "model-00007-of-00010.safetensors",
1309
- "transformer.h.58.mlp.dense_4h_to_h.qzeros": "model-00007-of-00010.safetensors",
1310
- "transformer.h.58.mlp.dense_4h_to_h.scales": "model-00007-of-00010.safetensors",
1311
- "transformer.h.58.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
1312
- "transformer.h.58.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
1313
- "transformer.h.58.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1314
- "transformer.h.58.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1315
- "transformer.h.58.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1316
  "transformer.h.58.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1317
  "transformer.h.58.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1318
  "transformer.h.58.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
@@ -1323,44 +1323,44 @@
1323
  "transformer.h.58.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1324
  "transformer.h.58.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1325
  "transformer.h.58.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1326
- "transformer.h.59.ln_attn.bias": "model-00007-of-00010.safetensors",
1327
- "transformer.h.59.ln_attn.weight": "model-00007-of-00010.safetensors",
1328
- "transformer.h.59.ln_mlp.bias": "model-00007-of-00010.safetensors",
1329
- "transformer.h.59.ln_mlp.weight": "model-00007-of-00010.safetensors",
1330
- "transformer.h.59.mlp.dense_4h_to_h.bias": "model-00007-of-00010.safetensors",
1331
- "transformer.h.59.mlp.dense_4h_to_h.g_idx": "model-00007-of-00010.safetensors",
1332
- "transformer.h.59.mlp.dense_4h_to_h.qweight": "model-00007-of-00010.safetensors",
1333
- "transformer.h.59.mlp.dense_4h_to_h.qzeros": "model-00007-of-00010.safetensors",
1334
- "transformer.h.59.mlp.dense_4h_to_h.scales": "model-00007-of-00010.safetensors",
1335
- "transformer.h.59.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
1336
- "transformer.h.59.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
1337
- "transformer.h.59.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1338
- "transformer.h.59.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1339
- "transformer.h.59.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1340
- "transformer.h.59.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1341
- "transformer.h.59.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1342
- "transformer.h.59.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
1343
- "transformer.h.59.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
1344
- "transformer.h.59.self_attention.dense.scales": "model-00007-of-00010.safetensors",
1345
- "transformer.h.59.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
1346
- "transformer.h.59.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
1347
- "transformer.h.59.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1348
- "transformer.h.59.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1349
- "transformer.h.59.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1350
- "transformer.h.6.ln_attn.bias": "model-00001-of-00010.safetensors",
1351
- "transformer.h.6.ln_attn.weight": "model-00001-of-00010.safetensors",
1352
- "transformer.h.6.ln_mlp.bias": "model-00001-of-00010.safetensors",
1353
- "transformer.h.6.ln_mlp.weight": "model-00001-of-00010.safetensors",
1354
  "transformer.h.6.mlp.dense_4h_to_h.bias": "model-00001-of-00010.safetensors",
1355
  "transformer.h.6.mlp.dense_4h_to_h.g_idx": "model-00001-of-00010.safetensors",
1356
  "transformer.h.6.mlp.dense_4h_to_h.qweight": "model-00001-of-00010.safetensors",
1357
  "transformer.h.6.mlp.dense_4h_to_h.qzeros": "model-00001-of-00010.safetensors",
1358
  "transformer.h.6.mlp.dense_4h_to_h.scales": "model-00001-of-00010.safetensors",
1359
- "transformer.h.6.mlp.dense_h_to_4h.bias": "model-00001-of-00010.safetensors",
1360
- "transformer.h.6.mlp.dense_h_to_4h.g_idx": "model-00001-of-00010.safetensors",
1361
  "transformer.h.6.mlp.dense_h_to_4h.qweight": "model-00001-of-00010.safetensors",
1362
  "transformer.h.6.mlp.dense_h_to_4h.qzeros": "model-00001-of-00010.safetensors",
1363
- "transformer.h.6.mlp.dense_h_to_4h.scales": "model-00001-of-00010.safetensors",
1364
  "transformer.h.6.self_attention.dense.bias": "model-00001-of-00010.safetensors",
1365
  "transformer.h.6.self_attention.dense.g_idx": "model-00001-of-00010.safetensors",
1366
  "transformer.h.6.self_attention.dense.qweight": "model-00001-of-00010.safetensors",
@@ -1371,30 +1371,30 @@
1371
  "transformer.h.6.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
1372
  "transformer.h.6.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
1373
  "transformer.h.6.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
1374
- "transformer.h.60.ln_attn.bias": "model-00007-of-00010.safetensors",
1375
- "transformer.h.60.ln_attn.weight": "model-00007-of-00010.safetensors",
1376
- "transformer.h.60.ln_mlp.bias": "model-00007-of-00010.safetensors",
1377
- "transformer.h.60.ln_mlp.weight": "model-00007-of-00010.safetensors",
1378
- "transformer.h.60.mlp.dense_4h_to_h.bias": "model-00007-of-00010.safetensors",
1379
- "transformer.h.60.mlp.dense_4h_to_h.g_idx": "model-00007-of-00010.safetensors",
1380
- "transformer.h.60.mlp.dense_4h_to_h.qweight": "model-00007-of-00010.safetensors",
1381
- "transformer.h.60.mlp.dense_4h_to_h.qzeros": "model-00007-of-00010.safetensors",
1382
- "transformer.h.60.mlp.dense_4h_to_h.scales": "model-00007-of-00010.safetensors",
1383
- "transformer.h.60.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
1384
- "transformer.h.60.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
1385
- "transformer.h.60.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1386
- "transformer.h.60.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1387
- "transformer.h.60.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1388
- "transformer.h.60.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1389
- "transformer.h.60.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1390
- "transformer.h.60.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
1391
- "transformer.h.60.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
1392
- "transformer.h.60.self_attention.dense.scales": "model-00007-of-00010.safetensors",
1393
- "transformer.h.60.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
1394
- "transformer.h.60.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
1395
- "transformer.h.60.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1396
- "transformer.h.60.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1397
- "transformer.h.60.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1398
  "transformer.h.61.ln_attn.bias": "model-00008-of-00010.safetensors",
1399
  "transformer.h.61.ln_attn.weight": "model-00008-of-00010.safetensors",
1400
  "transformer.h.61.ln_mlp.bias": "model-00008-of-00010.safetensors",
@@ -1409,16 +1409,16 @@
1409
  "transformer.h.61.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1410
  "transformer.h.61.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1411
  "transformer.h.61.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1412
- "transformer.h.61.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1413
- "transformer.h.61.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1414
- "transformer.h.61.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
1415
- "transformer.h.61.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
1416
- "transformer.h.61.self_attention.dense.scales": "model-00007-of-00010.safetensors",
1417
- "transformer.h.61.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
1418
- "transformer.h.61.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
1419
- "transformer.h.61.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1420
- "transformer.h.61.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1421
- "transformer.h.61.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1422
  "transformer.h.62.ln_attn.bias": "model-00008-of-00010.safetensors",
1423
  "transformer.h.62.ln_attn.weight": "model-00008-of-00010.safetensors",
1424
  "transformer.h.62.ln_mlp.bias": "model-00008-of-00010.safetensors",
@@ -1515,20 +1515,20 @@
1515
  "transformer.h.65.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1516
  "transformer.h.65.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1517
  "transformer.h.65.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1518
- "transformer.h.66.ln_attn.bias": "model-00008-of-00010.safetensors",
1519
- "transformer.h.66.ln_attn.weight": "model-00008-of-00010.safetensors",
1520
- "transformer.h.66.ln_mlp.bias": "model-00008-of-00010.safetensors",
1521
- "transformer.h.66.ln_mlp.weight": "model-00008-of-00010.safetensors",
1522
  "transformer.h.66.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1523
  "transformer.h.66.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1524
  "transformer.h.66.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1525
  "transformer.h.66.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1526
  "transformer.h.66.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1527
- "transformer.h.66.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
1528
- "transformer.h.66.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
1529
- "transformer.h.66.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1530
- "transformer.h.66.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1531
- "transformer.h.66.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1532
  "transformer.h.66.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1533
  "transformer.h.66.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1534
  "transformer.h.66.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
@@ -1539,78 +1539,78 @@
1539
  "transformer.h.66.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1540
  "transformer.h.66.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1541
  "transformer.h.66.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1542
- "transformer.h.67.ln_attn.bias": "model-00008-of-00010.safetensors",
1543
- "transformer.h.67.ln_attn.weight": "model-00008-of-00010.safetensors",
1544
- "transformer.h.67.ln_mlp.bias": "model-00008-of-00010.safetensors",
1545
- "transformer.h.67.ln_mlp.weight": "model-00008-of-00010.safetensors",
1546
- "transformer.h.67.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1547
- "transformer.h.67.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1548
- "transformer.h.67.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1549
- "transformer.h.67.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1550
- "transformer.h.67.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1551
- "transformer.h.67.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
1552
- "transformer.h.67.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
1553
- "transformer.h.67.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1554
- "transformer.h.67.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1555
- "transformer.h.67.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1556
- "transformer.h.67.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1557
- "transformer.h.67.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1558
- "transformer.h.67.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
1559
- "transformer.h.67.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
1560
- "transformer.h.67.self_attention.dense.scales": "model-00008-of-00010.safetensors",
1561
- "transformer.h.67.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
1562
- "transformer.h.67.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
1563
- "transformer.h.67.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1564
- "transformer.h.67.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1565
- "transformer.h.67.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1566
- "transformer.h.68.ln_attn.bias": "model-00008-of-00010.safetensors",
1567
- "transformer.h.68.ln_attn.weight": "model-00008-of-00010.safetensors",
1568
- "transformer.h.68.ln_mlp.bias": "model-00008-of-00010.safetensors",
1569
- "transformer.h.68.ln_mlp.weight": "model-00008-of-00010.safetensors",
1570
- "transformer.h.68.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1571
- "transformer.h.68.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1572
- "transformer.h.68.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1573
- "transformer.h.68.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1574
- "transformer.h.68.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1575
- "transformer.h.68.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
1576
- "transformer.h.68.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
1577
- "transformer.h.68.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1578
- "transformer.h.68.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1579
- "transformer.h.68.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1580
- "transformer.h.68.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1581
- "transformer.h.68.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1582
- "transformer.h.68.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
1583
- "transformer.h.68.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
1584
- "transformer.h.68.self_attention.dense.scales": "model-00008-of-00010.safetensors",
1585
- "transformer.h.68.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
1586
- "transformer.h.68.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
1587
- "transformer.h.68.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1588
- "transformer.h.68.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1589
- "transformer.h.68.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1590
- "transformer.h.69.ln_attn.bias": "model-00008-of-00010.safetensors",
1591
- "transformer.h.69.ln_attn.weight": "model-00008-of-00010.safetensors",
1592
- "transformer.h.69.ln_mlp.bias": "model-00008-of-00010.safetensors",
1593
- "transformer.h.69.ln_mlp.weight": "model-00008-of-00010.safetensors",
1594
- "transformer.h.69.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1595
- "transformer.h.69.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1596
- "transformer.h.69.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1597
- "transformer.h.69.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1598
- "transformer.h.69.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1599
- "transformer.h.69.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
1600
- "transformer.h.69.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
1601
- "transformer.h.69.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1602
- "transformer.h.69.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1603
- "transformer.h.69.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1604
- "transformer.h.69.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1605
- "transformer.h.69.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1606
- "transformer.h.69.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
1607
- "transformer.h.69.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
1608
- "transformer.h.69.self_attention.dense.scales": "model-00008-of-00010.safetensors",
1609
- "transformer.h.69.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
1610
- "transformer.h.69.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
1611
- "transformer.h.69.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1612
- "transformer.h.69.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1613
- "transformer.h.69.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1614
  "transformer.h.7.ln_attn.bias": "model-00002-of-00010.safetensors",
1615
  "transformer.h.7.ln_attn.weight": "model-00002-of-00010.safetensors",
1616
  "transformer.h.7.ln_mlp.bias": "model-00002-of-00010.safetensors",
@@ -1625,16 +1625,16 @@
1625
  "transformer.h.7.mlp.dense_h_to_4h.qweight": "model-00002-of-00010.safetensors",
1626
  "transformer.h.7.mlp.dense_h_to_4h.qzeros": "model-00002-of-00010.safetensors",
1627
  "transformer.h.7.mlp.dense_h_to_4h.scales": "model-00002-of-00010.safetensors",
1628
- "transformer.h.7.self_attention.dense.bias": "model-00001-of-00010.safetensors",
1629
- "transformer.h.7.self_attention.dense.g_idx": "model-00001-of-00010.safetensors",
1630
- "transformer.h.7.self_attention.dense.qweight": "model-00001-of-00010.safetensors",
1631
- "transformer.h.7.self_attention.dense.qzeros": "model-00001-of-00010.safetensors",
1632
- "transformer.h.7.self_attention.dense.scales": "model-00001-of-00010.safetensors",
1633
- "transformer.h.7.self_attention.query_key_value.bias": "model-00001-of-00010.safetensors",
1634
- "transformer.h.7.self_attention.query_key_value.g_idx": "model-00001-of-00010.safetensors",
1635
- "transformer.h.7.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
1636
- "transformer.h.7.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
1637
- "transformer.h.7.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
1638
  "transformer.h.70.ln_attn.bias": "model-00009-of-00010.safetensors",
1639
  "transformer.h.70.ln_attn.weight": "model-00009-of-00010.safetensors",
1640
  "transformer.h.70.ln_mlp.bias": "model-00009-of-00010.safetensors",
@@ -1649,16 +1649,16 @@
1649
  "transformer.h.70.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1650
  "transformer.h.70.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1651
  "transformer.h.70.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1652
- "transformer.h.70.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1653
- "transformer.h.70.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1654
- "transformer.h.70.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
1655
- "transformer.h.70.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
1656
- "transformer.h.70.self_attention.dense.scales": "model-00008-of-00010.safetensors",
1657
- "transformer.h.70.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
1658
- "transformer.h.70.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
1659
- "transformer.h.70.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1660
- "transformer.h.70.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1661
- "transformer.h.70.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1662
  "transformer.h.71.ln_attn.bias": "model-00009-of-00010.safetensors",
1663
  "transformer.h.71.ln_attn.weight": "model-00009-of-00010.safetensors",
1664
  "transformer.h.71.ln_mlp.bias": "model-00009-of-00010.safetensors",
@@ -1755,20 +1755,20 @@
1755
  "transformer.h.74.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1756
  "transformer.h.74.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1757
  "transformer.h.74.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1758
- "transformer.h.75.ln_attn.bias": "model-00009-of-00010.safetensors",
1759
- "transformer.h.75.ln_attn.weight": "model-00009-of-00010.safetensors",
1760
- "transformer.h.75.ln_mlp.bias": "model-00009-of-00010.safetensors",
1761
- "transformer.h.75.ln_mlp.weight": "model-00009-of-00010.safetensors",
1762
- "transformer.h.75.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
1763
- "transformer.h.75.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
1764
- "transformer.h.75.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
1765
- "transformer.h.75.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
1766
- "transformer.h.75.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
1767
- "transformer.h.75.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1768
- "transformer.h.75.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1769
- "transformer.h.75.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1770
- "transformer.h.75.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1771
- "transformer.h.75.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1772
  "transformer.h.75.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1773
  "transformer.h.75.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1774
  "transformer.h.75.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
@@ -1779,78 +1779,78 @@
1779
  "transformer.h.75.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1780
  "transformer.h.75.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1781
  "transformer.h.75.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1782
- "transformer.h.76.ln_attn.bias": "model-00009-of-00010.safetensors",
1783
- "transformer.h.76.ln_attn.weight": "model-00009-of-00010.safetensors",
1784
- "transformer.h.76.ln_mlp.bias": "model-00009-of-00010.safetensors",
1785
- "transformer.h.76.ln_mlp.weight": "model-00009-of-00010.safetensors",
1786
- "transformer.h.76.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
1787
- "transformer.h.76.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
1788
- "transformer.h.76.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
1789
- "transformer.h.76.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
1790
- "transformer.h.76.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
1791
- "transformer.h.76.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1792
- "transformer.h.76.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1793
- "transformer.h.76.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1794
- "transformer.h.76.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1795
- "transformer.h.76.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1796
- "transformer.h.76.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1797
- "transformer.h.76.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1798
- "transformer.h.76.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1799
- "transformer.h.76.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1800
- "transformer.h.76.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1801
- "transformer.h.76.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1802
- "transformer.h.76.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1803
- "transformer.h.76.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1804
- "transformer.h.76.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1805
- "transformer.h.76.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1806
- "transformer.h.77.ln_attn.bias": "model-00009-of-00010.safetensors",
1807
- "transformer.h.77.ln_attn.weight": "model-00009-of-00010.safetensors",
1808
- "transformer.h.77.ln_mlp.bias": "model-00009-of-00010.safetensors",
1809
- "transformer.h.77.ln_mlp.weight": "model-00009-of-00010.safetensors",
1810
- "transformer.h.77.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
1811
- "transformer.h.77.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
1812
- "transformer.h.77.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
1813
- "transformer.h.77.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
1814
- "transformer.h.77.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
1815
- "transformer.h.77.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1816
- "transformer.h.77.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1817
- "transformer.h.77.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1818
- "transformer.h.77.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1819
- "transformer.h.77.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1820
- "transformer.h.77.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1821
- "transformer.h.77.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1822
- "transformer.h.77.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1823
- "transformer.h.77.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1824
- "transformer.h.77.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1825
- "transformer.h.77.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1826
- "transformer.h.77.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1827
- "transformer.h.77.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1828
- "transformer.h.77.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1829
- "transformer.h.77.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1830
- "transformer.h.78.ln_attn.bias": "model-00009-of-00010.safetensors",
1831
- "transformer.h.78.ln_attn.weight": "model-00009-of-00010.safetensors",
1832
- "transformer.h.78.ln_mlp.bias": "model-00009-of-00010.safetensors",
1833
- "transformer.h.78.ln_mlp.weight": "model-00009-of-00010.safetensors",
1834
- "transformer.h.78.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
1835
- "transformer.h.78.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
1836
- "transformer.h.78.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
1837
- "transformer.h.78.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
1838
- "transformer.h.78.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
1839
- "transformer.h.78.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1840
- "transformer.h.78.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1841
- "transformer.h.78.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1842
- "transformer.h.78.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1843
- "transformer.h.78.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1844
- "transformer.h.78.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1845
- "transformer.h.78.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1846
- "transformer.h.78.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1847
- "transformer.h.78.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1848
- "transformer.h.78.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1849
- "transformer.h.78.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1850
- "transformer.h.78.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1851
- "transformer.h.78.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1852
- "transformer.h.78.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1853
- "transformer.h.78.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1854
  "transformer.h.79.ln_attn.bias": "model-00010-of-00010.safetensors",
1855
  "transformer.h.79.ln_attn.weight": "model-00010-of-00010.safetensors",
1856
  "transformer.h.79.ln_mlp.bias": "model-00010-of-00010.safetensors",
@@ -1865,16 +1865,16 @@
1865
  "transformer.h.79.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
1866
  "transformer.h.79.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
1867
  "transformer.h.79.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
1868
- "transformer.h.79.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1869
- "transformer.h.79.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1870
- "transformer.h.79.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1871
- "transformer.h.79.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1872
- "transformer.h.79.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1873
- "transformer.h.79.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1874
- "transformer.h.79.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1875
- "transformer.h.79.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1876
- "transformer.h.79.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1877
- "transformer.h.79.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1878
  "transformer.h.8.ln_attn.bias": "model-00002-of-00010.safetensors",
1879
  "transformer.h.8.ln_attn.weight": "model-00002-of-00010.safetensors",
1880
  "transformer.h.8.ln_mlp.bias": "model-00002-of-00010.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 94252476416
4
  },
5
  "weight_map": {
6
  "transformer.h.0.ln_attn.bias": "model-00001-of-00010.safetensors",
 
171
  "transformer.h.14.self_attention.query_key_value.qweight": "model-00002-of-00010.safetensors",
172
  "transformer.h.14.self_attention.query_key_value.qzeros": "model-00002-of-00010.safetensors",
173
  "transformer.h.14.self_attention.query_key_value.scales": "model-00002-of-00010.safetensors",
174
+ "transformer.h.15.ln_attn.bias": "model-00003-of-00010.safetensors",
175
+ "transformer.h.15.ln_attn.weight": "model-00003-of-00010.safetensors",
176
+ "transformer.h.15.ln_mlp.bias": "model-00003-of-00010.safetensors",
177
+ "transformer.h.15.ln_mlp.weight": "model-00003-of-00010.safetensors",
178
  "transformer.h.15.mlp.dense_4h_to_h.bias": "model-00002-of-00010.safetensors",
179
  "transformer.h.15.mlp.dense_4h_to_h.g_idx": "model-00002-of-00010.safetensors",
180
  "transformer.h.15.mlp.dense_4h_to_h.qweight": "model-00002-of-00010.safetensors",
181
  "transformer.h.15.mlp.dense_4h_to_h.qzeros": "model-00002-of-00010.safetensors",
182
  "transformer.h.15.mlp.dense_4h_to_h.scales": "model-00002-of-00010.safetensors",
183
+ "transformer.h.15.mlp.dense_h_to_4h.bias": "model-00003-of-00010.safetensors",
184
+ "transformer.h.15.mlp.dense_h_to_4h.g_idx": "model-00003-of-00010.safetensors",
185
+ "transformer.h.15.mlp.dense_h_to_4h.qweight": "model-00003-of-00010.safetensors",
186
+ "transformer.h.15.mlp.dense_h_to_4h.qzeros": "model-00003-of-00010.safetensors",
187
+ "transformer.h.15.mlp.dense_h_to_4h.scales": "model-00003-of-00010.safetensors",
188
  "transformer.h.15.self_attention.dense.bias": "model-00002-of-00010.safetensors",
189
  "transformer.h.15.self_attention.dense.g_idx": "model-00002-of-00010.safetensors",
190
  "transformer.h.15.self_attention.dense.qweight": "model-00002-of-00010.safetensors",
 
209
  "transformer.h.16.mlp.dense_h_to_4h.qweight": "model-00003-of-00010.safetensors",
210
  "transformer.h.16.mlp.dense_h_to_4h.qzeros": "model-00003-of-00010.safetensors",
211
  "transformer.h.16.mlp.dense_h_to_4h.scales": "model-00003-of-00010.safetensors",
212
+ "transformer.h.16.self_attention.dense.bias": "model-00003-of-00010.safetensors",
213
+ "transformer.h.16.self_attention.dense.g_idx": "model-00003-of-00010.safetensors",
214
+ "transformer.h.16.self_attention.dense.qweight": "model-00003-of-00010.safetensors",
215
+ "transformer.h.16.self_attention.dense.qzeros": "model-00003-of-00010.safetensors",
216
+ "transformer.h.16.self_attention.dense.scales": "model-00003-of-00010.safetensors",
217
+ "transformer.h.16.self_attention.query_key_value.bias": "model-00003-of-00010.safetensors",
218
+ "transformer.h.16.self_attention.query_key_value.g_idx": "model-00003-of-00010.safetensors",
219
+ "transformer.h.16.self_attention.query_key_value.qweight": "model-00003-of-00010.safetensors",
220
+ "transformer.h.16.self_attention.query_key_value.qzeros": "model-00003-of-00010.safetensors",
221
+ "transformer.h.16.self_attention.query_key_value.scales": "model-00003-of-00010.safetensors",
222
  "transformer.h.17.ln_attn.bias": "model-00003-of-00010.safetensors",
223
  "transformer.h.17.ln_attn.weight": "model-00003-of-00010.safetensors",
224
  "transformer.h.17.ln_mlp.bias": "model-00003-of-00010.safetensors",
 
411
  "transformer.h.23.self_attention.query_key_value.qweight": "model-00003-of-00010.safetensors",
412
  "transformer.h.23.self_attention.query_key_value.qzeros": "model-00003-of-00010.safetensors",
413
  "transformer.h.23.self_attention.query_key_value.scales": "model-00003-of-00010.safetensors",
414
+ "transformer.h.24.ln_attn.bias": "model-00004-of-00010.safetensors",
415
+ "transformer.h.24.ln_attn.weight": "model-00004-of-00010.safetensors",
416
+ "transformer.h.24.ln_mlp.bias": "model-00004-of-00010.safetensors",
417
+ "transformer.h.24.ln_mlp.weight": "model-00004-of-00010.safetensors",
418
+ "transformer.h.24.mlp.dense_4h_to_h.bias": "model-00004-of-00010.safetensors",
419
+ "transformer.h.24.mlp.dense_4h_to_h.g_idx": "model-00004-of-00010.safetensors",
420
+ "transformer.h.24.mlp.dense_4h_to_h.qweight": "model-00004-of-00010.safetensors",
421
+ "transformer.h.24.mlp.dense_4h_to_h.qzeros": "model-00004-of-00010.safetensors",
422
+ "transformer.h.24.mlp.dense_4h_to_h.scales": "model-00004-of-00010.safetensors",
423
+ "transformer.h.24.mlp.dense_h_to_4h.bias": "model-00004-of-00010.safetensors",
424
+ "transformer.h.24.mlp.dense_h_to_4h.g_idx": "model-00004-of-00010.safetensors",
425
+ "transformer.h.24.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
426
+ "transformer.h.24.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
427
+ "transformer.h.24.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
428
  "transformer.h.24.self_attention.dense.bias": "model-00003-of-00010.safetensors",
429
  "transformer.h.24.self_attention.dense.g_idx": "model-00003-of-00010.safetensors",
430
  "transformer.h.24.self_attention.dense.qweight": "model-00003-of-00010.safetensors",
 
449
  "transformer.h.25.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
450
  "transformer.h.25.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
451
  "transformer.h.25.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
452
+ "transformer.h.25.self_attention.dense.bias": "model-00004-of-00010.safetensors",
453
+ "transformer.h.25.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
454
+ "transformer.h.25.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
455
+ "transformer.h.25.self_attention.dense.qzeros": "model-00004-of-00010.safetensors",
456
+ "transformer.h.25.self_attention.dense.scales": "model-00004-of-00010.safetensors",
457
+ "transformer.h.25.self_attention.query_key_value.bias": "model-00004-of-00010.safetensors",
458
+ "transformer.h.25.self_attention.query_key_value.g_idx": "model-00004-of-00010.safetensors",
459
+ "transformer.h.25.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
460
+ "transformer.h.25.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
461
+ "transformer.h.25.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
462
  "transformer.h.26.ln_attn.bias": "model-00004-of-00010.safetensors",
463
  "transformer.h.26.ln_attn.weight": "model-00004-of-00010.safetensors",
464
  "transformer.h.26.ln_mlp.bias": "model-00004-of-00010.safetensors",
 
627
  "transformer.h.31.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
628
  "transformer.h.31.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
629
  "transformer.h.31.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
630
+ "transformer.h.32.ln_attn.bias": "model-00005-of-00010.safetensors",
631
+ "transformer.h.32.ln_attn.weight": "model-00005-of-00010.safetensors",
632
+ "transformer.h.32.ln_mlp.bias": "model-00005-of-00010.safetensors",
633
+ "transformer.h.32.ln_mlp.weight": "model-00005-of-00010.safetensors",
634
  "transformer.h.32.mlp.dense_4h_to_h.bias": "model-00004-of-00010.safetensors",
635
  "transformer.h.32.mlp.dense_4h_to_h.g_idx": "model-00004-of-00010.safetensors",
636
  "transformer.h.32.mlp.dense_4h_to_h.qweight": "model-00004-of-00010.safetensors",
637
  "transformer.h.32.mlp.dense_4h_to_h.qzeros": "model-00004-of-00010.safetensors",
638
  "transformer.h.32.mlp.dense_4h_to_h.scales": "model-00004-of-00010.safetensors",
639
+ "transformer.h.32.mlp.dense_h_to_4h.bias": "model-00005-of-00010.safetensors",
640
+ "transformer.h.32.mlp.dense_h_to_4h.g_idx": "model-00005-of-00010.safetensors",
641
+ "transformer.h.32.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
642
+ "transformer.h.32.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
643
+ "transformer.h.32.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
644
  "transformer.h.32.self_attention.dense.bias": "model-00004-of-00010.safetensors",
645
  "transformer.h.32.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
646
  "transformer.h.32.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
 
651
  "transformer.h.32.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
652
  "transformer.h.32.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
653
  "transformer.h.32.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
654
+ "transformer.h.33.ln_attn.bias": "model-00005-of-00010.safetensors",
655
+ "transformer.h.33.ln_attn.weight": "model-00005-of-00010.safetensors",
656
+ "transformer.h.33.ln_mlp.bias": "model-00005-of-00010.safetensors",
657
+ "transformer.h.33.ln_mlp.weight": "model-00005-of-00010.safetensors",
658
+ "transformer.h.33.mlp.dense_4h_to_h.bias": "model-00005-of-00010.safetensors",
659
+ "transformer.h.33.mlp.dense_4h_to_h.g_idx": "model-00005-of-00010.safetensors",
660
+ "transformer.h.33.mlp.dense_4h_to_h.qweight": "model-00005-of-00010.safetensors",
661
+ "transformer.h.33.mlp.dense_4h_to_h.qzeros": "model-00005-of-00010.safetensors",
662
+ "transformer.h.33.mlp.dense_4h_to_h.scales": "model-00005-of-00010.safetensors",
663
+ "transformer.h.33.mlp.dense_h_to_4h.bias": "model-00005-of-00010.safetensors",
664
+ "transformer.h.33.mlp.dense_h_to_4h.g_idx": "model-00005-of-00010.safetensors",
665
+ "transformer.h.33.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
666
+ "transformer.h.33.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
667
+ "transformer.h.33.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
668
+ "transformer.h.33.self_attention.dense.bias": "model-00005-of-00010.safetensors",
669
+ "transformer.h.33.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
670
+ "transformer.h.33.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
671
+ "transformer.h.33.self_attention.dense.qzeros": "model-00005-of-00010.safetensors",
672
+ "transformer.h.33.self_attention.dense.scales": "model-00005-of-00010.safetensors",
673
+ "transformer.h.33.self_attention.query_key_value.bias": "model-00005-of-00010.safetensors",
674
+ "transformer.h.33.self_attention.query_key_value.g_idx": "model-00005-of-00010.safetensors",
675
+ "transformer.h.33.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
676
+ "transformer.h.33.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
677
+ "transformer.h.33.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
678
  "transformer.h.34.ln_attn.bias": "model-00005-of-00010.safetensors",
679
  "transformer.h.34.ln_attn.weight": "model-00005-of-00010.safetensors",
680
  "transformer.h.34.ln_mlp.bias": "model-00005-of-00010.safetensors",
 
689
  "transformer.h.34.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
690
  "transformer.h.34.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
691
  "transformer.h.34.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
692
+ "transformer.h.34.self_attention.dense.bias": "model-00005-of-00010.safetensors",
693
+ "transformer.h.34.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
694
+ "transformer.h.34.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
695
+ "transformer.h.34.self_attention.dense.qzeros": "model-00005-of-00010.safetensors",
696
+ "transformer.h.34.self_attention.dense.scales": "model-00005-of-00010.safetensors",
697
+ "transformer.h.34.self_attention.query_key_value.bias": "model-00005-of-00010.safetensors",
698
+ "transformer.h.34.self_attention.query_key_value.g_idx": "model-00005-of-00010.safetensors",
699
+ "transformer.h.34.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
700
+ "transformer.h.34.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
701
+ "transformer.h.34.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
702
  "transformer.h.35.ln_attn.bias": "model-00005-of-00010.safetensors",
703
  "transformer.h.35.ln_attn.weight": "model-00005-of-00010.safetensors",
704
  "transformer.h.35.ln_mlp.bias": "model-00005-of-00010.safetensors",
 
867
  "transformer.h.40.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
868
  "transformer.h.40.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
869
  "transformer.h.40.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
870
+ "transformer.h.41.ln_attn.bias": "model-00006-of-00010.safetensors",
871
+ "transformer.h.41.ln_attn.weight": "model-00006-of-00010.safetensors",
872
+ "transformer.h.41.ln_mlp.bias": "model-00006-of-00010.safetensors",
873
+ "transformer.h.41.ln_mlp.weight": "model-00006-of-00010.safetensors",
874
+ "transformer.h.41.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
875
+ "transformer.h.41.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
876
+ "transformer.h.41.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
877
+ "transformer.h.41.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
878
+ "transformer.h.41.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
879
+ "transformer.h.41.mlp.dense_h_to_4h.bias": "model-00006-of-00010.safetensors",
880
+ "transformer.h.41.mlp.dense_h_to_4h.g_idx": "model-00006-of-00010.safetensors",
881
+ "transformer.h.41.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
882
+ "transformer.h.41.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
883
+ "transformer.h.41.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
884
  "transformer.h.41.self_attention.dense.bias": "model-00005-of-00010.safetensors",
885
  "transformer.h.41.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
886
  "transformer.h.41.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
 
891
  "transformer.h.41.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
892
  "transformer.h.41.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
893
  "transformer.h.41.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
894
+ "transformer.h.42.ln_attn.bias": "model-00006-of-00010.safetensors",
895
+ "transformer.h.42.ln_attn.weight": "model-00006-of-00010.safetensors",
896
+ "transformer.h.42.ln_mlp.bias": "model-00006-of-00010.safetensors",
897
+ "transformer.h.42.ln_mlp.weight": "model-00006-of-00010.safetensors",
898
+ "transformer.h.42.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
899
+ "transformer.h.42.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
900
+ "transformer.h.42.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
901
+ "transformer.h.42.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
902
+ "transformer.h.42.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
903
+ "transformer.h.42.mlp.dense_h_to_4h.bias": "model-00006-of-00010.safetensors",
904
+ "transformer.h.42.mlp.dense_h_to_4h.g_idx": "model-00006-of-00010.safetensors",
905
+ "transformer.h.42.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
906
+ "transformer.h.42.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
907
+ "transformer.h.42.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
908
+ "transformer.h.42.self_attention.dense.bias": "model-00006-of-00010.safetensors",
909
+ "transformer.h.42.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
910
+ "transformer.h.42.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
911
+ "transformer.h.42.self_attention.dense.qzeros": "model-00006-of-00010.safetensors",
912
+ "transformer.h.42.self_attention.dense.scales": "model-00006-of-00010.safetensors",
913
+ "transformer.h.42.self_attention.query_key_value.bias": "model-00006-of-00010.safetensors",
914
+ "transformer.h.42.self_attention.query_key_value.g_idx": "model-00006-of-00010.safetensors",
915
+ "transformer.h.42.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
916
+ "transformer.h.42.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
917
+ "transformer.h.42.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
918
  "transformer.h.43.ln_attn.bias": "model-00006-of-00010.safetensors",
919
  "transformer.h.43.ln_attn.weight": "model-00006-of-00010.safetensors",
920
  "transformer.h.43.ln_mlp.bias": "model-00006-of-00010.safetensors",
 
929
  "transformer.h.43.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
930
  "transformer.h.43.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
931
  "transformer.h.43.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
932
+ "transformer.h.43.self_attention.dense.bias": "model-00006-of-00010.safetensors",
933
+ "transformer.h.43.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
934
+ "transformer.h.43.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
935
+ "transformer.h.43.self_attention.dense.qzeros": "model-00006-of-00010.safetensors",
936
+ "transformer.h.43.self_attention.dense.scales": "model-00006-of-00010.safetensors",
937
+ "transformer.h.43.self_attention.query_key_value.bias": "model-00006-of-00010.safetensors",
938
+ "transformer.h.43.self_attention.query_key_value.g_idx": "model-00006-of-00010.safetensors",
939
+ "transformer.h.43.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
940
+ "transformer.h.43.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
941
+ "transformer.h.43.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
942
  "transformer.h.44.ln_attn.bias": "model-00006-of-00010.safetensors",
943
  "transformer.h.44.ln_attn.weight": "model-00006-of-00010.safetensors",
944
  "transformer.h.44.ln_mlp.bias": "model-00006-of-00010.safetensors",
 
1059
  "transformer.h.48.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
1060
  "transformer.h.48.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
1061
  "transformer.h.48.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
1062
+ "transformer.h.49.ln_attn.bias": "model-00007-of-00010.safetensors",
1063
+ "transformer.h.49.ln_attn.weight": "model-00007-of-00010.safetensors",
1064
+ "transformer.h.49.ln_mlp.bias": "model-00007-of-00010.safetensors",
1065
+ "transformer.h.49.ln_mlp.weight": "model-00007-of-00010.safetensors",
1066
  "transformer.h.49.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
1067
  "transformer.h.49.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
1068
  "transformer.h.49.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
1069
  "transformer.h.49.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
1070
  "transformer.h.49.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
1071
+ "transformer.h.49.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
1072
+ "transformer.h.49.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
1073
+ "transformer.h.49.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1074
+ "transformer.h.49.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1075
+ "transformer.h.49.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1076
  "transformer.h.49.self_attention.dense.bias": "model-00006-of-00010.safetensors",
1077
  "transformer.h.49.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
1078
  "transformer.h.49.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
 
1107
  "transformer.h.5.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
1108
  "transformer.h.5.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
1109
  "transformer.h.5.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
1110
+ "transformer.h.50.ln_attn.bias": "model-00007-of-00010.safetensors",
1111
+ "transformer.h.50.ln_attn.weight": "model-00007-of-00010.safetensors",
1112
+ "transformer.h.50.ln_mlp.bias": "model-00007-of-00010.safetensors",
1113
+ "transformer.h.50.ln_mlp.weight": "model-00007-of-00010.safetensors",
1114
+ "transformer.h.50.mlp.dense_4h_to_h.bias": "model-00007-of-00010.safetensors",
1115
+ "transformer.h.50.mlp.dense_4h_to_h.g_idx": "model-00007-of-00010.safetensors",
1116
+ "transformer.h.50.mlp.dense_4h_to_h.qweight": "model-00007-of-00010.safetensors",
1117
+ "transformer.h.50.mlp.dense_4h_to_h.qzeros": "model-00007-of-00010.safetensors",
1118
+ "transformer.h.50.mlp.dense_4h_to_h.scales": "model-00007-of-00010.safetensors",
1119
+ "transformer.h.50.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
1120
+ "transformer.h.50.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
1121
+ "transformer.h.50.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1122
+ "transformer.h.50.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1123
+ "transformer.h.50.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1124
+ "transformer.h.50.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1125
+ "transformer.h.50.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1126
+ "transformer.h.50.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
1127
+ "transformer.h.50.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
1128
+ "transformer.h.50.self_attention.dense.scales": "model-00007-of-00010.safetensors",
1129
+ "transformer.h.50.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
1130
+ "transformer.h.50.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
1131
+ "transformer.h.50.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1132
+ "transformer.h.50.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1133
+ "transformer.h.50.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1134
+ "transformer.h.51.ln_attn.bias": "model-00007-of-00010.safetensors",
1135
+ "transformer.h.51.ln_attn.weight": "model-00007-of-00010.safetensors",
1136
+ "transformer.h.51.ln_mlp.bias": "model-00007-of-00010.safetensors",
1137
+ "transformer.h.51.ln_mlp.weight": "model-00007-of-00010.safetensors",
1138
+ "transformer.h.51.mlp.dense_4h_to_h.bias": "model-00007-of-00010.safetensors",
1139
+ "transformer.h.51.mlp.dense_4h_to_h.g_idx": "model-00007-of-00010.safetensors",
1140
+ "transformer.h.51.mlp.dense_4h_to_h.qweight": "model-00007-of-00010.safetensors",
1141
+ "transformer.h.51.mlp.dense_4h_to_h.qzeros": "model-00007-of-00010.safetensors",
1142
+ "transformer.h.51.mlp.dense_4h_to_h.scales": "model-00007-of-00010.safetensors",
1143
+ "transformer.h.51.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
1144
+ "transformer.h.51.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
1145
+ "transformer.h.51.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1146
+ "transformer.h.51.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1147
+ "transformer.h.51.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1148
+ "transformer.h.51.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1149
+ "transformer.h.51.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1150
+ "transformer.h.51.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
1151
+ "transformer.h.51.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
1152
+ "transformer.h.51.self_attention.dense.scales": "model-00007-of-00010.safetensors",
1153
+ "transformer.h.51.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
1154
+ "transformer.h.51.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
1155
+ "transformer.h.51.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1156
+ "transformer.h.51.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1157
+ "transformer.h.51.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1158
  "transformer.h.52.ln_attn.bias": "model-00007-of-00010.safetensors",
1159
  "transformer.h.52.ln_attn.weight": "model-00007-of-00010.safetensors",
1160
  "transformer.h.52.ln_mlp.bias": "model-00007-of-00010.safetensors",
 
1169
  "transformer.h.52.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
1170
  "transformer.h.52.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
1171
  "transformer.h.52.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
1172
+ "transformer.h.52.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1173
+ "transformer.h.52.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1174
+ "transformer.h.52.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
1175
+ "transformer.h.52.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
1176
+ "transformer.h.52.self_attention.dense.scales": "model-00007-of-00010.safetensors",
1177
+ "transformer.h.52.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
1178
+ "transformer.h.52.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
1179
+ "transformer.h.52.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1180
+ "transformer.h.52.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1181
+ "transformer.h.52.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1182
  "transformer.h.53.ln_attn.bias": "model-00007-of-00010.safetensors",
1183
  "transformer.h.53.ln_attn.weight": "model-00007-of-00010.safetensors",
1184
  "transformer.h.53.ln_mlp.bias": "model-00007-of-00010.safetensors",
 
1299
  "transformer.h.57.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1300
  "transformer.h.57.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1301
  "transformer.h.57.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1302
+ "transformer.h.58.ln_attn.bias": "model-00008-of-00010.safetensors",
1303
+ "transformer.h.58.ln_attn.weight": "model-00008-of-00010.safetensors",
1304
+ "transformer.h.58.ln_mlp.bias": "model-00008-of-00010.safetensors",
1305
+ "transformer.h.58.ln_mlp.weight": "model-00008-of-00010.safetensors",
1306
+ "transformer.h.58.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1307
+ "transformer.h.58.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1308
+ "transformer.h.58.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1309
+ "transformer.h.58.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1310
+ "transformer.h.58.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1311
+ "transformer.h.58.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
1312
+ "transformer.h.58.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
1313
+ "transformer.h.58.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1314
+ "transformer.h.58.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1315
+ "transformer.h.58.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1316
  "transformer.h.58.self_attention.dense.bias": "model-00007-of-00010.safetensors",
1317
  "transformer.h.58.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
1318
  "transformer.h.58.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
 
1323
  "transformer.h.58.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
1324
  "transformer.h.58.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
1325
  "transformer.h.58.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
1326
+ "transformer.h.59.ln_attn.bias": "model-00008-of-00010.safetensors",
1327
+ "transformer.h.59.ln_attn.weight": "model-00008-of-00010.safetensors",
1328
+ "transformer.h.59.ln_mlp.bias": "model-00008-of-00010.safetensors",
1329
+ "transformer.h.59.ln_mlp.weight": "model-00008-of-00010.safetensors",
1330
+ "transformer.h.59.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1331
+ "transformer.h.59.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1332
+ "transformer.h.59.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1333
+ "transformer.h.59.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1334
+ "transformer.h.59.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1335
+ "transformer.h.59.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
1336
+ "transformer.h.59.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
1337
+ "transformer.h.59.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1338
+ "transformer.h.59.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1339
+ "transformer.h.59.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1340
+ "transformer.h.59.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1341
+ "transformer.h.59.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1342
+ "transformer.h.59.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
1343
+ "transformer.h.59.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
1344
+ "transformer.h.59.self_attention.dense.scales": "model-00008-of-00010.safetensors",
1345
+ "transformer.h.59.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
1346
+ "transformer.h.59.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
1347
+ "transformer.h.59.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1348
+ "transformer.h.59.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1349
+ "transformer.h.59.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1350
+ "transformer.h.6.ln_attn.bias": "model-00002-of-00010.safetensors",
1351
+ "transformer.h.6.ln_attn.weight": "model-00002-of-00010.safetensors",
1352
+ "transformer.h.6.ln_mlp.bias": "model-00002-of-00010.safetensors",
1353
+ "transformer.h.6.ln_mlp.weight": "model-00002-of-00010.safetensors",
1354
  "transformer.h.6.mlp.dense_4h_to_h.bias": "model-00001-of-00010.safetensors",
1355
  "transformer.h.6.mlp.dense_4h_to_h.g_idx": "model-00001-of-00010.safetensors",
1356
  "transformer.h.6.mlp.dense_4h_to_h.qweight": "model-00001-of-00010.safetensors",
1357
  "transformer.h.6.mlp.dense_4h_to_h.qzeros": "model-00001-of-00010.safetensors",
1358
  "transformer.h.6.mlp.dense_4h_to_h.scales": "model-00001-of-00010.safetensors",
1359
+ "transformer.h.6.mlp.dense_h_to_4h.bias": "model-00002-of-00010.safetensors",
1360
+ "transformer.h.6.mlp.dense_h_to_4h.g_idx": "model-00002-of-00010.safetensors",
1361
  "transformer.h.6.mlp.dense_h_to_4h.qweight": "model-00001-of-00010.safetensors",
1362
  "transformer.h.6.mlp.dense_h_to_4h.qzeros": "model-00001-of-00010.safetensors",
1363
+ "transformer.h.6.mlp.dense_h_to_4h.scales": "model-00002-of-00010.safetensors",
1364
  "transformer.h.6.self_attention.dense.bias": "model-00001-of-00010.safetensors",
1365
  "transformer.h.6.self_attention.dense.g_idx": "model-00001-of-00010.safetensors",
1366
  "transformer.h.6.self_attention.dense.qweight": "model-00001-of-00010.safetensors",
 
1371
  "transformer.h.6.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
1372
  "transformer.h.6.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
1373
  "transformer.h.6.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
1374
+ "transformer.h.60.ln_attn.bias": "model-00008-of-00010.safetensors",
1375
+ "transformer.h.60.ln_attn.weight": "model-00008-of-00010.safetensors",
1376
+ "transformer.h.60.ln_mlp.bias": "model-00008-of-00010.safetensors",
1377
+ "transformer.h.60.ln_mlp.weight": "model-00008-of-00010.safetensors",
1378
+ "transformer.h.60.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1379
+ "transformer.h.60.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1380
+ "transformer.h.60.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1381
+ "transformer.h.60.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1382
+ "transformer.h.60.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1383
+ "transformer.h.60.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
1384
+ "transformer.h.60.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
1385
+ "transformer.h.60.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1386
+ "transformer.h.60.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1387
+ "transformer.h.60.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1388
+ "transformer.h.60.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1389
+ "transformer.h.60.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1390
+ "transformer.h.60.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
1391
+ "transformer.h.60.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
1392
+ "transformer.h.60.self_attention.dense.scales": "model-00008-of-00010.safetensors",
1393
+ "transformer.h.60.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
1394
+ "transformer.h.60.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
1395
+ "transformer.h.60.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1396
+ "transformer.h.60.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1397
+ "transformer.h.60.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1398
  "transformer.h.61.ln_attn.bias": "model-00008-of-00010.safetensors",
1399
  "transformer.h.61.ln_attn.weight": "model-00008-of-00010.safetensors",
1400
  "transformer.h.61.ln_mlp.bias": "model-00008-of-00010.safetensors",
 
1409
  "transformer.h.61.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
1410
  "transformer.h.61.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
1411
  "transformer.h.61.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
1412
+ "transformer.h.61.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1413
+ "transformer.h.61.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1414
+ "transformer.h.61.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
1415
+ "transformer.h.61.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
1416
+ "transformer.h.61.self_attention.dense.scales": "model-00008-of-00010.safetensors",
1417
+ "transformer.h.61.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
1418
+ "transformer.h.61.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
1419
+ "transformer.h.61.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1420
+ "transformer.h.61.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1421
+ "transformer.h.61.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1422
  "transformer.h.62.ln_attn.bias": "model-00008-of-00010.safetensors",
1423
  "transformer.h.62.ln_attn.weight": "model-00008-of-00010.safetensors",
1424
  "transformer.h.62.ln_mlp.bias": "model-00008-of-00010.safetensors",
 
1515
  "transformer.h.65.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1516
  "transformer.h.65.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1517
  "transformer.h.65.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1518
+ "transformer.h.66.ln_attn.bias": "model-00009-of-00010.safetensors",
1519
+ "transformer.h.66.ln_attn.weight": "model-00009-of-00010.safetensors",
1520
+ "transformer.h.66.ln_mlp.bias": "model-00009-of-00010.safetensors",
1521
+ "transformer.h.66.ln_mlp.weight": "model-00009-of-00010.safetensors",
1522
  "transformer.h.66.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
1523
  "transformer.h.66.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
1524
  "transformer.h.66.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
1525
  "transformer.h.66.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
1526
  "transformer.h.66.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
1527
+ "transformer.h.66.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1528
+ "transformer.h.66.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1529
+ "transformer.h.66.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1530
+ "transformer.h.66.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1531
+ "transformer.h.66.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1532
  "transformer.h.66.self_attention.dense.bias": "model-00008-of-00010.safetensors",
1533
  "transformer.h.66.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
1534
  "transformer.h.66.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
 
1539
  "transformer.h.66.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
1540
  "transformer.h.66.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
1541
  "transformer.h.66.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
1542
+ "transformer.h.67.ln_attn.bias": "model-00009-of-00010.safetensors",
1543
+ "transformer.h.67.ln_attn.weight": "model-00009-of-00010.safetensors",
1544
+ "transformer.h.67.ln_mlp.bias": "model-00009-of-00010.safetensors",
1545
+ "transformer.h.67.ln_mlp.weight": "model-00009-of-00010.safetensors",
1546
+ "transformer.h.67.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
1547
+ "transformer.h.67.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
1548
+ "transformer.h.67.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
1549
+ "transformer.h.67.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
1550
+ "transformer.h.67.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
1551
+ "transformer.h.67.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1552
+ "transformer.h.67.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1553
+ "transformer.h.67.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1554
+ "transformer.h.67.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1555
+ "transformer.h.67.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1556
+ "transformer.h.67.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1557
+ "transformer.h.67.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1558
+ "transformer.h.67.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1559
+ "transformer.h.67.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1560
+ "transformer.h.67.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1561
+ "transformer.h.67.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1562
+ "transformer.h.67.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1563
+ "transformer.h.67.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1564
+ "transformer.h.67.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1565
+ "transformer.h.67.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1566
+ "transformer.h.68.ln_attn.bias": "model-00009-of-00010.safetensors",
1567
+ "transformer.h.68.ln_attn.weight": "model-00009-of-00010.safetensors",
1568
+ "transformer.h.68.ln_mlp.bias": "model-00009-of-00010.safetensors",
1569
+ "transformer.h.68.ln_mlp.weight": "model-00009-of-00010.safetensors",
1570
+ "transformer.h.68.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
1571
+ "transformer.h.68.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
1572
+ "transformer.h.68.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
1573
+ "transformer.h.68.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
1574
+ "transformer.h.68.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
1575
+ "transformer.h.68.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1576
+ "transformer.h.68.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1577
+ "transformer.h.68.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1578
+ "transformer.h.68.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1579
+ "transformer.h.68.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1580
+ "transformer.h.68.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1581
+ "transformer.h.68.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1582
+ "transformer.h.68.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1583
+ "transformer.h.68.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1584
+ "transformer.h.68.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1585
+ "transformer.h.68.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1586
+ "transformer.h.68.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1587
+ "transformer.h.68.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1588
+ "transformer.h.68.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1589
+ "transformer.h.68.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1590
+ "transformer.h.69.ln_attn.bias": "model-00009-of-00010.safetensors",
1591
+ "transformer.h.69.ln_attn.weight": "model-00009-of-00010.safetensors",
1592
+ "transformer.h.69.ln_mlp.bias": "model-00009-of-00010.safetensors",
1593
+ "transformer.h.69.ln_mlp.weight": "model-00009-of-00010.safetensors",
1594
+ "transformer.h.69.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
1595
+ "transformer.h.69.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
1596
+ "transformer.h.69.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
1597
+ "transformer.h.69.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
1598
+ "transformer.h.69.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
1599
+ "transformer.h.69.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
1600
+ "transformer.h.69.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
1601
+ "transformer.h.69.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1602
+ "transformer.h.69.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1603
+ "transformer.h.69.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1604
+ "transformer.h.69.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1605
+ "transformer.h.69.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1606
+ "transformer.h.69.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1607
+ "transformer.h.69.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1608
+ "transformer.h.69.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1609
+ "transformer.h.69.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1610
+ "transformer.h.69.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1611
+ "transformer.h.69.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1612
+ "transformer.h.69.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1613
+ "transformer.h.69.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1614
  "transformer.h.7.ln_attn.bias": "model-00002-of-00010.safetensors",
1615
  "transformer.h.7.ln_attn.weight": "model-00002-of-00010.safetensors",
1616
  "transformer.h.7.ln_mlp.bias": "model-00002-of-00010.safetensors",
 
1625
  "transformer.h.7.mlp.dense_h_to_4h.qweight": "model-00002-of-00010.safetensors",
1626
  "transformer.h.7.mlp.dense_h_to_4h.qzeros": "model-00002-of-00010.safetensors",
1627
  "transformer.h.7.mlp.dense_h_to_4h.scales": "model-00002-of-00010.safetensors",
1628
+ "transformer.h.7.self_attention.dense.bias": "model-00002-of-00010.safetensors",
1629
+ "transformer.h.7.self_attention.dense.g_idx": "model-00002-of-00010.safetensors",
1630
+ "transformer.h.7.self_attention.dense.qweight": "model-00002-of-00010.safetensors",
1631
+ "transformer.h.7.self_attention.dense.qzeros": "model-00002-of-00010.safetensors",
1632
+ "transformer.h.7.self_attention.dense.scales": "model-00002-of-00010.safetensors",
1633
+ "transformer.h.7.self_attention.query_key_value.bias": "model-00002-of-00010.safetensors",
1634
+ "transformer.h.7.self_attention.query_key_value.g_idx": "model-00002-of-00010.safetensors",
1635
+ "transformer.h.7.self_attention.query_key_value.qweight": "model-00002-of-00010.safetensors",
1636
+ "transformer.h.7.self_attention.query_key_value.qzeros": "model-00002-of-00010.safetensors",
1637
+ "transformer.h.7.self_attention.query_key_value.scales": "model-00002-of-00010.safetensors",
1638
  "transformer.h.70.ln_attn.bias": "model-00009-of-00010.safetensors",
1639
  "transformer.h.70.ln_attn.weight": "model-00009-of-00010.safetensors",
1640
  "transformer.h.70.ln_mlp.bias": "model-00009-of-00010.safetensors",
 
1649
  "transformer.h.70.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
1650
  "transformer.h.70.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
1651
  "transformer.h.70.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
1652
+ "transformer.h.70.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1653
+ "transformer.h.70.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1654
+ "transformer.h.70.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
1655
+ "transformer.h.70.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
1656
+ "transformer.h.70.self_attention.dense.scales": "model-00009-of-00010.safetensors",
1657
+ "transformer.h.70.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
1658
+ "transformer.h.70.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
1659
+ "transformer.h.70.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1660
+ "transformer.h.70.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1661
+ "transformer.h.70.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1662
  "transformer.h.71.ln_attn.bias": "model-00009-of-00010.safetensors",
1663
  "transformer.h.71.ln_attn.weight": "model-00009-of-00010.safetensors",
1664
  "transformer.h.71.ln_mlp.bias": "model-00009-of-00010.safetensors",
 
1755
  "transformer.h.74.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1756
  "transformer.h.74.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1757
  "transformer.h.74.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1758
+ "transformer.h.75.ln_attn.bias": "model-00010-of-00010.safetensors",
1759
+ "transformer.h.75.ln_attn.weight": "model-00010-of-00010.safetensors",
1760
+ "transformer.h.75.ln_mlp.bias": "model-00010-of-00010.safetensors",
1761
+ "transformer.h.75.ln_mlp.weight": "model-00010-of-00010.safetensors",
1762
+ "transformer.h.75.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
1763
+ "transformer.h.75.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
1764
+ "transformer.h.75.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
1765
+ "transformer.h.75.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
1766
+ "transformer.h.75.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
1767
+ "transformer.h.75.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
1768
+ "transformer.h.75.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
1769
+ "transformer.h.75.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
1770
+ "transformer.h.75.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
1771
+ "transformer.h.75.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
1772
  "transformer.h.75.self_attention.dense.bias": "model-00009-of-00010.safetensors",
1773
  "transformer.h.75.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
1774
  "transformer.h.75.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
 
1779
  "transformer.h.75.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
1780
  "transformer.h.75.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
1781
  "transformer.h.75.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
1782
+ "transformer.h.76.ln_attn.bias": "model-00010-of-00010.safetensors",
1783
+ "transformer.h.76.ln_attn.weight": "model-00010-of-00010.safetensors",
1784
+ "transformer.h.76.ln_mlp.bias": "model-00010-of-00010.safetensors",
1785
+ "transformer.h.76.ln_mlp.weight": "model-00010-of-00010.safetensors",
1786
+ "transformer.h.76.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
1787
+ "transformer.h.76.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
1788
+ "transformer.h.76.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
1789
+ "transformer.h.76.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
1790
+ "transformer.h.76.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
1791
+ "transformer.h.76.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
1792
+ "transformer.h.76.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
1793
+ "transformer.h.76.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
1794
+ "transformer.h.76.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
1795
+ "transformer.h.76.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
1796
+ "transformer.h.76.self_attention.dense.bias": "model-00010-of-00010.safetensors",
1797
+ "transformer.h.76.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
1798
+ "transformer.h.76.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
1799
+ "transformer.h.76.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
1800
+ "transformer.h.76.self_attention.dense.scales": "model-00010-of-00010.safetensors",
1801
+ "transformer.h.76.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
1802
+ "transformer.h.76.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
1803
+ "transformer.h.76.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
1804
+ "transformer.h.76.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
1805
+ "transformer.h.76.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
1806
+ "transformer.h.77.ln_attn.bias": "model-00010-of-00010.safetensors",
1807
+ "transformer.h.77.ln_attn.weight": "model-00010-of-00010.safetensors",
1808
+ "transformer.h.77.ln_mlp.bias": "model-00010-of-00010.safetensors",
1809
+ "transformer.h.77.ln_mlp.weight": "model-00010-of-00010.safetensors",
1810
+ "transformer.h.77.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
1811
+ "transformer.h.77.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
1812
+ "transformer.h.77.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
1813
+ "transformer.h.77.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
1814
+ "transformer.h.77.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
1815
+ "transformer.h.77.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
1816
+ "transformer.h.77.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
1817
+ "transformer.h.77.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
1818
+ "transformer.h.77.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
1819
+ "transformer.h.77.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
1820
+ "transformer.h.77.self_attention.dense.bias": "model-00010-of-00010.safetensors",
1821
+ "transformer.h.77.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
1822
+ "transformer.h.77.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
1823
+ "transformer.h.77.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
1824
+ "transformer.h.77.self_attention.dense.scales": "model-00010-of-00010.safetensors",
1825
+ "transformer.h.77.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
1826
+ "transformer.h.77.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
1827
+ "transformer.h.77.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
1828
+ "transformer.h.77.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
1829
+ "transformer.h.77.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
1830
+ "transformer.h.78.ln_attn.bias": "model-00010-of-00010.safetensors",
1831
+ "transformer.h.78.ln_attn.weight": "model-00010-of-00010.safetensors",
1832
+ "transformer.h.78.ln_mlp.bias": "model-00010-of-00010.safetensors",
1833
+ "transformer.h.78.ln_mlp.weight": "model-00010-of-00010.safetensors",
1834
+ "transformer.h.78.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
1835
+ "transformer.h.78.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
1836
+ "transformer.h.78.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
1837
+ "transformer.h.78.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
1838
+ "transformer.h.78.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
1839
+ "transformer.h.78.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
1840
+ "transformer.h.78.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
1841
+ "transformer.h.78.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
1842
+ "transformer.h.78.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
1843
+ "transformer.h.78.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
1844
+ "transformer.h.78.self_attention.dense.bias": "model-00010-of-00010.safetensors",
1845
+ "transformer.h.78.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
1846
+ "transformer.h.78.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
1847
+ "transformer.h.78.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
1848
+ "transformer.h.78.self_attention.dense.scales": "model-00010-of-00010.safetensors",
1849
+ "transformer.h.78.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
1850
+ "transformer.h.78.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
1851
+ "transformer.h.78.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
1852
+ "transformer.h.78.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
1853
+ "transformer.h.78.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
1854
  "transformer.h.79.ln_attn.bias": "model-00010-of-00010.safetensors",
1855
  "transformer.h.79.ln_attn.weight": "model-00010-of-00010.safetensors",
1856
  "transformer.h.79.ln_mlp.bias": "model-00010-of-00010.safetensors",
 
1865
  "transformer.h.79.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
1866
  "transformer.h.79.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
1867
  "transformer.h.79.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
1868
+ "transformer.h.79.self_attention.dense.bias": "model-00010-of-00010.safetensors",
1869
+ "transformer.h.79.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
1870
+ "transformer.h.79.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
1871
+ "transformer.h.79.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
1872
+ "transformer.h.79.self_attention.dense.scales": "model-00010-of-00010.safetensors",
1873
+ "transformer.h.79.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
1874
+ "transformer.h.79.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
1875
+ "transformer.h.79.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
1876
+ "transformer.h.79.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
1877
+ "transformer.h.79.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
1878
  "transformer.h.8.ln_attn.bias": "model-00002-of-00010.safetensors",
1879
  "transformer.h.8.ln_attn.weight": "model-00002-of-00010.safetensors",
1880
  "transformer.h.8.ln_mlp.bias": "model-00002-of-00010.safetensors",
quantize_config.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "bits": 4,
3
- "group_size": -1,
4
  "damp_percent": 0.1,
5
  "desc_act": true,
6
  "sym": true,
7
  "true_sequential": true,
8
  "model_name_or_path": null,
9
  "model_file_base_name": "model"
10
- }
 
1
  {
2
  "bits": 4,
3
+ "group_size": 128,
4
  "damp_percent": 0.1,
5
  "desc_act": true,
6
  "sym": true,
7
  "true_sequential": true,
8
  "model_name_or_path": null,
9
  "model_file_base_name": "model"
10
+ }