NicoNico6 commited on
Commit
fb28ab8
1 Parent(s): e3da9cc
Files changed (2) hide show
  1. model.safetensors +2 -2
  2. quant_strategy.json +214 -214
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d5eaab4c2f3b7c01a1cb63ac17ebc6e46451ec8c02cda8883bb336efd9ea977
3
- size 3358077296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:272dd7c6f22b5f01672cd8a4c062232f9ed95df64a49c7fbced931540cd30f66
3
+ size 3358077336
quant_strategy.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
- "accuracy": 0.9288430213928223,
5
- "total_bits": 365124672,
6
  "q_proj": {
7
  "group_size": {
8
  "2": 64
@@ -17,10 +17,10 @@
17
  },
18
  "k_proj": {
19
  "group_size": {
20
- "4": 128
21
  },
22
  "bits": [
23
- 4
24
  ],
25
  "bits_prop": [
26
  1
@@ -89,8 +89,8 @@
89
  }
90
  },
91
  "model.layers.1": {
92
- "accuracy": 0.9289391040802002,
93
- "total_bits": 449761824,
94
  "q_proj": {
95
  "group_size": {
96
  "2": 64
@@ -105,10 +105,10 @@
105
  },
106
  "k_proj": {
107
  "group_size": {
108
- "2": 64
109
  },
110
  "bits": [
111
- 2
112
  ],
113
  "bits_prop": [
114
  1
@@ -129,10 +129,10 @@
129
  },
130
  "o_proj": {
131
  "group_size": {
132
- "2": 64
133
  },
134
  "bits": [
135
- 2
136
  ],
137
  "bits_prop": [
138
  1
@@ -177,14 +177,14 @@
177
  }
178
  },
179
  "model.layers.2": {
180
- "accuracy": 0.936192512512207,
181
- "total_bits": 519946272,
182
  "q_proj": {
183
  "group_size": {
184
- "4": 128
185
  },
186
  "bits": [
187
- 4
188
  ],
189
  "bits_prop": [
190
  1
@@ -193,10 +193,10 @@
193
  },
194
  "k_proj": {
195
  "group_size": {
196
- "4": 128
197
  },
198
  "bits": [
199
- 4
200
  ],
201
  "bits_prop": [
202
  1
@@ -217,10 +217,10 @@
217
  },
218
  "o_proj": {
219
  "group_size": {
220
- "4": 128
221
  },
222
  "bits": [
223
- 4
224
  ],
225
  "bits_prop": [
226
  1
@@ -253,10 +253,10 @@
253
  },
254
  "down_proj": {
255
  "group_size": {
256
- "4": 128
257
  },
258
  "bits": [
259
- 4
260
  ],
261
  "bits_prop": [
262
  1
@@ -265,14 +265,14 @@
265
  }
266
  },
267
  "model.layers.3": {
268
- "accuracy": 0.9241403341293335,
269
- "total_bits": 515819040,
270
  "q_proj": {
271
  "group_size": {
272
- "4": 128
273
  },
274
  "bits": [
275
- 4
276
  ],
277
  "bits_prop": [
278
  1
@@ -305,10 +305,10 @@
305
  },
306
  "o_proj": {
307
  "group_size": {
308
- "4": 128
309
  },
310
  "bits": [
311
- 4
312
  ],
313
  "bits_prop": [
314
  1
@@ -353,14 +353,14 @@
353
  }
354
  },
355
  "model.layers.4": {
356
- "accuracy": 0.924299955368042,
357
- "total_bits": 519946272,
358
  "q_proj": {
359
  "group_size": {
360
- "4": 128
361
  },
362
  "bits": [
363
- 4
364
  ],
365
  "bits_prop": [
366
  1
@@ -369,10 +369,10 @@
369
  },
370
  "k_proj": {
371
  "group_size": {
372
- "4": 128
373
  },
374
  "bits": [
375
- 4
376
  ],
377
  "bits_prop": [
378
  1
@@ -429,10 +429,10 @@
429
  },
430
  "down_proj": {
431
  "group_size": {
432
- "4": 128
433
  },
434
  "bits": [
435
- 4
436
  ],
437
  "bits_prop": [
438
  1
@@ -441,14 +441,14 @@
441
  }
442
  },
443
  "model.layers.5": {
444
- "accuracy": 0.9709324836730957,
445
- "total_bits": 697480224,
446
  "q_proj": {
447
  "group_size": {
448
- "4": 128
449
  },
450
  "bits": [
451
- 4
452
  ],
453
  "bits_prop": [
454
  1
@@ -481,10 +481,10 @@
481
  },
482
  "o_proj": {
483
  "group_size": {
484
- "4": 128
485
  },
486
  "bits": [
487
- 4
488
  ],
489
  "bits_prop": [
490
  1
@@ -493,10 +493,10 @@
493
  },
494
  "up_proj": {
495
  "group_size": {
496
- "4": 128
497
  },
498
  "bits": [
499
- 4
500
  ],
501
  "bits_prop": [
502
  1
@@ -505,10 +505,10 @@
505
  },
506
  "gate_proj": {
507
  "group_size": {
508
- "4": 128
509
  },
510
  "bits": [
511
- 4
512
  ],
513
  "bits_prop": [
514
  1
@@ -517,10 +517,10 @@
517
  },
518
  "down_proj": {
519
  "group_size": {
520
- "4": 128
521
  },
522
  "bits": [
523
- 4
524
  ],
525
  "bits_prop": [
526
  1
@@ -529,14 +529,14 @@
529
  }
530
  },
531
  "model.layers.6": {
532
- "accuracy": 0.9690049886703491,
533
- "total_bits": 697480224,
534
  "q_proj": {
535
  "group_size": {
536
- "4": 128
537
  },
538
  "bits": [
539
- 4
540
  ],
541
  "bits_prop": [
542
  1
@@ -545,10 +545,10 @@
545
  },
546
  "k_proj": {
547
  "group_size": {
548
- "4": 128
549
  },
550
  "bits": [
551
- 4
552
  ],
553
  "bits_prop": [
554
  1
@@ -569,10 +569,10 @@
569
  },
570
  "o_proj": {
571
  "group_size": {
572
- "4": 128
573
  },
574
  "bits": [
575
- 4
576
  ],
577
  "bits_prop": [
578
  1
@@ -581,10 +581,10 @@
581
  },
582
  "up_proj": {
583
  "group_size": {
584
- "4": 128
585
  },
586
  "bits": [
587
- 4
588
  ],
589
  "bits_prop": [
590
  1
@@ -593,10 +593,10 @@
593
  },
594
  "gate_proj": {
595
  "group_size": {
596
- "4": 128
597
  },
598
  "bits": [
599
- 4
600
  ],
601
  "bits_prop": [
602
  1
@@ -617,14 +617,14 @@
617
  }
618
  },
619
  "model.layers.7": {
620
- "accuracy": 0.9691200256347656,
621
- "total_bits": 697480224,
622
  "q_proj": {
623
  "group_size": {
624
- "4": 128
625
  },
626
  "bits": [
627
- 4
628
  ],
629
  "bits_prop": [
630
  1
@@ -633,10 +633,10 @@
633
  },
634
  "k_proj": {
635
  "group_size": {
636
- "4": 128
637
  },
638
  "bits": [
639
- 4
640
  ],
641
  "bits_prop": [
642
  1
@@ -669,10 +669,10 @@
669
  },
670
  "up_proj": {
671
  "group_size": {
672
- "4": 128
673
  },
674
  "bits": [
675
- 4
676
  ],
677
  "bits_prop": [
678
  1
@@ -681,10 +681,10 @@
681
  },
682
  "gate_proj": {
683
  "group_size": {
684
- "4": 128
685
  },
686
  "bits": [
687
- 4
688
  ],
689
  "bits_prop": [
690
  1
@@ -705,8 +705,8 @@
705
  }
706
  },
707
  "model.layers.8": {
708
- "accuracy": 0.9361705780029297,
709
- "total_bits": 486917664,
710
  "q_proj": {
711
  "group_size": {
712
  "2": 64
@@ -721,10 +721,10 @@
721
  },
722
  "k_proj": {
723
  "group_size": {
724
- "4": 128
725
  },
726
  "bits": [
727
- 4
728
  ],
729
  "bits_prop": [
730
  1
@@ -745,10 +745,10 @@
745
  },
746
  "o_proj": {
747
  "group_size": {
748
- "4": 128
749
  },
750
  "bits": [
751
- 4
752
  ],
753
  "bits_prop": [
754
  1
@@ -793,14 +793,14 @@
793
  }
794
  },
795
  "model.layers.9": {
796
- "accuracy": 0.959143877029419,
797
- "total_bits": 394026048,
798
  "q_proj": {
799
  "group_size": {
800
- "4": 128
801
  },
802
  "bits": [
803
- 4
804
  ],
805
  "bits_prop": [
806
  1
@@ -809,10 +809,10 @@
809
  },
810
  "k_proj": {
811
  "group_size": {
812
- "2": 64
813
  },
814
  "bits": [
815
- 2
816
  ],
817
  "bits_prop": [
818
  1
@@ -881,8 +881,8 @@
881
  }
882
  },
883
  "model.layers.10": {
884
- "accuracy": 0.9522266387939453,
885
- "total_bits": 394026048,
886
  "q_proj": {
887
  "group_size": {
888
  "2": 64
@@ -921,10 +921,10 @@
921
  },
922
  "o_proj": {
923
  "group_size": {
924
- "4": 128
925
  },
926
  "bits": [
927
- 4
928
  ],
929
  "bits_prop": [
930
  1
@@ -969,14 +969,14 @@
969
  }
970
  },
971
  "model.layers.11": {
972
- "accuracy": 0.9406924247741699,
973
- "total_bits": 394026048,
974
  "q_proj": {
975
  "group_size": {
976
- "4": 128
977
  },
978
  "bits": [
979
- 4
980
  ],
981
  "bits_prop": [
982
  1
@@ -1057,8 +1057,8 @@
1057
  }
1058
  },
1059
  "model.layers.12": {
1060
- "accuracy": 0.9434242248535156,
1061
- "total_bits": 394026048,
1062
  "q_proj": {
1063
  "group_size": {
1064
  "2": 64
@@ -1073,10 +1073,10 @@
1073
  },
1074
  "k_proj": {
1075
  "group_size": {
1076
- "2": 64
1077
  },
1078
  "bits": [
1079
- 2
1080
  ],
1081
  "bits_prop": [
1082
  1
@@ -1097,10 +1097,10 @@
1097
  },
1098
  "o_proj": {
1099
  "group_size": {
1100
- "4": 128
1101
  },
1102
  "bits": [
1103
- 4
1104
  ],
1105
  "bits_prop": [
1106
  1
@@ -1145,8 +1145,8 @@
1145
  }
1146
  },
1147
  "model.layers.13": {
1148
- "accuracy": 0.9376596212387085,
1149
- "total_bits": 394026048,
1150
  "q_proj": {
1151
  "group_size": {
1152
  "2": 64
@@ -1161,10 +1161,10 @@
1161
  },
1162
  "k_proj": {
1163
  "group_size": {
1164
- "2": 64
1165
  },
1166
  "bits": [
1167
- 2
1168
  ],
1169
  "bits_prop": [
1170
  1
@@ -1185,10 +1185,10 @@
1185
  },
1186
  "o_proj": {
1187
  "group_size": {
1188
- "4": 128
1189
  },
1190
  "bits": [
1191
- 4
1192
  ],
1193
  "bits_prop": [
1194
  1
@@ -1233,8 +1233,8 @@
1233
  }
1234
  },
1235
  "model.layers.14": {
1236
- "accuracy": 0.9380558729171753,
1237
- "total_bits": 394026048,
1238
  "q_proj": {
1239
  "group_size": {
1240
  "2": 64
@@ -1249,10 +1249,10 @@
1249
  },
1250
  "k_proj": {
1251
  "group_size": {
1252
- "2": 64
1253
  },
1254
  "bits": [
1255
- 2
1256
  ],
1257
  "bits_prop": [
1258
  1
@@ -1273,10 +1273,10 @@
1273
  },
1274
  "o_proj": {
1275
  "group_size": {
1276
- "4": 128
1277
  },
1278
  "bits": [
1279
- 4
1280
  ],
1281
  "bits_prop": [
1282
  1
@@ -1321,8 +1321,8 @@
1321
  }
1322
  },
1323
  "model.layers.15": {
1324
- "accuracy": 0.9555038213729858,
1325
- "total_bits": 486917664,
1326
  "q_proj": {
1327
  "group_size": {
1328
  "2": 64
@@ -1361,10 +1361,10 @@
1361
  },
1362
  "o_proj": {
1363
  "group_size": {
1364
- "4": 128
1365
  },
1366
  "bits": [
1367
- 4
1368
  ],
1369
  "bits_prop": [
1370
  1
@@ -1397,10 +1397,10 @@
1397
  },
1398
  "down_proj": {
1399
  "group_size": {
1400
- "4": 128
1401
  },
1402
  "bits": [
1403
- 4
1404
  ],
1405
  "bits_prop": [
1406
  1
@@ -1409,8 +1409,8 @@
1409
  }
1410
  },
1411
  "model.layers.16": {
1412
- "accuracy": 0.9523999691009521,
1413
- "total_bits": 486917664,
1414
  "q_proj": {
1415
  "group_size": {
1416
  "2": 64
@@ -1425,10 +1425,10 @@
1425
  },
1426
  "k_proj": {
1427
  "group_size": {
1428
- "4": 128
1429
  },
1430
  "bits": [
1431
- 4
1432
  ],
1433
  "bits_prop": [
1434
  1
@@ -1449,10 +1449,10 @@
1449
  },
1450
  "o_proj": {
1451
  "group_size": {
1452
- "4": 128
1453
  },
1454
  "bits": [
1455
- 4
1456
  ],
1457
  "bits_prop": [
1458
  1
@@ -1497,14 +1497,14 @@
1497
  }
1498
  },
1499
  "model.layers.17": {
1500
- "accuracy": 0.9459496736526489,
1501
- "total_bits": 515819040,
1502
  "q_proj": {
1503
  "group_size": {
1504
- "4": 128
1505
  },
1506
  "bits": [
1507
- 4
1508
  ],
1509
  "bits_prop": [
1510
  1
@@ -1513,10 +1513,10 @@
1513
  },
1514
  "k_proj": {
1515
  "group_size": {
1516
- "2": 64
1517
  },
1518
  "bits": [
1519
- 2
1520
  ],
1521
  "bits_prop": [
1522
  1
@@ -1585,8 +1585,8 @@
1585
  }
1586
  },
1587
  "model.layers.18": {
1588
- "accuracy": 0.9424376487731934,
1589
- "total_bits": 515819040,
1590
  "q_proj": {
1591
  "group_size": {
1592
  "4": 128
@@ -1601,10 +1601,10 @@
1601
  },
1602
  "k_proj": {
1603
  "group_size": {
1604
- "2": 64
1605
  },
1606
  "bits": [
1607
- 2
1608
  ],
1609
  "bits_prop": [
1610
  1
@@ -1673,14 +1673,14 @@
1673
  }
1674
  },
1675
  "model.layers.19": {
1676
- "accuracy": 0.9421288967132568,
1677
- "total_bits": 515819040,
1678
  "q_proj": {
1679
  "group_size": {
1680
- "4": 128
1681
  },
1682
  "bits": [
1683
- 4
1684
  ],
1685
  "bits_prop": [
1686
  1
@@ -1725,10 +1725,10 @@
1725
  },
1726
  "up_proj": {
1727
  "group_size": {
1728
- "2": 64
1729
  },
1730
  "bits": [
1731
- 2
1732
  ],
1733
  "bits_prop": [
1734
  1
@@ -1761,8 +1761,8 @@
1761
  }
1762
  },
1763
  "model.layers.20": {
1764
- "accuracy": 0.9422779083251953,
1765
- "total_bits": 519946272,
1766
  "q_proj": {
1767
  "group_size": {
1768
  "4": 128
@@ -1777,10 +1777,10 @@
1777
  },
1778
  "k_proj": {
1779
  "group_size": {
1780
- "4": 128
1781
  },
1782
  "bits": [
1783
- 4
1784
  ],
1785
  "bits_prop": [
1786
  1
@@ -1813,10 +1813,10 @@
1813
  },
1814
  "up_proj": {
1815
  "group_size": {
1816
- "2": 64
1817
  },
1818
  "bits": [
1819
- 2
1820
  ],
1821
  "bits_prop": [
1822
  1
@@ -1849,14 +1849,14 @@
1849
  }
1850
  },
1851
  "model.layers.21": {
1852
- "accuracy": 0.9373316764831543,
1853
- "total_bits": 519946272,
1854
  "q_proj": {
1855
  "group_size": {
1856
- "4": 128
1857
  },
1858
  "bits": [
1859
- 4
1860
  ],
1861
  "bits_prop": [
1862
  1
@@ -1901,10 +1901,10 @@
1901
  },
1902
  "up_proj": {
1903
  "group_size": {
1904
- "2": 64
1905
  },
1906
  "bits": [
1907
- 2
1908
  ],
1909
  "bits_prop": [
1910
  1
@@ -1913,10 +1913,10 @@
1913
  },
1914
  "gate_proj": {
1915
  "group_size": {
1916
- "2": 64
1917
  },
1918
  "bits": [
1919
- 2
1920
  ],
1921
  "bits_prop": [
1922
  1
@@ -1937,8 +1937,8 @@
1937
  }
1938
  },
1939
  "model.layers.22": {
1940
- "accuracy": 0.9449453353881836,
1941
- "total_bits": 604586016,
1942
  "q_proj": {
1943
  "group_size": {
1944
  "4": 128
@@ -1953,10 +1953,10 @@
1953
  },
1954
  "k_proj": {
1955
  "group_size": {
1956
- "2": 64
1957
  },
1958
  "bits": [
1959
- 2
1960
  ],
1961
  "bits_prop": [
1962
  1
@@ -2001,10 +2001,10 @@
2001
  },
2002
  "gate_proj": {
2003
  "group_size": {
2004
- "2": 64
2005
  },
2006
  "bits": [
2007
- 2
2008
  ],
2009
  "bits_prop": [
2010
  1
@@ -2025,8 +2025,8 @@
2025
  }
2026
  },
2027
  "model.layers.23": {
2028
- "accuracy": 0.9457166194915771,
2029
- "total_bits": 604586016,
2030
  "q_proj": {
2031
  "group_size": {
2032
  "4": 128
@@ -2041,10 +2041,10 @@
2041
  },
2042
  "k_proj": {
2043
  "group_size": {
2044
- "2": 64
2045
  },
2046
  "bits": [
2047
- 2
2048
  ],
2049
  "bits_prop": [
2050
  1
@@ -2089,10 +2089,10 @@
2089
  },
2090
  "gate_proj": {
2091
  "group_size": {
2092
- "2": 64
2093
  },
2094
  "bits": [
2095
- 2
2096
  ],
2097
  "bits_prop": [
2098
  1
@@ -2113,8 +2113,8 @@
2113
  }
2114
  },
2115
  "model.layers.24": {
2116
- "accuracy": 0.9458975791931152,
2117
- "total_bits": 604586016,
2118
  "q_proj": {
2119
  "group_size": {
2120
  "4": 128
@@ -2129,10 +2129,10 @@
2129
  },
2130
  "k_proj": {
2131
  "group_size": {
2132
- "2": 64
2133
  },
2134
  "bits": [
2135
- 2
2136
  ],
2137
  "bits_prop": [
2138
  1
@@ -2177,10 +2177,10 @@
2177
  },
2178
  "gate_proj": {
2179
  "group_size": {
2180
- "2": 64
2181
  },
2182
  "bits": [
2183
- 2
2184
  ],
2185
  "bits_prop": [
2186
  1
@@ -2201,8 +2201,8 @@
2201
  }
2202
  },
2203
  "model.layers.25": {
2204
- "accuracy": 0.9496898651123047,
2205
- "total_bits": 604586016,
2206
  "q_proj": {
2207
  "group_size": {
2208
  "4": 128
@@ -2217,10 +2217,10 @@
2217
  },
2218
  "k_proj": {
2219
  "group_size": {
2220
- "2": 64
2221
  },
2222
  "bits": [
2223
- 2
2224
  ],
2225
  "bits_prop": [
2226
  1
@@ -2265,10 +2265,10 @@
2265
  },
2266
  "gate_proj": {
2267
  "group_size": {
2268
- "2": 64
2269
  },
2270
  "bits": [
2271
- 2
2272
  ],
2273
  "bits_prop": [
2274
  1
@@ -2289,14 +2289,14 @@
2289
  }
2290
  },
2291
  "model.layers.26": {
2292
- "accuracy": 0.9475545883178711,
2293
- "total_bits": 571557408,
2294
  "q_proj": {
2295
  "group_size": {
2296
- "2": 64
2297
  },
2298
  "bits": [
2299
- 2
2300
  ],
2301
  "bits_prop": [
2302
  1
@@ -2305,10 +2305,10 @@
2305
  },
2306
  "k_proj": {
2307
  "group_size": {
2308
- "2": 64
2309
  },
2310
  "bits": [
2311
- 2
2312
  ],
2313
  "bits_prop": [
2314
  1
@@ -2341,10 +2341,10 @@
2341
  },
2342
  "up_proj": {
2343
  "group_size": {
2344
- "2": 64
2345
  },
2346
  "bits": [
2347
- 2
2348
  ],
2349
  "bits_prop": [
2350
  1
@@ -2377,14 +2377,14 @@
2377
  }
2378
  },
2379
  "model.layers.27": {
2380
- "accuracy": 0.9472908973693848,
2381
- "total_bits": 571557408,
2382
  "q_proj": {
2383
  "group_size": {
2384
- "2": 64
2385
  },
2386
  "bits": [
2387
- 2
2388
  ],
2389
  "bits_prop": [
2390
  1
@@ -2393,10 +2393,10 @@
2393
  },
2394
  "k_proj": {
2395
  "group_size": {
2396
- "2": 64
2397
  },
2398
  "bits": [
2399
- 2
2400
  ],
2401
  "bits_prop": [
2402
  1
@@ -2441,10 +2441,10 @@
2441
  },
2442
  "gate_proj": {
2443
  "group_size": {
2444
- "2": 64
2445
  },
2446
  "bits": [
2447
- 2
2448
  ],
2449
  "bits_prop": [
2450
  1
@@ -2465,8 +2465,8 @@
2465
  }
2466
  },
2467
  "model.layers.28": {
2468
- "accuracy": 0.9392316341400146,
2469
- "total_bits": 519946272,
2470
  "q_proj": {
2471
  "group_size": {
2472
  "4": 128
@@ -2517,10 +2517,10 @@
2517
  },
2518
  "up_proj": {
2519
  "group_size": {
2520
- "2": 64
2521
  },
2522
  "bits": [
2523
- 2
2524
  ],
2525
  "bits_prop": [
2526
  1
@@ -2529,10 +2529,10 @@
2529
  },
2530
  "gate_proj": {
2531
  "group_size": {
2532
- "2": 64
2533
  },
2534
  "bits": [
2535
- 2
2536
  ],
2537
  "bits_prop": [
2538
  1
@@ -2553,14 +2553,14 @@
2553
  }
2554
  },
2555
  "model.layers.29": {
2556
- "accuracy": 0.9373917579650879,
2557
- "total_bits": 482790432,
2558
  "q_proj": {
2559
  "group_size": {
2560
- "2": 64
2561
  },
2562
  "bits": [
2563
- 2
2564
  ],
2565
  "bits_prop": [
2566
  1
@@ -2569,10 +2569,10 @@
2569
  },
2570
  "k_proj": {
2571
  "group_size": {
2572
- "2": 64
2573
  },
2574
  "bits": [
2575
- 2
2576
  ],
2577
  "bits_prop": [
2578
  1
@@ -2605,10 +2605,10 @@
2605
  },
2606
  "up_proj": {
2607
  "group_size": {
2608
- "2": 64
2609
  },
2610
  "bits": [
2611
- 2
2612
  ],
2613
  "bits_prop": [
2614
  1
@@ -2617,10 +2617,10 @@
2617
  },
2618
  "gate_proj": {
2619
  "group_size": {
2620
- "2": 64
2621
  },
2622
  "bits": [
2623
- 2
2624
  ],
2625
  "bits_prop": [
2626
  1
@@ -2641,14 +2641,14 @@
2641
  }
2642
  },
2643
  "model.layers.30": {
2644
- "accuracy": 0.9403772354125977,
2645
- "total_bits": 482790432,
2646
  "q_proj": {
2647
  "group_size": {
2648
- "2": 64
2649
  },
2650
  "bits": [
2651
- 2
2652
  ],
2653
  "bits_prop": [
2654
  1
@@ -2657,10 +2657,10 @@
2657
  },
2658
  "k_proj": {
2659
  "group_size": {
2660
- "2": 64
2661
  },
2662
  "bits": [
2663
- 2
2664
  ],
2665
  "bits_prop": [
2666
  1
@@ -2693,10 +2693,10 @@
2693
  },
2694
  "up_proj": {
2695
  "group_size": {
2696
- "2": 64
2697
  },
2698
  "bits": [
2699
- 2
2700
  ],
2701
  "bits_prop": [
2702
  1
@@ -2705,10 +2705,10 @@
2705
  },
2706
  "gate_proj": {
2707
  "group_size": {
2708
- "2": 64
2709
  },
2710
  "bits": [
2711
- 2
2712
  ],
2713
  "bits_prop": [
2714
  1
 
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
+ "accuracy": 0.9242749214172363,
5
+ "total_bits": 360997440,
6
  "q_proj": {
7
  "group_size": {
8
  "2": 64
 
17
  },
18
  "k_proj": {
19
  "group_size": {
20
+ "2": 64
21
  },
22
  "bits": [
23
+ 2
24
  ],
25
  "bits_prop": [
26
  1
 
89
  }
90
  },
91
  "model.layers.1": {
92
+ "accuracy": 0.9216856956481934,
93
+ "total_bits": 486917664,
94
  "q_proj": {
95
  "group_size": {
96
  "2": 64
 
105
  },
106
  "k_proj": {
107
  "group_size": {
108
+ "4": 128
109
  },
110
  "bits": [
111
+ 4
112
  ],
113
  "bits_prop": [
114
  1
 
129
  },
130
  "o_proj": {
131
  "group_size": {
132
+ "4": 128
133
  },
134
  "bits": [
135
+ 4
136
  ],
137
  "bits_prop": [
138
  1
 
177
  }
178
  },
179
  "model.layers.2": {
180
+ "accuracy": 0.8546795845031738,
181
+ "total_bits": 360997440,
182
  "q_proj": {
183
  "group_size": {
184
+ "2": 64
185
  },
186
  "bits": [
187
+ 2
188
  ],
189
  "bits_prop": [
190
  1
 
193
  },
194
  "k_proj": {
195
  "group_size": {
196
+ "2": 64
197
  },
198
  "bits": [
199
+ 2
200
  ],
201
  "bits_prop": [
202
  1
 
217
  },
218
  "o_proj": {
219
  "group_size": {
220
+ "2": 64
221
  },
222
  "bits": [
223
+ 2
224
  ],
225
  "bits_prop": [
226
  1
 
253
  },
254
  "down_proj": {
255
  "group_size": {
256
+ "2": 64
257
  },
258
  "bits": [
259
+ 2
260
  ],
261
  "bits_prop": [
262
  1
 
265
  }
266
  },
267
  "model.layers.3": {
268
+ "accuracy": 0.9084997177124023,
269
+ "total_bits": 449761824,
270
  "q_proj": {
271
  "group_size": {
272
+ "2": 64
273
  },
274
  "bits": [
275
+ 2
276
  ],
277
  "bits_prop": [
278
  1
 
305
  },
306
  "o_proj": {
307
  "group_size": {
308
+ "2": 64
309
  },
310
  "bits": [
311
+ 2
312
  ],
313
  "bits_prop": [
314
  1
 
353
  }
354
  },
355
  "model.layers.4": {
356
+ "accuracy": 0.8643641471862793,
357
+ "total_bits": 394026048,
358
  "q_proj": {
359
  "group_size": {
360
+ "2": 64
361
  },
362
  "bits": [
363
+ 2
364
  ],
365
  "bits_prop": [
366
  1
 
369
  },
370
  "k_proj": {
371
  "group_size": {
372
+ "2": 64
373
  },
374
  "bits": [
375
+ 2
376
  ],
377
  "bits_prop": [
378
  1
 
429
  },
430
  "down_proj": {
431
  "group_size": {
432
+ "2": 64
433
  },
434
  "bits": [
435
+ 2
436
  ],
437
  "bits_prop": [
438
  1
 
441
  }
442
  },
443
  "model.layers.5": {
444
+ "accuracy": 0.8657441139221191,
445
+ "total_bits": 365124672,
446
  "q_proj": {
447
  "group_size": {
448
+ "2": 64
449
  },
450
  "bits": [
451
+ 2
452
  ],
453
  "bits_prop": [
454
  1
 
481
  },
482
  "o_proj": {
483
  "group_size": {
484
+ "2": 64
485
  },
486
  "bits": [
487
+ 2
488
  ],
489
  "bits_prop": [
490
  1
 
493
  },
494
  "up_proj": {
495
  "group_size": {
496
+ "2": 64
497
  },
498
  "bits": [
499
+ 2
500
  ],
501
  "bits_prop": [
502
  1
 
505
  },
506
  "gate_proj": {
507
  "group_size": {
508
+ "2": 64
509
  },
510
  "bits": [
511
+ 2
512
  ],
513
  "bits_prop": [
514
  1
 
517
  },
518
  "down_proj": {
519
  "group_size": {
520
+ "2": 64
521
  },
522
  "bits": [
523
+ 2
524
  ],
525
  "bits_prop": [
526
  1
 
529
  }
530
  },
531
  "model.layers.6": {
532
+ "accuracy": 0.877474308013916,
533
+ "total_bits": 449761824,
534
  "q_proj": {
535
  "group_size": {
536
+ "2": 64
537
  },
538
  "bits": [
539
+ 2
540
  ],
541
  "bits_prop": [
542
  1
 
545
  },
546
  "k_proj": {
547
  "group_size": {
548
+ "2": 64
549
  },
550
  "bits": [
551
+ 2
552
  ],
553
  "bits_prop": [
554
  1
 
569
  },
570
  "o_proj": {
571
  "group_size": {
572
+ "2": 64
573
  },
574
  "bits": [
575
+ 2
576
  ],
577
  "bits_prop": [
578
  1
 
581
  },
582
  "up_proj": {
583
  "group_size": {
584
+ "2": 64
585
  },
586
  "bits": [
587
+ 2
588
  ],
589
  "bits_prop": [
590
  1
 
593
  },
594
  "gate_proj": {
595
  "group_size": {
596
+ "2": 64
597
  },
598
  "bits": [
599
+ 2
600
  ],
601
  "bits_prop": [
602
  1
 
617
  }
618
  },
619
  "model.layers.7": {
620
+ "accuracy": 0.8887453079223633,
621
+ "total_bits": 482790432,
622
  "q_proj": {
623
  "group_size": {
624
+ "2": 64
625
  },
626
  "bits": [
627
+ 2
628
  ],
629
  "bits_prop": [
630
  1
 
633
  },
634
  "k_proj": {
635
  "group_size": {
636
+ "2": 64
637
  },
638
  "bits": [
639
+ 2
640
  ],
641
  "bits_prop": [
642
  1
 
669
  },
670
  "up_proj": {
671
  "group_size": {
672
+ "2": 64
673
  },
674
  "bits": [
675
+ 2
676
  ],
677
  "bits_prop": [
678
  1
 
681
  },
682
  "gate_proj": {
683
  "group_size": {
684
+ "2": 64
685
  },
686
  "bits": [
687
+ 2
688
  ],
689
  "bits_prop": [
690
  1
 
705
  }
706
  },
707
  "model.layers.8": {
708
+ "accuracy": 0.9228010177612305,
709
+ "total_bits": 449761824,
710
  "q_proj": {
711
  "group_size": {
712
  "2": 64
 
721
  },
722
  "k_proj": {
723
  "group_size": {
724
+ "2": 64
725
  },
726
  "bits": [
727
+ 2
728
  ],
729
  "bits_prop": [
730
  1
 
745
  },
746
  "o_proj": {
747
  "group_size": {
748
+ "2": 64
749
  },
750
  "bits": [
751
+ 2
752
  ],
753
  "bits_prop": [
754
  1
 
793
  }
794
  },
795
  "model.layers.9": {
796
+ "accuracy": 0.9577234387397766,
797
+ "total_bits": 365124672,
798
  "q_proj": {
799
  "group_size": {
800
+ "2": 64
801
  },
802
  "bits": [
803
+ 2
804
  ],
805
  "bits_prop": [
806
  1
 
809
  },
810
  "k_proj": {
811
  "group_size": {
812
+ "4": 128
813
  },
814
  "bits": [
815
+ 4
816
  ],
817
  "bits_prop": [
818
  1
 
881
  }
882
  },
883
  "model.layers.10": {
884
+ "accuracy": 0.9458887577056885,
885
+ "total_bits": 360997440,
886
  "q_proj": {
887
  "group_size": {
888
  "2": 64
 
921
  },
922
  "o_proj": {
923
  "group_size": {
924
+ "2": 64
925
  },
926
  "bits": [
927
+ 2
928
  ],
929
  "bits_prop": [
930
  1
 
969
  }
970
  },
971
  "model.layers.11": {
972
+ "accuracy": 0.9322950839996338,
973
+ "total_bits": 360997440,
974
  "q_proj": {
975
  "group_size": {
976
+ "2": 64
977
  },
978
  "bits": [
979
+ 2
980
  ],
981
  "bits_prop": [
982
  1
 
1057
  }
1058
  },
1059
  "model.layers.12": {
1060
+ "accuracy": 0.9404451847076416,
1061
+ "total_bits": 365124672,
1062
  "q_proj": {
1063
  "group_size": {
1064
  "2": 64
 
1073
  },
1074
  "k_proj": {
1075
  "group_size": {
1076
+ "4": 128
1077
  },
1078
  "bits": [
1079
+ 4
1080
  ],
1081
  "bits_prop": [
1082
  1
 
1097
  },
1098
  "o_proj": {
1099
  "group_size": {
1100
+ "2": 64
1101
  },
1102
  "bits": [
1103
+ 2
1104
  ],
1105
  "bits_prop": [
1106
  1
 
1145
  }
1146
  },
1147
  "model.layers.13": {
1148
+ "accuracy": 0.9363645315170288,
1149
+ "total_bits": 365124672,
1150
  "q_proj": {
1151
  "group_size": {
1152
  "2": 64
 
1161
  },
1162
  "k_proj": {
1163
  "group_size": {
1164
+ "4": 128
1165
  },
1166
  "bits": [
1167
+ 4
1168
  ],
1169
  "bits_prop": [
1170
  1
 
1185
  },
1186
  "o_proj": {
1187
  "group_size": {
1188
+ "2": 64
1189
  },
1190
  "bits": [
1191
+ 2
1192
  ],
1193
  "bits_prop": [
1194
  1
 
1233
  }
1234
  },
1235
  "model.layers.14": {
1236
+ "accuracy": 0.9359749555587769,
1237
+ "total_bits": 365124672,
1238
  "q_proj": {
1239
  "group_size": {
1240
  "2": 64
 
1249
  },
1250
  "k_proj": {
1251
  "group_size": {
1252
+ "4": 128
1253
  },
1254
  "bits": [
1255
+ 4
1256
  ],
1257
  "bits_prop": [
1258
  1
 
1273
  },
1274
  "o_proj": {
1275
  "group_size": {
1276
+ "2": 64
1277
  },
1278
  "bits": [
1279
+ 2
1280
  ],
1281
  "bits_prop": [
1282
  1
 
1321
  }
1322
  },
1323
  "model.layers.15": {
1324
+ "accuracy": 0.9322938919067383,
1325
+ "total_bits": 365124672,
1326
  "q_proj": {
1327
  "group_size": {
1328
  "2": 64
 
1361
  },
1362
  "o_proj": {
1363
  "group_size": {
1364
+ "2": 64
1365
  },
1366
  "bits": [
1367
+ 2
1368
  ],
1369
  "bits_prop": [
1370
  1
 
1397
  },
1398
  "down_proj": {
1399
  "group_size": {
1400
+ "2": 64
1401
  },
1402
  "bits": [
1403
+ 2
1404
  ],
1405
  "bits_prop": [
1406
  1
 
1409
  }
1410
  },
1411
  "model.layers.16": {
1412
+ "accuracy": 0.939303994178772,
1413
+ "total_bits": 449761824,
1414
  "q_proj": {
1415
  "group_size": {
1416
  "2": 64
 
1425
  },
1426
  "k_proj": {
1427
  "group_size": {
1428
+ "2": 64
1429
  },
1430
  "bits": [
1431
+ 2
1432
  ],
1433
  "bits_prop": [
1434
  1
 
1449
  },
1450
  "o_proj": {
1451
  "group_size": {
1452
+ "2": 64
1453
  },
1454
  "bits": [
1455
+ 2
1456
  ],
1457
  "bits_prop": [
1458
  1
 
1497
  }
1498
  },
1499
  "model.layers.17": {
1500
+ "accuracy": 0.9451323747634888,
1501
+ "total_bits": 486917664,
1502
  "q_proj": {
1503
  "group_size": {
1504
+ "2": 64
1505
  },
1506
  "bits": [
1507
+ 2
1508
  ],
1509
  "bits_prop": [
1510
  1
 
1513
  },
1514
  "k_proj": {
1515
  "group_size": {
1516
+ "4": 128
1517
  },
1518
  "bits": [
1519
+ 4
1520
  ],
1521
  "bits_prop": [
1522
  1
 
1585
  }
1586
  },
1587
  "model.layers.18": {
1588
+ "accuracy": 0.9493275880813599,
1589
+ "total_bits": 519946272,
1590
  "q_proj": {
1591
  "group_size": {
1592
  "4": 128
 
1601
  },
1602
  "k_proj": {
1603
  "group_size": {
1604
+ "4": 128
1605
  },
1606
  "bits": [
1607
+ 4
1608
  ],
1609
  "bits_prop": [
1610
  1
 
1673
  }
1674
  },
1675
  "model.layers.19": {
1676
+ "accuracy": 0.9514966011047363,
1677
+ "total_bits": 571557408,
1678
  "q_proj": {
1679
  "group_size": {
1680
+ "2": 64
1681
  },
1682
  "bits": [
1683
+ 2
1684
  ],
1685
  "bits_prop": [
1686
  1
 
1725
  },
1726
  "up_proj": {
1727
  "group_size": {
1728
+ "4": 128
1729
  },
1730
  "bits": [
1731
+ 4
1732
  ],
1733
  "bits_prop": [
1734
  1
 
1761
  }
1762
  },
1763
  "model.layers.20": {
1764
+ "accuracy": 0.955375075340271,
1765
+ "total_bits": 604586016,
1766
  "q_proj": {
1767
  "group_size": {
1768
  "4": 128
 
1777
  },
1778
  "k_proj": {
1779
  "group_size": {
1780
+ "2": 64
1781
  },
1782
  "bits": [
1783
+ 2
1784
  ],
1785
  "bits_prop": [
1786
  1
 
1813
  },
1814
  "up_proj": {
1815
  "group_size": {
1816
+ "4": 128
1817
  },
1818
  "bits": [
1819
+ 4
1820
  ],
1821
  "bits_prop": [
1822
  1
 
1849
  }
1850
  },
1851
  "model.layers.21": {
1852
+ "accuracy": 0.9731628894805908,
1853
+ "total_bits": 664451616,
1854
  "q_proj": {
1855
  "group_size": {
1856
+ "2": 64
1857
  },
1858
  "bits": [
1859
+ 2
1860
  ],
1861
  "bits_prop": [
1862
  1
 
1901
  },
1902
  "up_proj": {
1903
  "group_size": {
1904
+ "4": 128
1905
  },
1906
  "bits": [
1907
+ 4
1908
  ],
1909
  "bits_prop": [
1910
  1
 
1913
  },
1914
  "gate_proj": {
1915
  "group_size": {
1916
+ "4": 128
1917
  },
1918
  "bits": [
1919
+ 4
1920
  ],
1921
  "bits_prop": [
1922
  1
 
1937
  }
1938
  },
1939
  "model.layers.22": {
1940
+ "accuracy": 0.9785275459289551,
1941
+ "total_bits": 697480224,
1942
  "q_proj": {
1943
  "group_size": {
1944
  "4": 128
 
1953
  },
1954
  "k_proj": {
1955
  "group_size": {
1956
+ "4": 128
1957
  },
1958
  "bits": [
1959
+ 4
1960
  ],
1961
  "bits_prop": [
1962
  1
 
2001
  },
2002
  "gate_proj": {
2003
  "group_size": {
2004
+ "4": 128
2005
  },
2006
  "bits": [
2007
+ 4
2008
  ],
2009
  "bits_prop": [
2010
  1
 
2025
  }
2026
  },
2027
  "model.layers.23": {
2028
+ "accuracy": 0.9788622856140137,
2029
+ "total_bits": 697480224,
2030
  "q_proj": {
2031
  "group_size": {
2032
  "4": 128
 
2041
  },
2042
  "k_proj": {
2043
  "group_size": {
2044
+ "4": 128
2045
  },
2046
  "bits": [
2047
+ 4
2048
  ],
2049
  "bits_prop": [
2050
  1
 
2089
  },
2090
  "gate_proj": {
2091
  "group_size": {
2092
+ "4": 128
2093
  },
2094
  "bits": [
2095
+ 4
2096
  ],
2097
  "bits_prop": [
2098
  1
 
2113
  }
2114
  },
2115
  "model.layers.24": {
2116
+ "accuracy": 0.9794007539749146,
2117
+ "total_bits": 697480224,
2118
  "q_proj": {
2119
  "group_size": {
2120
  "4": 128
 
2129
  },
2130
  "k_proj": {
2131
  "group_size": {
2132
+ "4": 128
2133
  },
2134
  "bits": [
2135
+ 4
2136
  ],
2137
  "bits_prop": [
2138
  1
 
2177
  },
2178
  "gate_proj": {
2179
  "group_size": {
2180
+ "4": 128
2181
  },
2182
  "bits": [
2183
+ 4
2184
  ],
2185
  "bits_prop": [
2186
  1
 
2201
  }
2202
  },
2203
  "model.layers.25": {
2204
+ "accuracy": 0.9806145429611206,
2205
+ "total_bits": 697480224,
2206
  "q_proj": {
2207
  "group_size": {
2208
  "4": 128
 
2217
  },
2218
  "k_proj": {
2219
  "group_size": {
2220
+ "4": 128
2221
  },
2222
  "bits": [
2223
+ 4
2224
  ],
2225
  "bits_prop": [
2226
  1
 
2265
  },
2266
  "gate_proj": {
2267
  "group_size": {
2268
+ "4": 128
2269
  },
2270
  "bits": [
2271
+ 4
2272
  ],
2273
  "bits_prop": [
2274
  1
 
2289
  }
2290
  },
2291
  "model.layers.26": {
2292
+ "accuracy": 0.9806764125823975,
2293
+ "total_bits": 697480224,
2294
  "q_proj": {
2295
  "group_size": {
2296
+ "4": 128
2297
  },
2298
  "bits": [
2299
+ 4
2300
  ],
2301
  "bits_prop": [
2302
  1
 
2305
  },
2306
  "k_proj": {
2307
  "group_size": {
2308
+ "4": 128
2309
  },
2310
  "bits": [
2311
+ 4
2312
  ],
2313
  "bits_prop": [
2314
  1
 
2341
  },
2342
  "up_proj": {
2343
  "group_size": {
2344
+ "4": 128
2345
  },
2346
  "bits": [
2347
+ 4
2348
  ],
2349
  "bits_prop": [
2350
  1
 
2377
  }
2378
  },
2379
  "model.layers.27": {
2380
+ "accuracy": 0.9815640449523926,
2381
+ "total_bits": 697480224,
2382
  "q_proj": {
2383
  "group_size": {
2384
+ "4": 128
2385
  },
2386
  "bits": [
2387
+ 4
2388
  ],
2389
  "bits_prop": [
2390
  1
 
2393
  },
2394
  "k_proj": {
2395
  "group_size": {
2396
+ "4": 128
2397
  },
2398
  "bits": [
2399
+ 4
2400
  ],
2401
  "bits_prop": [
2402
  1
 
2441
  },
2442
  "gate_proj": {
2443
  "group_size": {
2444
+ "4": 128
2445
  },
2446
  "bits": [
2447
+ 4
2448
  ],
2449
  "bits_prop": [
2450
  1
 
2465
  }
2466
  },
2467
  "model.layers.28": {
2468
+ "accuracy": 0.9820178747177124,
2469
+ "total_bits": 697480224,
2470
  "q_proj": {
2471
  "group_size": {
2472
  "4": 128
 
2517
  },
2518
  "up_proj": {
2519
  "group_size": {
2520
+ "4": 128
2521
  },
2522
  "bits": [
2523
+ 4
2524
  ],
2525
  "bits_prop": [
2526
  1
 
2529
  },
2530
  "gate_proj": {
2531
  "group_size": {
2532
+ "4": 128
2533
  },
2534
  "bits": [
2535
+ 4
2536
  ],
2537
  "bits_prop": [
2538
  1
 
2553
  }
2554
  },
2555
  "model.layers.29": {
2556
+ "accuracy": 0.9836413264274597,
2557
+ "total_bits": 697480224,
2558
  "q_proj": {
2559
  "group_size": {
2560
+ "4": 128
2561
  },
2562
  "bits": [
2563
+ 4
2564
  ],
2565
  "bits_prop": [
2566
  1
 
2569
  },
2570
  "k_proj": {
2571
  "group_size": {
2572
+ "4": 128
2573
  },
2574
  "bits": [
2575
+ 4
2576
  ],
2577
  "bits_prop": [
2578
  1
 
2605
  },
2606
  "up_proj": {
2607
  "group_size": {
2608
+ "4": 128
2609
  },
2610
  "bits": [
2611
+ 4
2612
  ],
2613
  "bits_prop": [
2614
  1
 
2617
  },
2618
  "gate_proj": {
2619
  "group_size": {
2620
+ "4": 128
2621
  },
2622
  "bits": [
2623
+ 4
2624
  ],
2625
  "bits_prop": [
2626
  1
 
2641
  }
2642
  },
2643
  "model.layers.30": {
2644
+ "accuracy": 0.9838729500770569,
2645
+ "total_bits": 697480224,
2646
  "q_proj": {
2647
  "group_size": {
2648
+ "4": 128
2649
  },
2650
  "bits": [
2651
+ 4
2652
  ],
2653
  "bits_prop": [
2654
  1
 
2657
  },
2658
  "k_proj": {
2659
  "group_size": {
2660
+ "4": 128
2661
  },
2662
  "bits": [
2663
+ 4
2664
  ],
2665
  "bits_prop": [
2666
  1
 
2693
  },
2694
  "up_proj": {
2695
  "group_size": {
2696
+ "4": 128
2697
  },
2698
  "bits": [
2699
+ 4
2700
  ],
2701
  "bits_prop": [
2702
  1
 
2705
  },
2706
  "gate_proj": {
2707
  "group_size": {
2708
+ "4": 128
2709
  },
2710
  "bits": [
2711
+ 4
2712
  ],
2713
  "bits_prop": [
2714
  1