NicoNico6 commited on
Commit
8b40c52
1 Parent(s): b991fa3
Files changed (2) hide show
  1. model.safetensors +2 -2
  2. quant_strategy.json +325 -337
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d255c3bcb44feffe9b2e74fe209d41b3b5752e1841db26dad24a05c8e4cac718
3
- size 2843027248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f95f93237e56b66678970c36837ab144fa892ee7c8e34dd4d735fd940b762082
3
+ size 2847057704
quant_strategy.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
- "accuracy": 0.8845478693644206,
5
- "total_bits": 456407296,
6
  "q_proj": {
7
  "group_size": {
8
  "4": 128,
@@ -13,8 +13,8 @@
13
  2
14
  ],
15
  "bits_prop": [
16
- 0.01,
17
- 0.99
18
  ],
19
  "scale_bits": 4
20
  },
@@ -28,8 +28,8 @@
28
  2
29
  ],
30
  "bits_prop": [
31
- 0.01,
32
- 0.99
33
  ],
34
  "scale_bits": 4
35
  },
@@ -43,8 +43,8 @@
43
  2
44
  ],
45
  "bits_prop": [
46
- 0.05,
47
- 0.95
48
  ],
49
  "scale_bits": 4
50
  },
@@ -58,8 +58,8 @@
58
  2
59
  ],
60
  "bits_prop": [
61
- 0.02,
62
- 0.98
63
  ],
64
  "scale_bits": 4
65
  },
@@ -73,8 +73,8 @@
73
  2
74
  ],
75
  "bits_prop": [
76
- 0.01,
77
- 0.99
78
  ],
79
  "scale_bits": 4
80
  },
@@ -88,8 +88,8 @@
88
  2
89
  ],
90
  "bits_prop": [
91
- 0.01,
92
- 0.99
93
  ],
94
  "scale_bits": 4
95
  },
@@ -103,15 +103,15 @@
103
  2
104
  ],
105
  "bits_prop": [
106
- 0.02,
107
- 0.98
108
  ],
109
  "scale_bits": 4
110
  }
111
  },
112
  "model.layers.1": {
113
- "accuracy": 0.8679930369059244,
114
- "total_bits": 456407296,
115
  "q_proj": {
116
  "group_size": {
117
  "4": 128,
@@ -122,8 +122,8 @@
122
  2
123
  ],
124
  "bits_prop": [
125
- 0.01,
126
- 0.99
127
  ],
128
  "scale_bits": 4
129
  },
@@ -137,8 +137,8 @@
137
  2
138
  ],
139
  "bits_prop": [
140
- 0.01,
141
- 0.99
142
  ],
143
  "scale_bits": 4
144
  },
@@ -152,8 +152,8 @@
152
  2
153
  ],
154
  "bits_prop": [
155
- 0.05,
156
- 0.95
157
  ],
158
  "scale_bits": 4
159
  },
@@ -182,8 +182,8 @@
182
  2
183
  ],
184
  "bits_prop": [
185
- 0.01,
186
- 0.99
187
  ],
188
  "scale_bits": 4
189
  },
@@ -197,8 +197,8 @@
197
  2
198
  ],
199
  "bits_prop": [
200
- 0.01,
201
- 0.99
202
  ],
203
  "scale_bits": 4
204
  },
@@ -212,14 +212,14 @@
212
  2
213
  ],
214
  "bits_prop": [
215
- 0.02,
216
- 0.98
217
  ],
218
  "scale_bits": 4
219
  }
220
  },
221
  "model.layers.2": {
222
- "accuracy": 0.8931070963541666,
223
  "total_bits": 456407296,
224
  "q_proj": {
225
  "group_size": {
@@ -328,7 +328,7 @@
328
  }
329
  },
330
  "model.layers.3": {
331
- "accuracy": 0.8671232461929321,
332
  "total_bits": 456407296,
333
  "q_proj": {
334
  "group_size": {
@@ -437,7 +437,7 @@
437
  }
438
  },
439
  "model.layers.4": {
440
- "accuracy": 0.8653159936269124,
441
  "total_bits": 456407296,
442
  "q_proj": {
443
  "group_size": {
@@ -546,7 +546,7 @@
546
  }
547
  },
548
  "model.layers.5": {
549
- "accuracy": 0.8537668387095134,
550
  "total_bits": 456407296,
551
  "q_proj": {
552
  "group_size": {
@@ -655,7 +655,7 @@
655
  }
656
  },
657
  "model.layers.6": {
658
- "accuracy": 0.854811668395996,
659
  "total_bits": 456407296,
660
  "q_proj": {
661
  "group_size": {
@@ -764,8 +764,8 @@
764
  }
765
  },
766
  "model.layers.7": {
767
- "accuracy": 0.8464771111806234,
768
- "total_bits": 469252352,
769
  "q_proj": {
770
  "group_size": {
771
  "4": 128,
@@ -776,8 +776,8 @@
776
  2
777
  ],
778
  "bits_prop": [
779
- 0.05,
780
- 0.95
781
  ],
782
  "scale_bits": 4
783
  },
@@ -791,8 +791,8 @@
791
  2
792
  ],
793
  "bits_prop": [
794
- 0.05,
795
- 0.95
796
  ],
797
  "scale_bits": 4
798
  },
@@ -821,8 +821,8 @@
821
  2
822
  ],
823
  "bits_prop": [
824
- 0.05,
825
- 0.95
826
  ],
827
  "scale_bits": 4
828
  },
@@ -836,8 +836,8 @@
836
  2
837
  ],
838
  "bits_prop": [
839
- 0.05,
840
- 0.95
841
  ],
842
  "scale_bits": 4
843
  },
@@ -851,8 +851,8 @@
851
  2
852
  ],
853
  "bits_prop": [
854
- 0.05,
855
- 0.95
856
  ],
857
  "scale_bits": 4
858
  },
@@ -866,15 +866,15 @@
866
  2
867
  ],
868
  "bits_prop": [
869
- 0.05,
870
- 0.95
871
  ],
872
  "scale_bits": 4
873
  }
874
  },
875
  "model.layers.8": {
876
- "accuracy": 0.8413853645324708,
877
- "total_bits": 465844480,
878
  "q_proj": {
879
  "group_size": {
880
  "4": 128,
@@ -930,8 +930,8 @@
930
  2
931
  ],
932
  "bits_prop": [
933
- 0.01,
934
- 0.99
935
  ],
936
  "scale_bits": 4
937
  },
@@ -975,15 +975,15 @@
975
  2
976
  ],
977
  "bits_prop": [
978
- 0.1,
979
- 0.9
980
  ],
981
  "scale_bits": 4
982
  }
983
  },
984
  "model.layers.9": {
985
- "accuracy": 0.8449150721232097,
986
- "total_bits": 478689536,
987
  "q_proj": {
988
  "group_size": {
989
  "4": 128,
@@ -994,8 +994,8 @@
994
  2
995
  ],
996
  "bits_prop": [
997
- 0.02,
998
- 0.98
999
  ],
1000
  "scale_bits": 4
1001
  },
@@ -1009,8 +1009,8 @@
1009
  2
1010
  ],
1011
  "bits_prop": [
1012
- 0.02,
1013
- 0.98
1014
  ],
1015
  "scale_bits": 4
1016
  },
@@ -1024,8 +1024,8 @@
1024
  2
1025
  ],
1026
  "bits_prop": [
1027
- 0.2,
1028
- 0.8
1029
  ],
1030
  "scale_bits": 4
1031
  },
@@ -1054,8 +1054,8 @@
1054
  2
1055
  ],
1056
  "bits_prop": [
1057
- 0.02,
1058
- 0.98
1059
  ],
1060
  "scale_bits": 4
1061
  },
@@ -1069,8 +1069,8 @@
1069
  2
1070
  ],
1071
  "bits_prop": [
1072
- 0.02,
1073
- 0.98
1074
  ],
1075
  "scale_bits": 4
1076
  },
@@ -1084,15 +1084,15 @@
1084
  2
1085
  ],
1086
  "bits_prop": [
1087
- 0.2,
1088
- 0.8
1089
  ],
1090
  "scale_bits": 4
1091
  }
1092
  },
1093
  "model.layers.10": {
1094
- "accuracy": 0.8419138590494792,
1095
- "total_bits": 478689536,
1096
  "q_proj": {
1097
  "group_size": {
1098
  "4": 128,
@@ -1103,8 +1103,8 @@
1103
  2
1104
  ],
1105
  "bits_prop": [
1106
- 0.02,
1107
- 0.98
1108
  ],
1109
  "scale_bits": 4
1110
  },
@@ -1118,8 +1118,8 @@
1118
  2
1119
  ],
1120
  "bits_prop": [
1121
- 0.02,
1122
- 0.98
1123
  ],
1124
  "scale_bits": 4
1125
  },
@@ -1133,8 +1133,8 @@
1133
  2
1134
  ],
1135
  "bits_prop": [
1136
- 0.2,
1137
- 0.8
1138
  ],
1139
  "scale_bits": 4
1140
  },
@@ -1163,8 +1163,8 @@
1163
  2
1164
  ],
1165
  "bits_prop": [
1166
- 0.02,
1167
- 0.98
1168
  ],
1169
  "scale_bits": 4
1170
  },
@@ -1178,8 +1178,8 @@
1178
  2
1179
  ],
1180
  "bits_prop": [
1181
- 0.02,
1182
- 0.98
1183
  ],
1184
  "scale_bits": 4
1185
  },
@@ -1193,15 +1193,15 @@
1193
  2
1194
  ],
1195
  "bits_prop": [
1196
- 0.2,
1197
- 0.8
1198
  ],
1199
  "scale_bits": 4
1200
  }
1201
  },
1202
  "model.layers.11": {
1203
- "accuracy": 0.8442544937133789,
1204
- "total_bits": 478689536,
1205
  "q_proj": {
1206
  "group_size": {
1207
  "4": 128,
@@ -1212,8 +1212,8 @@
1212
  2
1213
  ],
1214
  "bits_prop": [
1215
- 0.02,
1216
- 0.98
1217
  ],
1218
  "scale_bits": 4
1219
  },
@@ -1227,8 +1227,8 @@
1227
  2
1228
  ],
1229
  "bits_prop": [
1230
- 0.02,
1231
- 0.98
1232
  ],
1233
  "scale_bits": 4
1234
  },
@@ -1242,8 +1242,8 @@
1242
  2
1243
  ],
1244
  "bits_prop": [
1245
- 0.2,
1246
- 0.8
1247
  ],
1248
  "scale_bits": 4
1249
  },
@@ -1272,8 +1272,8 @@
1272
  2
1273
  ],
1274
  "bits_prop": [
1275
- 0.02,
1276
- 0.98
1277
  ],
1278
  "scale_bits": 4
1279
  },
@@ -1287,8 +1287,8 @@
1287
  2
1288
  ],
1289
  "bits_prop": [
1290
- 0.02,
1291
- 0.98
1292
  ],
1293
  "scale_bits": 4
1294
  },
@@ -1302,15 +1302,15 @@
1302
  2
1303
  ],
1304
  "bits_prop": [
1305
- 0.2,
1306
- 0.8
1307
  ],
1308
  "scale_bits": 4
1309
  }
1310
  },
1311
  "model.layers.12": {
1312
- "accuracy": 0.8488562901814779,
1313
- "total_bits": 507787520,
1314
  "q_proj": {
1315
  "group_size": {
1316
  "4": 128,
@@ -1321,8 +1321,8 @@
1321
  2
1322
  ],
1323
  "bits_prop": [
1324
- 0.1,
1325
- 0.9
1326
  ],
1327
  "scale_bits": 4
1328
  },
@@ -1336,8 +1336,8 @@
1336
  2
1337
  ],
1338
  "bits_prop": [
1339
- 0.1,
1340
- 0.9
1341
  ],
1342
  "scale_bits": 4
1343
  },
@@ -1351,8 +1351,8 @@
1351
  2
1352
  ],
1353
  "bits_prop": [
1354
- 0.2,
1355
- 0.8
1356
  ],
1357
  "scale_bits": 4
1358
  },
@@ -1366,8 +1366,8 @@
1366
  2
1367
  ],
1368
  "bits_prop": [
1369
- 0.1,
1370
- 0.9
1371
  ],
1372
  "scale_bits": 4
1373
  },
@@ -1381,8 +1381,8 @@
1381
  2
1382
  ],
1383
  "bits_prop": [
1384
- 0.1,
1385
- 0.9
1386
  ],
1387
  "scale_bits": 4
1388
  },
@@ -1396,8 +1396,8 @@
1396
  2
1397
  ],
1398
  "bits_prop": [
1399
- 0.1,
1400
- 0.9
1401
  ],
1402
  "scale_bits": 4
1403
  },
@@ -1411,15 +1411,15 @@
1411
  2
1412
  ],
1413
  "bits_prop": [
1414
- 0.2,
1415
- 0.8
1416
  ],
1417
  "scale_bits": 4
1418
  }
1419
  },
1420
  "model.layers.13": {
1421
- "accuracy": 0.8254111607869467,
1422
- "total_bits": 478689536,
1423
  "q_proj": {
1424
  "group_size": {
1425
  "4": 128,
@@ -1430,8 +1430,8 @@
1430
  2
1431
  ],
1432
  "bits_prop": [
1433
- 0.02,
1434
- 0.98
1435
  ],
1436
  "scale_bits": 4
1437
  },
@@ -1445,8 +1445,8 @@
1445
  2
1446
  ],
1447
  "bits_prop": [
1448
- 0.02,
1449
- 0.98
1450
  ],
1451
  "scale_bits": 4
1452
  },
@@ -1460,8 +1460,8 @@
1460
  2
1461
  ],
1462
  "bits_prop": [
1463
- 0.2,
1464
- 0.8
1465
  ],
1466
  "scale_bits": 4
1467
  },
@@ -1490,8 +1490,8 @@
1490
  2
1491
  ],
1492
  "bits_prop": [
1493
- 0.02,
1494
- 0.98
1495
  ],
1496
  "scale_bits": 4
1497
  },
@@ -1505,8 +1505,8 @@
1505
  2
1506
  ],
1507
  "bits_prop": [
1508
- 0.02,
1509
- 0.98
1510
  ],
1511
  "scale_bits": 4
1512
  },
@@ -1520,15 +1520,15 @@
1520
  2
1521
  ],
1522
  "bits_prop": [
1523
- 0.2,
1524
- 0.8
1525
  ],
1526
  "scale_bits": 4
1527
  }
1528
  },
1529
  "model.layers.14": {
1530
- "accuracy": 0.8255802790323894,
1531
- "total_bits": 478689536,
1532
  "q_proj": {
1533
  "group_size": {
1534
  "4": 128,
@@ -1539,8 +1539,8 @@
1539
  2
1540
  ],
1541
  "bits_prop": [
1542
- 0.02,
1543
- 0.98
1544
  ],
1545
  "scale_bits": 4
1546
  },
@@ -1554,8 +1554,8 @@
1554
  2
1555
  ],
1556
  "bits_prop": [
1557
- 0.02,
1558
- 0.98
1559
  ],
1560
  "scale_bits": 4
1561
  },
@@ -1569,8 +1569,8 @@
1569
  2
1570
  ],
1571
  "bits_prop": [
1572
- 0.2,
1573
- 0.8
1574
  ],
1575
  "scale_bits": 4
1576
  },
@@ -1599,8 +1599,8 @@
1599
  2
1600
  ],
1601
  "bits_prop": [
1602
- 0.02,
1603
- 0.98
1604
  ],
1605
  "scale_bits": 4
1606
  },
@@ -1614,8 +1614,8 @@
1614
  2
1615
  ],
1616
  "bits_prop": [
1617
- 0.02,
1618
- 0.98
1619
  ],
1620
  "scale_bits": 4
1621
  },
@@ -1629,15 +1629,15 @@
1629
  2
1630
  ],
1631
  "bits_prop": [
1632
- 0.2,
1633
- 0.8
1634
  ],
1635
  "scale_bits": 4
1636
  }
1637
  },
1638
  "model.layers.15": {
1639
- "accuracy": 0.8435783386230468,
1640
- "total_bits": 513030400,
1641
  "q_proj": {
1642
  "group_size": {
1643
  "4": 128,
@@ -1648,9 +1648,9 @@
1648
  2
1649
  ],
1650
  "bits_prop": [
1651
- 0.05,
1652
- 0.95
1653
- ],
1654
  "scale_bits": 4
1655
  },
1656
  "k_proj": {
@@ -1663,8 +1663,8 @@
1663
  2
1664
  ],
1665
  "bits_prop": [
1666
- 0.05,
1667
- 0.95
1668
  ],
1669
  "scale_bits": 4
1670
  },
@@ -1678,8 +1678,8 @@
1678
  2
1679
  ],
1680
  "bits_prop": [
1681
- 0.4,
1682
- 0.6
1683
  ],
1684
  "scale_bits": 4
1685
  },
@@ -1693,8 +1693,8 @@
1693
  2
1694
  ],
1695
  "bits_prop": [
1696
- 0.05,
1697
- 0.95
1698
  ],
1699
  "scale_bits": 4
1700
  },
@@ -1708,8 +1708,8 @@
1708
  2
1709
  ],
1710
  "bits_prop": [
1711
- 0.05,
1712
- 0.95
1713
  ],
1714
  "scale_bits": 4
1715
  },
@@ -1723,8 +1723,8 @@
1723
  2
1724
  ],
1725
  "bits_prop": [
1726
- 0.05,
1727
- 0.95
1728
  ],
1729
  "scale_bits": 4
1730
  },
@@ -1738,15 +1738,15 @@
1738
  2
1739
  ],
1740
  "bits_prop": [
1741
- 0.4,
1742
- 0.6
1743
  ],
1744
  "scale_bits": 4
1745
  }
1746
  },
1747
  "model.layers.16": {
1748
- "accuracy": 0.8512028058369955,
1749
- "total_bits": 513030400,
1750
  "q_proj": {
1751
  "group_size": {
1752
  "4": 128,
@@ -1757,8 +1757,8 @@
1757
  2
1758
  ],
1759
  "bits_prop": [
1760
- 0.05,
1761
- 0.95
1762
  ],
1763
  "scale_bits": 4
1764
  },
@@ -1772,8 +1772,8 @@
1772
  2
1773
  ],
1774
  "bits_prop": [
1775
- 0.05,
1776
- 0.95
1777
  ],
1778
  "scale_bits": 4
1779
  },
@@ -1787,8 +1787,8 @@
1787
  2
1788
  ],
1789
  "bits_prop": [
1790
- 0.4,
1791
- 0.6
1792
  ],
1793
  "scale_bits": 4
1794
  },
@@ -1802,8 +1802,8 @@
1802
  2
1803
  ],
1804
  "bits_prop": [
1805
- 0.05,
1806
- 0.95
1807
  ],
1808
  "scale_bits": 4
1809
  },
@@ -1817,8 +1817,8 @@
1817
  2
1818
  ],
1819
  "bits_prop": [
1820
- 0.05,
1821
- 0.95
1822
  ],
1823
  "scale_bits": 4
1824
  },
@@ -1832,8 +1832,8 @@
1832
  2
1833
  ],
1834
  "bits_prop": [
1835
- 0.05,
1836
- 0.95
1837
  ],
1838
  "scale_bits": 4
1839
  },
@@ -1847,14 +1847,14 @@
1847
  2
1848
  ],
1849
  "bits_prop": [
1850
- 0.4,
1851
- 0.6
1852
  ],
1853
  "scale_bits": 4
1854
  }
1855
  },
1856
  "model.layers.17": {
1857
- "accuracy": 0.8347673416137695,
1858
  "total_bits": 478689536,
1859
  "q_proj": {
1860
  "group_size": {
@@ -1963,8 +1963,8 @@
1963
  }
1964
  },
1965
  "model.layers.18": {
1966
- "accuracy": 0.8424202601114908,
1967
- "total_bits": 478689536,
1968
  "q_proj": {
1969
  "group_size": {
1970
  "4": 128,
@@ -1975,8 +1975,8 @@
1975
  2
1976
  ],
1977
  "bits_prop": [
1978
- 0.02,
1979
- 0.98
1980
  ],
1981
  "scale_bits": 4
1982
  },
@@ -1990,8 +1990,8 @@
1990
  2
1991
  ],
1992
  "bits_prop": [
1993
- 0.02,
1994
- 0.98
1995
  ],
1996
  "scale_bits": 4
1997
  },
@@ -2020,8 +2020,8 @@
2020
  2
2021
  ],
2022
  "bits_prop": [
2023
- 0.02,
2024
- 0.98
2025
  ],
2026
  "scale_bits": 4
2027
  },
@@ -2035,8 +2035,8 @@
2035
  2
2036
  ],
2037
  "bits_prop": [
2038
- 0.02,
2039
- 0.98
2040
  ],
2041
  "scale_bits": 4
2042
  },
@@ -2050,8 +2050,8 @@
2050
  2
2051
  ],
2052
  "bits_prop": [
2053
- 0.02,
2054
- 0.98
2055
  ],
2056
  "scale_bits": 4
2057
  },
@@ -2072,7 +2072,7 @@
2072
  }
2073
  },
2074
  "model.layers.19": {
2075
- "accuracy": 0.8508459726969401,
2076
  "total_bits": 478689536,
2077
  "q_proj": {
2078
  "group_size": {
@@ -2181,7 +2181,7 @@
2181
  }
2182
  },
2183
  "model.layers.20": {
2184
- "accuracy": 0.845508893330892,
2185
  "total_bits": 478689536,
2186
  "q_proj": {
2187
  "group_size": {
@@ -2290,7 +2290,7 @@
2290
  }
2291
  },
2292
  "model.layers.21": {
2293
- "accuracy": 0.8430344263712566,
2294
  "total_bits": 478689536,
2295
  "q_proj": {
2296
  "group_size": {
@@ -2399,8 +2399,8 @@
2399
  }
2400
  },
2401
  "model.layers.22": {
2402
- "accuracy": 0.8357404073079426,
2403
- "total_bits": 478689536,
2404
  "q_proj": {
2405
  "group_size": {
2406
  "4": 128,
@@ -2411,8 +2411,8 @@
2411
  2
2412
  ],
2413
  "bits_prop": [
2414
- 0.02,
2415
- 0.98
2416
  ],
2417
  "scale_bits": 4
2418
  },
@@ -2426,8 +2426,8 @@
2426
  2
2427
  ],
2428
  "bits_prop": [
2429
- 0.02,
2430
- 0.98
2431
  ],
2432
  "scale_bits": 4
2433
  },
@@ -2441,8 +2441,8 @@
2441
  2
2442
  ],
2443
  "bits_prop": [
2444
- 0.2,
2445
- 0.8
2446
  ],
2447
  "scale_bits": 4
2448
  },
@@ -2456,8 +2456,8 @@
2456
  2
2457
  ],
2458
  "bits_prop": [
2459
- 0.02,
2460
- 0.98
2461
  ],
2462
  "scale_bits": 4
2463
  },
@@ -2471,8 +2471,8 @@
2471
  2
2472
  ],
2473
  "bits_prop": [
2474
- 0.02,
2475
- 0.98
2476
  ],
2477
  "scale_bits": 4
2478
  },
@@ -2486,8 +2486,8 @@
2486
  2
2487
  ],
2488
  "bits_prop": [
2489
- 0.02,
2490
- 0.98
2491
  ],
2492
  "scale_bits": 4
2493
  },
@@ -2501,15 +2501,15 @@
2501
  2
2502
  ],
2503
  "bits_prop": [
2504
- 0.2,
2505
- 0.8
2506
  ],
2507
  "scale_bits": 4
2508
  }
2509
  },
2510
  "model.layers.23": {
2511
- "accuracy": 0.8460915883382162,
2512
- "total_bits": 513030400,
2513
  "q_proj": {
2514
  "group_size": {
2515
  "4": 128,
@@ -2520,8 +2520,8 @@
2520
  2
2521
  ],
2522
  "bits_prop": [
2523
- 0.05,
2524
- 0.95
2525
  ],
2526
  "scale_bits": 4
2527
  },
@@ -2535,8 +2535,8 @@
2535
  2
2536
  ],
2537
  "bits_prop": [
2538
- 0.05,
2539
- 0.95
2540
  ],
2541
  "scale_bits": 4
2542
  },
@@ -2550,8 +2550,8 @@
2550
  2
2551
  ],
2552
  "bits_prop": [
2553
- 0.4,
2554
- 0.6
2555
  ],
2556
  "scale_bits": 4
2557
  },
@@ -2565,8 +2565,8 @@
2565
  2
2566
  ],
2567
  "bits_prop": [
2568
- 0.05,
2569
- 0.95
2570
  ],
2571
  "scale_bits": 4
2572
  },
@@ -2580,8 +2580,8 @@
2580
  2
2581
  ],
2582
  "bits_prop": [
2583
- 0.05,
2584
- 0.95
2585
  ],
2586
  "scale_bits": 4
2587
  },
@@ -2595,8 +2595,8 @@
2595
  2
2596
  ],
2597
  "bits_prop": [
2598
- 0.05,
2599
- 0.95
2600
  ],
2601
  "scale_bits": 4
2602
  },
@@ -2610,15 +2610,15 @@
2610
  2
2611
  ],
2612
  "bits_prop": [
2613
- 0.4,
2614
- 0.6
2615
  ],
2616
  "scale_bits": 4
2617
  }
2618
  },
2619
  "model.layers.24": {
2620
- "accuracy": 0.8472445805867513,
2621
- "total_bits": 513030400,
2622
  "q_proj": {
2623
  "group_size": {
2624
  "4": 128,
@@ -2629,8 +2629,8 @@
2629
  2
2630
  ],
2631
  "bits_prop": [
2632
- 0.05,
2633
- 0.95
2634
  ],
2635
  "scale_bits": 4
2636
  },
@@ -2644,8 +2644,8 @@
2644
  2
2645
  ],
2646
  "bits_prop": [
2647
- 0.05,
2648
- 0.95
2649
  ],
2650
  "scale_bits": 4
2651
  },
@@ -2659,8 +2659,8 @@
2659
  2
2660
  ],
2661
  "bits_prop": [
2662
- 0.4,
2663
- 0.6
2664
  ],
2665
  "scale_bits": 4
2666
  },
@@ -2674,8 +2674,8 @@
2674
  2
2675
  ],
2676
  "bits_prop": [
2677
- 0.05,
2678
- 0.95
2679
  ],
2680
  "scale_bits": 4
2681
  },
@@ -2689,8 +2689,8 @@
2689
  2
2690
  ],
2691
  "bits_prop": [
2692
- 0.05,
2693
- 0.95
2694
  ],
2695
  "scale_bits": 4
2696
  },
@@ -2704,8 +2704,8 @@
2704
  2
2705
  ],
2706
  "bits_prop": [
2707
- 0.05,
2708
- 0.95
2709
  ],
2710
  "scale_bits": 4
2711
  },
@@ -2719,15 +2719,15 @@
2719
  2
2720
  ],
2721
  "bits_prop": [
2722
- 0.4,
2723
- 0.6
2724
  ],
2725
  "scale_bits": 4
2726
  }
2727
  },
2728
  "model.layers.25": {
2729
- "accuracy": 0.8413645426432291,
2730
- "total_bits": 513030400,
2731
  "q_proj": {
2732
  "group_size": {
2733
  "4": 128,
@@ -2738,8 +2738,8 @@
2738
  2
2739
  ],
2740
  "bits_prop": [
2741
- 0.05,
2742
- 0.95
2743
  ],
2744
  "scale_bits": 4
2745
  },
@@ -2753,8 +2753,8 @@
2753
  2
2754
  ],
2755
  "bits_prop": [
2756
- 0.05,
2757
- 0.95
2758
  ],
2759
  "scale_bits": 4
2760
  },
@@ -2768,8 +2768,8 @@
2768
  2
2769
  ],
2770
  "bits_prop": [
2771
- 0.4,
2772
- 0.6
2773
  ],
2774
  "scale_bits": 4
2775
  },
@@ -2783,8 +2783,8 @@
2783
  2
2784
  ],
2785
  "bits_prop": [
2786
- 0.05,
2787
- 0.95
2788
  ],
2789
  "scale_bits": 4
2790
  },
@@ -2798,8 +2798,8 @@
2798
  2
2799
  ],
2800
  "bits_prop": [
2801
- 0.05,
2802
- 0.95
2803
  ],
2804
  "scale_bits": 4
2805
  },
@@ -2813,8 +2813,8 @@
2813
  2
2814
  ],
2815
  "bits_prop": [
2816
- 0.05,
2817
- 0.95
2818
  ],
2819
  "scale_bits": 4
2820
  },
@@ -2828,15 +2828,15 @@
2828
  2
2829
  ],
2830
  "bits_prop": [
2831
- 0.4,
2832
- 0.6
2833
  ],
2834
  "scale_bits": 4
2835
  }
2836
  },
2837
  "model.layers.26": {
2838
- "accuracy": 0.8339576721191405,
2839
- "total_bits": 513030400,
2840
  "q_proj": {
2841
  "group_size": {
2842
  "4": 128,
@@ -2847,8 +2847,8 @@
2847
  2
2848
  ],
2849
  "bits_prop": [
2850
- 0.05,
2851
- 0.95
2852
  ],
2853
  "scale_bits": 4
2854
  },
@@ -2862,8 +2862,8 @@
2862
  2
2863
  ],
2864
  "bits_prop": [
2865
- 0.05,
2866
- 0.95
2867
  ],
2868
  "scale_bits": 4
2869
  },
@@ -2877,8 +2877,8 @@
2877
  2
2878
  ],
2879
  "bits_prop": [
2880
- 0.4,
2881
- 0.6
2882
  ],
2883
  "scale_bits": 4
2884
  },
@@ -2892,8 +2892,8 @@
2892
  2
2893
  ],
2894
  "bits_prop": [
2895
- 0.05,
2896
- 0.95
2897
  ],
2898
  "scale_bits": 4
2899
  },
@@ -2907,8 +2907,8 @@
2907
  2
2908
  ],
2909
  "bits_prop": [
2910
- 0.05,
2911
- 0.95
2912
  ],
2913
  "scale_bits": 4
2914
  },
@@ -2922,8 +2922,8 @@
2922
  2
2923
  ],
2924
  "bits_prop": [
2925
- 0.05,
2926
- 0.95
2927
  ],
2928
  "scale_bits": 4
2929
  },
@@ -2937,14 +2937,14 @@
2937
  2
2938
  ],
2939
  "bits_prop": [
2940
- 0.4,
2941
- 0.6
2942
  ],
2943
  "scale_bits": 4
2944
  }
2945
  },
2946
  "model.layers.27": {
2947
- "accuracy": 0.8268442153930664,
2948
  "total_bits": 478689536,
2949
  "q_proj": {
2950
  "group_size": {
@@ -3053,7 +3053,7 @@
3053
  }
3054
  },
3055
  "model.layers.28": {
3056
- "accuracy": 0.8344643910725912,
3057
  "total_bits": 478689536,
3058
  "q_proj": {
3059
  "group_size": {
@@ -3162,8 +3162,8 @@
3162
  }
3163
  },
3164
  "model.layers.29": {
3165
- "accuracy": 0.8343839645385742,
3166
- "total_bits": 478689536,
3167
  "q_proj": {
3168
  "group_size": {
3169
  "4": 128,
@@ -3174,8 +3174,8 @@
3174
  2
3175
  ],
3176
  "bits_prop": [
3177
- 0.02,
3178
- 0.98
3179
  ],
3180
  "scale_bits": 4
3181
  },
@@ -3189,8 +3189,8 @@
3189
  2
3190
  ],
3191
  "bits_prop": [
3192
- 0.02,
3193
- 0.98
3194
  ],
3195
  "scale_bits": 4
3196
  },
@@ -3204,8 +3204,8 @@
3204
  2
3205
  ],
3206
  "bits_prop": [
3207
- 0.2,
3208
- 0.8
3209
  ],
3210
  "scale_bits": 4
3211
  },
@@ -3219,8 +3219,8 @@
3219
  2
3220
  ],
3221
  "bits_prop": [
3222
- 0.02,
3223
- 0.98
3224
  ],
3225
  "scale_bits": 4
3226
  },
@@ -3234,8 +3234,8 @@
3234
  2
3235
  ],
3236
  "bits_prop": [
3237
- 0.02,
3238
- 0.98
3239
  ],
3240
  "scale_bits": 4
3241
  },
@@ -3249,8 +3249,8 @@
3249
  2
3250
  ],
3251
  "bits_prop": [
3252
- 0.02,
3253
- 0.98
3254
  ],
3255
  "scale_bits": 4
3256
  },
@@ -3264,15 +3264,15 @@
3264
  2
3265
  ],
3266
  "bits_prop": [
3267
- 0.2,
3268
- 0.8
3269
  ],
3270
  "scale_bits": 4
3271
  }
3272
  },
3273
  "model.layers.30": {
3274
- "accuracy": 0.8395535151163737,
3275
- "total_bits": 478689536,
3276
  "q_proj": {
3277
  "group_size": {
3278
  "4": 128,
@@ -3283,8 +3283,8 @@
3283
  2
3284
  ],
3285
  "bits_prop": [
3286
- 0.02,
3287
- 0.98
3288
  ],
3289
  "scale_bits": 4
3290
  },
@@ -3298,23 +3298,20 @@
3298
  2
3299
  ],
3300
  "bits_prop": [
3301
- 0.02,
3302
- 0.98
3303
  ],
3304
  "scale_bits": 4
3305
  },
3306
  "v_proj": {
3307
  "group_size": {
3308
- "4": 128,
3309
- "2": 128
3310
  },
3311
  "bits": [
3312
- 4,
3313
- 2
3314
  ],
3315
  "bits_prop": [
3316
- 0.2,
3317
- 0.8
3318
  ],
3319
  "scale_bits": 4
3320
  },
@@ -3328,8 +3325,8 @@
3328
  2
3329
  ],
3330
  "bits_prop": [
3331
- 0.02,
3332
- 0.98
3333
  ],
3334
  "scale_bits": 4
3335
  },
@@ -3343,8 +3340,8 @@
3343
  2
3344
  ],
3345
  "bits_prop": [
3346
- 0.02,
3347
- 0.98
3348
  ],
3349
  "scale_bits": 4
3350
  },
@@ -3358,30 +3355,27 @@
3358
  2
3359
  ],
3360
  "bits_prop": [
3361
- 0.02,
3362
- 0.98
3363
  ],
3364
  "scale_bits": 4
3365
  },
3366
  "down_proj": {
3367
  "group_size": {
3368
- "4": 128,
3369
- "2": 128
3370
  },
3371
  "bits": [
3372
- 4,
3373
- 2
3374
  ],
3375
  "bits_prop": [
3376
- 0.2,
3377
- 0.8
3378
  ],
3379
  "scale_bits": 4
3380
  }
3381
  },
3382
  "model.layers.31": {
3383
- "accuracy": 0.8636307716369629,
3384
- "total_bits": 456407296,
3385
  "q_proj": {
3386
  "group_size": {
3387
  "4": 128,
@@ -3392,8 +3386,8 @@
3392
  2
3393
  ],
3394
  "bits_prop": [
3395
- 0.01,
3396
- 0.99
3397
  ],
3398
  "scale_bits": 4
3399
  },
@@ -3407,23 +3401,20 @@
3407
  2
3408
  ],
3409
  "bits_prop": [
3410
- 0.01,
3411
- 0.99
3412
  ],
3413
  "scale_bits": 4
3414
  },
3415
  "v_proj": {
3416
  "group_size": {
3417
- "4": 128,
3418
- "2": 128
3419
  },
3420
  "bits": [
3421
- 4,
3422
- 2
3423
  ],
3424
  "bits_prop": [
3425
- 0.05,
3426
- 0.95
3427
  ],
3428
  "scale_bits": 4
3429
  },
@@ -3437,8 +3428,8 @@
3437
  2
3438
  ],
3439
  "bits_prop": [
3440
- 0.02,
3441
- 0.98
3442
  ],
3443
  "scale_bits": 4
3444
  },
@@ -3452,8 +3443,8 @@
3452
  2
3453
  ],
3454
  "bits_prop": [
3455
- 0.01,
3456
- 0.99
3457
  ],
3458
  "scale_bits": 4
3459
  },
@@ -3467,26 +3458,23 @@
3467
  2
3468
  ],
3469
  "bits_prop": [
3470
- 0.01,
3471
- 0.99
3472
  ],
3473
  "scale_bits": 4
3474
  },
3475
  "down_proj": {
3476
  "group_size": {
3477
- "4": 128,
3478
- "2": 128
3479
  },
3480
  "bits": [
3481
- 4,
3482
- 2
3483
  ],
3484
  "bits_prop": [
3485
- 0.02,
3486
- 0.98
3487
  ],
3488
  "scale_bits": 4
3489
  }
3490
  }
3491
  }
3492
- }
 
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
+ "accuracy": 0.8987722396850586,
5
+ "total_bits": 507787520,
6
  "q_proj": {
7
  "group_size": {
8
  "4": 128,
 
13
  2
14
  ],
15
  "bits_prop": [
16
+ 0.1,
17
+ 0.9
18
  ],
19
  "scale_bits": 4
20
  },
 
28
  2
29
  ],
30
  "bits_prop": [
31
+ 0.1,
32
+ 0.9
33
  ],
34
  "scale_bits": 4
35
  },
 
43
  2
44
  ],
45
  "bits_prop": [
46
+ 0.2,
47
+ 0.8
48
  ],
49
  "scale_bits": 4
50
  },
 
58
  2
59
  ],
60
  "bits_prop": [
61
+ 0.1,
62
+ 0.9
63
  ],
64
  "scale_bits": 4
65
  },
 
73
  2
74
  ],
75
  "bits_prop": [
76
+ 0.1,
77
+ 0.9
78
  ],
79
  "scale_bits": 4
80
  },
 
88
  2
89
  ],
90
  "bits_prop": [
91
+ 0.1,
92
+ 0.9
93
  ],
94
  "scale_bits": 4
95
  },
 
103
  2
104
  ],
105
  "bits_prop": [
106
+ 0.2,
107
+ 0.8
108
  ],
109
  "scale_bits": 4
110
  }
111
  },
112
  "model.layers.1": {
113
+ "accuracy": 0.9002890586853027,
114
+ "total_bits": 478689536,
115
  "q_proj": {
116
  "group_size": {
117
  "4": 128,
 
122
  2
123
  ],
124
  "bits_prop": [
125
+ 0.02,
126
+ 0.98
127
  ],
128
  "scale_bits": 4
129
  },
 
137
  2
138
  ],
139
  "bits_prop": [
140
+ 0.02,
141
+ 0.98
142
  ],
143
  "scale_bits": 4
144
  },
 
152
  2
153
  ],
154
  "bits_prop": [
155
+ 0.2,
156
+ 0.8
157
  ],
158
  "scale_bits": 4
159
  },
 
182
  2
183
  ],
184
  "bits_prop": [
185
+ 0.02,
186
+ 0.98
187
  ],
188
  "scale_bits": 4
189
  },
 
197
  2
198
  ],
199
  "bits_prop": [
200
+ 0.02,
201
+ 0.98
202
  ],
203
  "scale_bits": 4
204
  },
 
212
  2
213
  ],
214
  "bits_prop": [
215
+ 0.2,
216
+ 0.8
217
  ],
218
  "scale_bits": 4
219
  }
220
  },
221
  "model.layers.2": {
222
+ "accuracy": 0.9608855843544006,
223
  "total_bits": 456407296,
224
  "q_proj": {
225
  "group_size": {
 
328
  }
329
  },
330
  "model.layers.3": {
331
+ "accuracy": 0.9555550813674927,
332
  "total_bits": 456407296,
333
  "q_proj": {
334
  "group_size": {
 
437
  }
438
  },
439
  "model.layers.4": {
440
+ "accuracy": 0.9499313831329346,
441
  "total_bits": 456407296,
442
  "q_proj": {
443
  "group_size": {
 
546
  }
547
  },
548
  "model.layers.5": {
549
+ "accuracy": 0.94085693359375,
550
  "total_bits": 456407296,
551
  "q_proj": {
552
  "group_size": {
 
655
  }
656
  },
657
  "model.layers.6": {
658
+ "accuracy": 0.9383460283279419,
659
  "total_bits": 456407296,
660
  "q_proj": {
661
  "group_size": {
 
764
  }
765
  },
766
  "model.layers.7": {
767
+ "accuracy": 0.9287691116333008,
768
+ "total_bits": 456407296,
769
  "q_proj": {
770
  "group_size": {
771
  "4": 128,
 
776
  2
777
  ],
778
  "bits_prop": [
779
+ 0.01,
780
+ 0.99
781
  ],
782
  "scale_bits": 4
783
  },
 
791
  2
792
  ],
793
  "bits_prop": [
794
+ 0.01,
795
+ 0.99
796
  ],
797
  "scale_bits": 4
798
  },
 
821
  2
822
  ],
823
  "bits_prop": [
824
+ 0.02,
825
+ 0.98
826
  ],
827
  "scale_bits": 4
828
  },
 
836
  2
837
  ],
838
  "bits_prop": [
839
+ 0.01,
840
+ 0.99
841
  ],
842
  "scale_bits": 4
843
  },
 
851
  2
852
  ],
853
  "bits_prop": [
854
+ 0.01,
855
+ 0.99
856
  ],
857
  "scale_bits": 4
858
  },
 
866
  2
867
  ],
868
  "bits_prop": [
869
+ 0.02,
870
+ 0.98
871
  ],
872
  "scale_bits": 4
873
  }
874
  },
875
  "model.layers.8": {
876
+ "accuracy": 0.9277379512786865,
877
+ "total_bits": 456407296,
878
  "q_proj": {
879
  "group_size": {
880
  "4": 128,
 
930
  2
931
  ],
932
  "bits_prop": [
933
+ 0.02,
934
+ 0.98
935
  ],
936
  "scale_bits": 4
937
  },
 
975
  2
976
  ],
977
  "bits_prop": [
978
+ 0.02,
979
+ 0.98
980
  ],
981
  "scale_bits": 4
982
  }
983
  },
984
  "model.layers.9": {
985
+ "accuracy": 0.9248785972595215,
986
+ "total_bits": 456407296,
987
  "q_proj": {
988
  "group_size": {
989
  "4": 128,
 
994
  2
995
  ],
996
  "bits_prop": [
997
+ 0.01,
998
+ 0.99
999
  ],
1000
  "scale_bits": 4
1001
  },
 
1009
  2
1010
  ],
1011
  "bits_prop": [
1012
+ 0.01,
1013
+ 0.99
1014
  ],
1015
  "scale_bits": 4
1016
  },
 
1024
  2
1025
  ],
1026
  "bits_prop": [
1027
+ 0.05,
1028
+ 0.95
1029
  ],
1030
  "scale_bits": 4
1031
  },
 
1054
  2
1055
  ],
1056
  "bits_prop": [
1057
+ 0.01,
1058
+ 0.99
1059
  ],
1060
  "scale_bits": 4
1061
  },
 
1069
  2
1070
  ],
1071
  "bits_prop": [
1072
+ 0.01,
1073
+ 0.99
1074
  ],
1075
  "scale_bits": 4
1076
  },
 
1084
  2
1085
  ],
1086
  "bits_prop": [
1087
+ 0.02,
1088
+ 0.98
1089
  ],
1090
  "scale_bits": 4
1091
  }
1092
  },
1093
  "model.layers.10": {
1094
+ "accuracy": 0.9232504367828369,
1095
+ "total_bits": 456407296,
1096
  "q_proj": {
1097
  "group_size": {
1098
  "4": 128,
 
1103
  2
1104
  ],
1105
  "bits_prop": [
1106
+ 0.01,
1107
+ 0.99
1108
  ],
1109
  "scale_bits": 4
1110
  },
 
1118
  2
1119
  ],
1120
  "bits_prop": [
1121
+ 0.01,
1122
+ 0.99
1123
  ],
1124
  "scale_bits": 4
1125
  },
 
1133
  2
1134
  ],
1135
  "bits_prop": [
1136
+ 0.05,
1137
+ 0.95
1138
  ],
1139
  "scale_bits": 4
1140
  },
 
1163
  2
1164
  ],
1165
  "bits_prop": [
1166
+ 0.01,
1167
+ 0.99
1168
  ],
1169
  "scale_bits": 4
1170
  },
 
1178
  2
1179
  ],
1180
  "bits_prop": [
1181
+ 0.01,
1182
+ 0.99
1183
  ],
1184
  "scale_bits": 4
1185
  },
 
1193
  2
1194
  ],
1195
  "bits_prop": [
1196
+ 0.02,
1197
+ 0.98
1198
  ],
1199
  "scale_bits": 4
1200
  }
1201
  },
1202
  "model.layers.11": {
1203
+ "accuracy": 0.9223508834838867,
1204
+ "total_bits": 456407296,
1205
  "q_proj": {
1206
  "group_size": {
1207
  "4": 128,
 
1212
  2
1213
  ],
1214
  "bits_prop": [
1215
+ 0.01,
1216
+ 0.99
1217
  ],
1218
  "scale_bits": 4
1219
  },
 
1227
  2
1228
  ],
1229
  "bits_prop": [
1230
+ 0.01,
1231
+ 0.99
1232
  ],
1233
  "scale_bits": 4
1234
  },
 
1242
  2
1243
  ],
1244
  "bits_prop": [
1245
+ 0.05,
1246
+ 0.95
1247
  ],
1248
  "scale_bits": 4
1249
  },
 
1272
  2
1273
  ],
1274
  "bits_prop": [
1275
+ 0.01,
1276
+ 0.99
1277
  ],
1278
  "scale_bits": 4
1279
  },
 
1287
  2
1288
  ],
1289
  "bits_prop": [
1290
+ 0.01,
1291
+ 0.99
1292
  ],
1293
  "scale_bits": 4
1294
  },
 
1302
  2
1303
  ],
1304
  "bits_prop": [
1305
+ 0.02,
1306
+ 0.98
1307
  ],
1308
  "scale_bits": 4
1309
  }
1310
  },
1311
  "model.layers.12": {
1312
+ "accuracy": 0.9154980182647705,
1313
+ "total_bits": 456407296,
1314
  "q_proj": {
1315
  "group_size": {
1316
  "4": 128,
 
1321
  2
1322
  ],
1323
  "bits_prop": [
1324
+ 0.01,
1325
+ 0.99
1326
  ],
1327
  "scale_bits": 4
1328
  },
 
1336
  2
1337
  ],
1338
  "bits_prop": [
1339
+ 0.01,
1340
+ 0.99
1341
  ],
1342
  "scale_bits": 4
1343
  },
 
1351
  2
1352
  ],
1353
  "bits_prop": [
1354
+ 0.05,
1355
+ 0.95
1356
  ],
1357
  "scale_bits": 4
1358
  },
 
1366
  2
1367
  ],
1368
  "bits_prop": [
1369
+ 0.02,
1370
+ 0.98
1371
  ],
1372
  "scale_bits": 4
1373
  },
 
1381
  2
1382
  ],
1383
  "bits_prop": [
1384
+ 0.01,
1385
+ 0.99
1386
  ],
1387
  "scale_bits": 4
1388
  },
 
1396
  2
1397
  ],
1398
  "bits_prop": [
1399
+ 0.01,
1400
+ 0.99
1401
  ],
1402
  "scale_bits": 4
1403
  },
 
1411
  2
1412
  ],
1413
  "bits_prop": [
1414
+ 0.02,
1415
+ 0.98
1416
  ],
1417
  "scale_bits": 4
1418
  }
1419
  },
1420
  "model.layers.13": {
1421
+ "accuracy": 0.9131753444671631,
1422
+ "total_bits": 456407296,
1423
  "q_proj": {
1424
  "group_size": {
1425
  "4": 128,
 
1430
  2
1431
  ],
1432
  "bits_prop": [
1433
+ 0.01,
1434
+ 0.99
1435
  ],
1436
  "scale_bits": 4
1437
  },
 
1445
  2
1446
  ],
1447
  "bits_prop": [
1448
+ 0.01,
1449
+ 0.99
1450
  ],
1451
  "scale_bits": 4
1452
  },
 
1460
  2
1461
  ],
1462
  "bits_prop": [
1463
+ 0.05,
1464
+ 0.95
1465
  ],
1466
  "scale_bits": 4
1467
  },
 
1490
  2
1491
  ],
1492
  "bits_prop": [
1493
+ 0.01,
1494
+ 0.99
1495
  ],
1496
  "scale_bits": 4
1497
  },
 
1505
  2
1506
  ],
1507
  "bits_prop": [
1508
+ 0.01,
1509
+ 0.99
1510
  ],
1511
  "scale_bits": 4
1512
  },
 
1520
  2
1521
  ],
1522
  "bits_prop": [
1523
+ 0.02,
1524
+ 0.98
1525
  ],
1526
  "scale_bits": 4
1527
  }
1528
  },
1529
  "model.layers.14": {
1530
+ "accuracy": 0.9053680896759033,
1531
+ "total_bits": 456407296,
1532
  "q_proj": {
1533
  "group_size": {
1534
  "4": 128,
 
1539
  2
1540
  ],
1541
  "bits_prop": [
1542
+ 0.01,
1543
+ 0.99
1544
  ],
1545
  "scale_bits": 4
1546
  },
 
1554
  2
1555
  ],
1556
  "bits_prop": [
1557
+ 0.01,
1558
+ 0.99
1559
  ],
1560
  "scale_bits": 4
1561
  },
 
1569
  2
1570
  ],
1571
  "bits_prop": [
1572
+ 0.05,
1573
+ 0.95
1574
  ],
1575
  "scale_bits": 4
1576
  },
 
1599
  2
1600
  ],
1601
  "bits_prop": [
1602
+ 0.01,
1603
+ 0.99
1604
  ],
1605
  "scale_bits": 4
1606
  },
 
1614
  2
1615
  ],
1616
  "bits_prop": [
1617
+ 0.01,
1618
+ 0.99
1619
  ],
1620
  "scale_bits": 4
1621
  },
 
1629
  2
1630
  ],
1631
  "bits_prop": [
1632
+ 0.02,
1633
+ 0.98
1634
  ],
1635
  "scale_bits": 4
1636
  }
1637
  },
1638
  "model.layers.15": {
1639
+ "accuracy": 0.9041793346405029,
1640
+ "total_bits": 465844480,
1641
  "q_proj": {
1642
  "group_size": {
1643
  "4": 128,
 
1648
  2
1649
  ],
1650
  "bits_prop": [
1651
+ 0.01,
1652
+ 0.99
1653
+ ],
1654
  "scale_bits": 4
1655
  },
1656
  "k_proj": {
 
1663
  2
1664
  ],
1665
  "bits_prop": [
1666
+ 0.01,
1667
+ 0.99
1668
  ],
1669
  "scale_bits": 4
1670
  },
 
1678
  2
1679
  ],
1680
  "bits_prop": [
1681
+ 0.05,
1682
+ 0.95
1683
  ],
1684
  "scale_bits": 4
1685
  },
 
1693
  2
1694
  ],
1695
  "bits_prop": [
1696
+ 0.01,
1697
+ 0.99
1698
  ],
1699
  "scale_bits": 4
1700
  },
 
1708
  2
1709
  ],
1710
  "bits_prop": [
1711
+ 0.01,
1712
+ 0.99
1713
  ],
1714
  "scale_bits": 4
1715
  },
 
1723
  2
1724
  ],
1725
  "bits_prop": [
1726
+ 0.01,
1727
+ 0.99
1728
  ],
1729
  "scale_bits": 4
1730
  },
 
1738
  2
1739
  ],
1740
  "bits_prop": [
1741
+ 0.1,
1742
+ 0.9
1743
  ],
1744
  "scale_bits": 4
1745
  }
1746
  },
1747
  "model.layers.16": {
1748
+ "accuracy": 0.9069504737854004,
1749
+ "total_bits": 478689536,
1750
  "q_proj": {
1751
  "group_size": {
1752
  "4": 128,
 
1757
  2
1758
  ],
1759
  "bits_prop": [
1760
+ 0.02,
1761
+ 0.98
1762
  ],
1763
  "scale_bits": 4
1764
  },
 
1772
  2
1773
  ],
1774
  "bits_prop": [
1775
+ 0.02,
1776
+ 0.98
1777
  ],
1778
  "scale_bits": 4
1779
  },
 
1787
  2
1788
  ],
1789
  "bits_prop": [
1790
+ 0.2,
1791
+ 0.8
1792
  ],
1793
  "scale_bits": 4
1794
  },
 
1802
  2
1803
  ],
1804
  "bits_prop": [
1805
+ 0.02,
1806
+ 0.98
1807
  ],
1808
  "scale_bits": 4
1809
  },
 
1817
  2
1818
  ],
1819
  "bits_prop": [
1820
+ 0.02,
1821
+ 0.98
1822
  ],
1823
  "scale_bits": 4
1824
  },
 
1832
  2
1833
  ],
1834
  "bits_prop": [
1835
+ 0.02,
1836
+ 0.98
1837
  ],
1838
  "scale_bits": 4
1839
  },
 
1847
  2
1848
  ],
1849
  "bits_prop": [
1850
+ 0.2,
1851
+ 0.8
1852
  ],
1853
  "scale_bits": 4
1854
  }
1855
  },
1856
  "model.layers.17": {
1857
+ "accuracy": 0.9034838676452637,
1858
  "total_bits": 478689536,
1859
  "q_proj": {
1860
  "group_size": {
 
1963
  }
1964
  },
1965
  "model.layers.18": {
1966
+ "accuracy": 0.9072866439819336,
1967
+ "total_bits": 507787520,
1968
  "q_proj": {
1969
  "group_size": {
1970
  "4": 128,
 
1975
  2
1976
  ],
1977
  "bits_prop": [
1978
+ 0.1,
1979
+ 0.9
1980
  ],
1981
  "scale_bits": 4
1982
  },
 
1990
  2
1991
  ],
1992
  "bits_prop": [
1993
+ 0.1,
1994
+ 0.9
1995
  ],
1996
  "scale_bits": 4
1997
  },
 
2020
  2
2021
  ],
2022
  "bits_prop": [
2023
+ 0.1,
2024
+ 0.9
2025
  ],
2026
  "scale_bits": 4
2027
  },
 
2035
  2
2036
  ],
2037
  "bits_prop": [
2038
+ 0.1,
2039
+ 0.9
2040
  ],
2041
  "scale_bits": 4
2042
  },
 
2050
  2
2051
  ],
2052
  "bits_prop": [
2053
+ 0.1,
2054
+ 0.9
2055
  ],
2056
  "scale_bits": 4
2057
  },
 
2072
  }
2073
  },
2074
  "model.layers.19": {
2075
+ "accuracy": 0.9008588790893555,
2076
  "total_bits": 478689536,
2077
  "q_proj": {
2078
  "group_size": {
 
2181
  }
2182
  },
2183
  "model.layers.20": {
2184
+ "accuracy": 0.9053101539611816,
2185
  "total_bits": 478689536,
2186
  "q_proj": {
2187
  "group_size": {
 
2290
  }
2291
  },
2292
  "model.layers.21": {
2293
+ "accuracy": 0.9090385437011719,
2294
  "total_bits": 478689536,
2295
  "q_proj": {
2296
  "group_size": {
 
2399
  }
2400
  },
2401
  "model.layers.22": {
2402
+ "accuracy": 0.9069957733154297,
2403
+ "total_bits": 465844480,
2404
  "q_proj": {
2405
  "group_size": {
2406
  "4": 128,
 
2411
  2
2412
  ],
2413
  "bits_prop": [
2414
+ 0.01,
2415
+ 0.99
2416
  ],
2417
  "scale_bits": 4
2418
  },
 
2426
  2
2427
  ],
2428
  "bits_prop": [
2429
+ 0.01,
2430
+ 0.99
2431
  ],
2432
  "scale_bits": 4
2433
  },
 
2441
  2
2442
  ],
2443
  "bits_prop": [
2444
+ 0.05,
2445
+ 0.95
2446
  ],
2447
  "scale_bits": 4
2448
  },
 
2456
  2
2457
  ],
2458
  "bits_prop": [
2459
+ 0.01,
2460
+ 0.99
2461
  ],
2462
  "scale_bits": 4
2463
  },
 
2471
  2
2472
  ],
2473
  "bits_prop": [
2474
+ 0.01,
2475
+ 0.99
2476
  ],
2477
  "scale_bits": 4
2478
  },
 
2486
  2
2487
  ],
2488
  "bits_prop": [
2489
+ 0.01,
2490
+ 0.99
2491
  ],
2492
  "scale_bits": 4
2493
  },
 
2501
  2
2502
  ],
2503
  "bits_prop": [
2504
+ 0.1,
2505
+ 0.9
2506
  ],
2507
  "scale_bits": 4
2508
  }
2509
  },
2510
  "model.layers.23": {
2511
+ "accuracy": 0.9098095893859863,
2512
+ "total_bits": 478689536,
2513
  "q_proj": {
2514
  "group_size": {
2515
  "4": 128,
 
2520
  2
2521
  ],
2522
  "bits_prop": [
2523
+ 0.02,
2524
+ 0.98
2525
  ],
2526
  "scale_bits": 4
2527
  },
 
2535
  2
2536
  ],
2537
  "bits_prop": [
2538
+ 0.02,
2539
+ 0.98
2540
  ],
2541
  "scale_bits": 4
2542
  },
 
2550
  2
2551
  ],
2552
  "bits_prop": [
2553
+ 0.2,
2554
+ 0.8
2555
  ],
2556
  "scale_bits": 4
2557
  },
 
2565
  2
2566
  ],
2567
  "bits_prop": [
2568
+ 0.02,
2569
+ 0.98
2570
  ],
2571
  "scale_bits": 4
2572
  },
 
2580
  2
2581
  ],
2582
  "bits_prop": [
2583
+ 0.02,
2584
+ 0.98
2585
  ],
2586
  "scale_bits": 4
2587
  },
 
2595
  2
2596
  ],
2597
  "bits_prop": [
2598
+ 0.02,
2599
+ 0.98
2600
  ],
2601
  "scale_bits": 4
2602
  },
 
2610
  2
2611
  ],
2612
  "bits_prop": [
2613
+ 0.2,
2614
+ 0.8
2615
  ],
2616
  "scale_bits": 4
2617
  }
2618
  },
2619
  "model.layers.24": {
2620
+ "accuracy": 0.9100494384765625,
2621
+ "total_bits": 478689536,
2622
  "q_proj": {
2623
  "group_size": {
2624
  "4": 128,
 
2629
  2
2630
  ],
2631
  "bits_prop": [
2632
+ 0.02,
2633
+ 0.98
2634
  ],
2635
  "scale_bits": 4
2636
  },
 
2644
  2
2645
  ],
2646
  "bits_prop": [
2647
+ 0.02,
2648
+ 0.98
2649
  ],
2650
  "scale_bits": 4
2651
  },
 
2659
  2
2660
  ],
2661
  "bits_prop": [
2662
+ 0.2,
2663
+ 0.8
2664
  ],
2665
  "scale_bits": 4
2666
  },
 
2674
  2
2675
  ],
2676
  "bits_prop": [
2677
+ 0.02,
2678
+ 0.98
2679
  ],
2680
  "scale_bits": 4
2681
  },
 
2689
  2
2690
  ],
2691
  "bits_prop": [
2692
+ 0.02,
2693
+ 0.98
2694
  ],
2695
  "scale_bits": 4
2696
  },
 
2704
  2
2705
  ],
2706
  "bits_prop": [
2707
+ 0.02,
2708
+ 0.98
2709
  ],
2710
  "scale_bits": 4
2711
  },
 
2719
  2
2720
  ],
2721
  "bits_prop": [
2722
+ 0.2,
2723
+ 0.8
2724
  ],
2725
  "scale_bits": 4
2726
  }
2727
  },
2728
  "model.layers.25": {
2729
+ "accuracy": 0.9088339805603027,
2730
+ "total_bits": 478689536,
2731
  "q_proj": {
2732
  "group_size": {
2733
  "4": 128,
 
2738
  2
2739
  ],
2740
  "bits_prop": [
2741
+ 0.02,
2742
+ 0.98
2743
  ],
2744
  "scale_bits": 4
2745
  },
 
2753
  2
2754
  ],
2755
  "bits_prop": [
2756
+ 0.02,
2757
+ 0.98
2758
  ],
2759
  "scale_bits": 4
2760
  },
 
2768
  2
2769
  ],
2770
  "bits_prop": [
2771
+ 0.2,
2772
+ 0.8
2773
  ],
2774
  "scale_bits": 4
2775
  },
 
2783
  2
2784
  ],
2785
  "bits_prop": [
2786
+ 0.02,
2787
+ 0.98
2788
  ],
2789
  "scale_bits": 4
2790
  },
 
2798
  2
2799
  ],
2800
  "bits_prop": [
2801
+ 0.02,
2802
+ 0.98
2803
  ],
2804
  "scale_bits": 4
2805
  },
 
2813
  2
2814
  ],
2815
  "bits_prop": [
2816
+ 0.02,
2817
+ 0.98
2818
  ],
2819
  "scale_bits": 4
2820
  },
 
2828
  2
2829
  ],
2830
  "bits_prop": [
2831
+ 0.2,
2832
+ 0.8
2833
  ],
2834
  "scale_bits": 4
2835
  }
2836
  },
2837
  "model.layers.26": {
2838
+ "accuracy": 0.9072427749633789,
2839
+ "total_bits": 478689536,
2840
  "q_proj": {
2841
  "group_size": {
2842
  "4": 128,
 
2847
  2
2848
  ],
2849
  "bits_prop": [
2850
+ 0.02,
2851
+ 0.98
2852
  ],
2853
  "scale_bits": 4
2854
  },
 
2862
  2
2863
  ],
2864
  "bits_prop": [
2865
+ 0.02,
2866
+ 0.98
2867
  ],
2868
  "scale_bits": 4
2869
  },
 
2877
  2
2878
  ],
2879
  "bits_prop": [
2880
+ 0.2,
2881
+ 0.8
2882
  ],
2883
  "scale_bits": 4
2884
  },
 
2892
  2
2893
  ],
2894
  "bits_prop": [
2895
+ 0.02,
2896
+ 0.98
2897
  ],
2898
  "scale_bits": 4
2899
  },
 
2907
  2
2908
  ],
2909
  "bits_prop": [
2910
+ 0.02,
2911
+ 0.98
2912
  ],
2913
  "scale_bits": 4
2914
  },
 
2922
  2
2923
  ],
2924
  "bits_prop": [
2925
+ 0.02,
2926
+ 0.98
2927
  ],
2928
  "scale_bits": 4
2929
  },
 
2937
  2
2938
  ],
2939
  "bits_prop": [
2940
+ 0.2,
2941
+ 0.8
2942
  ],
2943
  "scale_bits": 4
2944
  }
2945
  },
2946
  "model.layers.27": {
2947
+ "accuracy": 0.9096179008483887,
2948
  "total_bits": 478689536,
2949
  "q_proj": {
2950
  "group_size": {
 
3053
  }
3054
  },
3055
  "model.layers.28": {
3056
+ "accuracy": 0.9028897285461426,
3057
  "total_bits": 478689536,
3058
  "q_proj": {
3059
  "group_size": {
 
3162
  }
3163
  },
3164
  "model.layers.29": {
3165
+ "accuracy": 0.902318000793457,
3166
+ "total_bits": 513030400,
3167
  "q_proj": {
3168
  "group_size": {
3169
  "4": 128,
 
3174
  2
3175
  ],
3176
  "bits_prop": [
3177
+ 0.05,
3178
+ 0.95
3179
  ],
3180
  "scale_bits": 4
3181
  },
 
3189
  2
3190
  ],
3191
  "bits_prop": [
3192
+ 0.05,
3193
+ 0.95
3194
  ],
3195
  "scale_bits": 4
3196
  },
 
3204
  2
3205
  ],
3206
  "bits_prop": [
3207
+ 0.4,
3208
+ 0.6
3209
  ],
3210
  "scale_bits": 4
3211
  },
 
3219
  2
3220
  ],
3221
  "bits_prop": [
3222
+ 0.05,
3223
+ 0.95
3224
  ],
3225
  "scale_bits": 4
3226
  },
 
3234
  2
3235
  ],
3236
  "bits_prop": [
3237
+ 0.05,
3238
+ 0.95
3239
  ],
3240
  "scale_bits": 4
3241
  },
 
3249
  2
3250
  ],
3251
  "bits_prop": [
3252
+ 0.05,
3253
+ 0.95
3254
  ],
3255
  "scale_bits": 4
3256
  },
 
3264
  2
3265
  ],
3266
  "bits_prop": [
3267
+ 0.4,
3268
+ 0.6
3269
  ],
3270
  "scale_bits": 4
3271
  }
3272
  },
3273
  "model.layers.30": {
3274
+ "accuracy": 0.9191799163818359,
3275
+ "total_bits": 607664384,
3276
  "q_proj": {
3277
  "group_size": {
3278
  "4": 128,
 
3283
  2
3284
  ],
3285
  "bits_prop": [
3286
+ 0.1,
3287
+ 0.9
3288
  ],
3289
  "scale_bits": 4
3290
  },
 
3298
  2
3299
  ],
3300
  "bits_prop": [
3301
+ 0.1,
3302
+ 0.9
3303
  ],
3304
  "scale_bits": 4
3305
  },
3306
  "v_proj": {
3307
  "group_size": {
3308
+ "4": 128
 
3309
  },
3310
  "bits": [
3311
+ 4
 
3312
  ],
3313
  "bits_prop": [
3314
+ 1.0
 
3315
  ],
3316
  "scale_bits": 4
3317
  },
 
3325
  2
3326
  ],
3327
  "bits_prop": [
3328
+ 0.1,
3329
+ 0.9
3330
  ],
3331
  "scale_bits": 4
3332
  },
 
3340
  2
3341
  ],
3342
  "bits_prop": [
3343
+ 0.1,
3344
+ 0.9
3345
  ],
3346
  "scale_bits": 4
3347
  },
 
3355
  2
3356
  ],
3357
  "bits_prop": [
3358
+ 0.1,
3359
+ 0.9
3360
  ],
3361
  "scale_bits": 4
3362
  },
3363
  "down_proj": {
3364
  "group_size": {
3365
+ "4": 128
 
3366
  },
3367
  "bits": [
3368
+ 4
 
3369
  ],
3370
  "bits_prop": [
3371
+ 1.0
 
3372
  ],
3373
  "scale_bits": 4
3374
  }
3375
  },
3376
  "model.layers.31": {
3377
+ "accuracy": 0.9125514030456543,
3378
+ "total_bits": 607664384,
3379
  "q_proj": {
3380
  "group_size": {
3381
  "4": 128,
 
3386
  2
3387
  ],
3388
  "bits_prop": [
3389
+ 0.1,
3390
+ 0.9
3391
  ],
3392
  "scale_bits": 4
3393
  },
 
3401
  2
3402
  ],
3403
  "bits_prop": [
3404
+ 0.1,
3405
+ 0.9
3406
  ],
3407
  "scale_bits": 4
3408
  },
3409
  "v_proj": {
3410
  "group_size": {
3411
+ "4": 128
 
3412
  },
3413
  "bits": [
3414
+ 4
 
3415
  ],
3416
  "bits_prop": [
3417
+ 1.0
 
3418
  ],
3419
  "scale_bits": 4
3420
  },
 
3428
  2
3429
  ],
3430
  "bits_prop": [
3431
+ 0.1,
3432
+ 0.9
3433
  ],
3434
  "scale_bits": 4
3435
  },
 
3443
  2
3444
  ],
3445
  "bits_prop": [
3446
+ 0.1,
3447
+ 0.9
3448
  ],
3449
  "scale_bits": 4
3450
  },
 
3458
  2
3459
  ],
3460
  "bits_prop": [
3461
+ 0.1,
3462
+ 0.9
3463
  ],
3464
  "scale_bits": 4
3465
  },
3466
  "down_proj": {
3467
  "group_size": {
3468
+ "4": 128
 
3469
  },
3470
  "bits": [
3471
+ 4
 
3472
  ],
3473
  "bits_prop": [
3474
+ 1.0
 
3475
  ],
3476
  "scale_bits": 4
3477
  }
3478
  }
3479
  }
3480
+ }