NicoNico6
committed on
Commit
•
8b40c52
1
Parent(s):
b991fa3
update
Browse files
- model.safetensors +2 -2
- quant_strategy.json +325 -337
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f95f93237e56b66678970c36837ab144fa892ee7c8e34dd4d735fd940b762082
|
3 |
+
size 2847057704
|
quant_strategy.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"measurement": {
|
3 |
"model.layers.0": {
|
4 |
-
"accuracy": 0.
|
5 |
-
"total_bits":
|
6 |
"q_proj": {
|
7 |
"group_size": {
|
8 |
"4": 128,
|
@@ -13,8 +13,8 @@
|
|
13 |
2
|
14 |
],
|
15 |
"bits_prop": [
|
16 |
-
0.
|
17 |
-
0.
|
18 |
],
|
19 |
"scale_bits": 4
|
20 |
},
|
@@ -28,8 +28,8 @@
|
|
28 |
2
|
29 |
],
|
30 |
"bits_prop": [
|
31 |
-
0.
|
32 |
-
0.
|
33 |
],
|
34 |
"scale_bits": 4
|
35 |
},
|
@@ -43,8 +43,8 @@
|
|
43 |
2
|
44 |
],
|
45 |
"bits_prop": [
|
46 |
-
0.
|
47 |
-
0.
|
48 |
],
|
49 |
"scale_bits": 4
|
50 |
},
|
@@ -58,8 +58,8 @@
|
|
58 |
2
|
59 |
],
|
60 |
"bits_prop": [
|
61 |
-
0.
|
62 |
-
0.
|
63 |
],
|
64 |
"scale_bits": 4
|
65 |
},
|
@@ -73,8 +73,8 @@
|
|
73 |
2
|
74 |
],
|
75 |
"bits_prop": [
|
76 |
-
0.
|
77 |
-
0.
|
78 |
],
|
79 |
"scale_bits": 4
|
80 |
},
|
@@ -88,8 +88,8 @@
|
|
88 |
2
|
89 |
],
|
90 |
"bits_prop": [
|
91 |
-
0.
|
92 |
-
0.
|
93 |
],
|
94 |
"scale_bits": 4
|
95 |
},
|
@@ -103,15 +103,15 @@
|
|
103 |
2
|
104 |
],
|
105 |
"bits_prop": [
|
106 |
-
0.
|
107 |
-
0.
|
108 |
],
|
109 |
"scale_bits": 4
|
110 |
}
|
111 |
},
|
112 |
"model.layers.1": {
|
113 |
-
"accuracy": 0.
|
114 |
-
"total_bits":
|
115 |
"q_proj": {
|
116 |
"group_size": {
|
117 |
"4": 128,
|
@@ -122,8 +122,8 @@
|
|
122 |
2
|
123 |
],
|
124 |
"bits_prop": [
|
125 |
-
0.
|
126 |
-
0.
|
127 |
],
|
128 |
"scale_bits": 4
|
129 |
},
|
@@ -137,8 +137,8 @@
|
|
137 |
2
|
138 |
],
|
139 |
"bits_prop": [
|
140 |
-
0.
|
141 |
-
0.
|
142 |
],
|
143 |
"scale_bits": 4
|
144 |
},
|
@@ -152,8 +152,8 @@
|
|
152 |
2
|
153 |
],
|
154 |
"bits_prop": [
|
155 |
-
0.
|
156 |
-
0.
|
157 |
],
|
158 |
"scale_bits": 4
|
159 |
},
|
@@ -182,8 +182,8 @@
|
|
182 |
2
|
183 |
],
|
184 |
"bits_prop": [
|
185 |
-
0.
|
186 |
-
0.
|
187 |
],
|
188 |
"scale_bits": 4
|
189 |
},
|
@@ -197,8 +197,8 @@
|
|
197 |
2
|
198 |
],
|
199 |
"bits_prop": [
|
200 |
-
0.
|
201 |
-
0.
|
202 |
],
|
203 |
"scale_bits": 4
|
204 |
},
|
@@ -212,14 +212,14 @@
|
|
212 |
2
|
213 |
],
|
214 |
"bits_prop": [
|
215 |
-
0.
|
216 |
-
0.
|
217 |
],
|
218 |
"scale_bits": 4
|
219 |
}
|
220 |
},
|
221 |
"model.layers.2": {
|
222 |
-
"accuracy": 0.
|
223 |
"total_bits": 456407296,
|
224 |
"q_proj": {
|
225 |
"group_size": {
|
@@ -328,7 +328,7 @@
|
|
328 |
}
|
329 |
},
|
330 |
"model.layers.3": {
|
331 |
-
"accuracy": 0.
|
332 |
"total_bits": 456407296,
|
333 |
"q_proj": {
|
334 |
"group_size": {
|
@@ -437,7 +437,7 @@
|
|
437 |
}
|
438 |
},
|
439 |
"model.layers.4": {
|
440 |
-
"accuracy": 0.
|
441 |
"total_bits": 456407296,
|
442 |
"q_proj": {
|
443 |
"group_size": {
|
@@ -546,7 +546,7 @@
|
|
546 |
}
|
547 |
},
|
548 |
"model.layers.5": {
|
549 |
-
"accuracy": 0.
|
550 |
"total_bits": 456407296,
|
551 |
"q_proj": {
|
552 |
"group_size": {
|
@@ -655,7 +655,7 @@
|
|
655 |
}
|
656 |
},
|
657 |
"model.layers.6": {
|
658 |
-
"accuracy": 0.
|
659 |
"total_bits": 456407296,
|
660 |
"q_proj": {
|
661 |
"group_size": {
|
@@ -764,8 +764,8 @@
|
|
764 |
}
|
765 |
},
|
766 |
"model.layers.7": {
|
767 |
-
"accuracy": 0.
|
768 |
-
"total_bits":
|
769 |
"q_proj": {
|
770 |
"group_size": {
|
771 |
"4": 128,
|
@@ -776,8 +776,8 @@
|
|
776 |
2
|
777 |
],
|
778 |
"bits_prop": [
|
779 |
-
0.
|
780 |
-
0.
|
781 |
],
|
782 |
"scale_bits": 4
|
783 |
},
|
@@ -791,8 +791,8 @@
|
|
791 |
2
|
792 |
],
|
793 |
"bits_prop": [
|
794 |
-
0.
|
795 |
-
0.
|
796 |
],
|
797 |
"scale_bits": 4
|
798 |
},
|
@@ -821,8 +821,8 @@
|
|
821 |
2
|
822 |
],
|
823 |
"bits_prop": [
|
824 |
-
0.
|
825 |
-
0.
|
826 |
],
|
827 |
"scale_bits": 4
|
828 |
},
|
@@ -836,8 +836,8 @@
|
|
836 |
2
|
837 |
],
|
838 |
"bits_prop": [
|
839 |
-
0.
|
840 |
-
0.
|
841 |
],
|
842 |
"scale_bits": 4
|
843 |
},
|
@@ -851,8 +851,8 @@
|
|
851 |
2
|
852 |
],
|
853 |
"bits_prop": [
|
854 |
-
0.
|
855 |
-
0.
|
856 |
],
|
857 |
"scale_bits": 4
|
858 |
},
|
@@ -866,15 +866,15 @@
|
|
866 |
2
|
867 |
],
|
868 |
"bits_prop": [
|
869 |
-
0.
|
870 |
-
0.
|
871 |
],
|
872 |
"scale_bits": 4
|
873 |
}
|
874 |
},
|
875 |
"model.layers.8": {
|
876 |
-
"accuracy": 0.
|
877 |
-
"total_bits":
|
878 |
"q_proj": {
|
879 |
"group_size": {
|
880 |
"4": 128,
|
@@ -930,8 +930,8 @@
|
|
930 |
2
|
931 |
],
|
932 |
"bits_prop": [
|
933 |
-
0.
|
934 |
-
0.
|
935 |
],
|
936 |
"scale_bits": 4
|
937 |
},
|
@@ -975,15 +975,15 @@
|
|
975 |
2
|
976 |
],
|
977 |
"bits_prop": [
|
978 |
-
0.
|
979 |
-
0.
|
980 |
],
|
981 |
"scale_bits": 4
|
982 |
}
|
983 |
},
|
984 |
"model.layers.9": {
|
985 |
-
"accuracy": 0.
|
986 |
-
"total_bits":
|
987 |
"q_proj": {
|
988 |
"group_size": {
|
989 |
"4": 128,
|
@@ -994,8 +994,8 @@
|
|
994 |
2
|
995 |
],
|
996 |
"bits_prop": [
|
997 |
-
0.
|
998 |
-
0.
|
999 |
],
|
1000 |
"scale_bits": 4
|
1001 |
},
|
@@ -1009,8 +1009,8 @@
|
|
1009 |
2
|
1010 |
],
|
1011 |
"bits_prop": [
|
1012 |
-
0.
|
1013 |
-
0.
|
1014 |
],
|
1015 |
"scale_bits": 4
|
1016 |
},
|
@@ -1024,8 +1024,8 @@
|
|
1024 |
2
|
1025 |
],
|
1026 |
"bits_prop": [
|
1027 |
-
0.
|
1028 |
-
0.
|
1029 |
],
|
1030 |
"scale_bits": 4
|
1031 |
},
|
@@ -1054,8 +1054,8 @@
|
|
1054 |
2
|
1055 |
],
|
1056 |
"bits_prop": [
|
1057 |
-
0.
|
1058 |
-
0.
|
1059 |
],
|
1060 |
"scale_bits": 4
|
1061 |
},
|
@@ -1069,8 +1069,8 @@
|
|
1069 |
2
|
1070 |
],
|
1071 |
"bits_prop": [
|
1072 |
-
0.
|
1073 |
-
0.
|
1074 |
],
|
1075 |
"scale_bits": 4
|
1076 |
},
|
@@ -1084,15 +1084,15 @@
|
|
1084 |
2
|
1085 |
],
|
1086 |
"bits_prop": [
|
1087 |
-
0.
|
1088 |
-
0.
|
1089 |
],
|
1090 |
"scale_bits": 4
|
1091 |
}
|
1092 |
},
|
1093 |
"model.layers.10": {
|
1094 |
-
"accuracy": 0.
|
1095 |
-
"total_bits":
|
1096 |
"q_proj": {
|
1097 |
"group_size": {
|
1098 |
"4": 128,
|
@@ -1103,8 +1103,8 @@
|
|
1103 |
2
|
1104 |
],
|
1105 |
"bits_prop": [
|
1106 |
-
0.
|
1107 |
-
0.
|
1108 |
],
|
1109 |
"scale_bits": 4
|
1110 |
},
|
@@ -1118,8 +1118,8 @@
|
|
1118 |
2
|
1119 |
],
|
1120 |
"bits_prop": [
|
1121 |
-
0.
|
1122 |
-
0.
|
1123 |
],
|
1124 |
"scale_bits": 4
|
1125 |
},
|
@@ -1133,8 +1133,8 @@
|
|
1133 |
2
|
1134 |
],
|
1135 |
"bits_prop": [
|
1136 |
-
0.
|
1137 |
-
0.
|
1138 |
],
|
1139 |
"scale_bits": 4
|
1140 |
},
|
@@ -1163,8 +1163,8 @@
|
|
1163 |
2
|
1164 |
],
|
1165 |
"bits_prop": [
|
1166 |
-
0.
|
1167 |
-
0.
|
1168 |
],
|
1169 |
"scale_bits": 4
|
1170 |
},
|
@@ -1178,8 +1178,8 @@
|
|
1178 |
2
|
1179 |
],
|
1180 |
"bits_prop": [
|
1181 |
-
0.
|
1182 |
-
0.
|
1183 |
],
|
1184 |
"scale_bits": 4
|
1185 |
},
|
@@ -1193,15 +1193,15 @@
|
|
1193 |
2
|
1194 |
],
|
1195 |
"bits_prop": [
|
1196 |
-
0.
|
1197 |
-
0.
|
1198 |
],
|
1199 |
"scale_bits": 4
|
1200 |
}
|
1201 |
},
|
1202 |
"model.layers.11": {
|
1203 |
-
"accuracy": 0.
|
1204 |
-
"total_bits":
|
1205 |
"q_proj": {
|
1206 |
"group_size": {
|
1207 |
"4": 128,
|
@@ -1212,8 +1212,8 @@
|
|
1212 |
2
|
1213 |
],
|
1214 |
"bits_prop": [
|
1215 |
-
0.
|
1216 |
-
0.
|
1217 |
],
|
1218 |
"scale_bits": 4
|
1219 |
},
|
@@ -1227,8 +1227,8 @@
|
|
1227 |
2
|
1228 |
],
|
1229 |
"bits_prop": [
|
1230 |
-
0.
|
1231 |
-
0.
|
1232 |
],
|
1233 |
"scale_bits": 4
|
1234 |
},
|
@@ -1242,8 +1242,8 @@
|
|
1242 |
2
|
1243 |
],
|
1244 |
"bits_prop": [
|
1245 |
-
0.
|
1246 |
-
0.
|
1247 |
],
|
1248 |
"scale_bits": 4
|
1249 |
},
|
@@ -1272,8 +1272,8 @@
|
|
1272 |
2
|
1273 |
],
|
1274 |
"bits_prop": [
|
1275 |
-
0.
|
1276 |
-
0.
|
1277 |
],
|
1278 |
"scale_bits": 4
|
1279 |
},
|
@@ -1287,8 +1287,8 @@
|
|
1287 |
2
|
1288 |
],
|
1289 |
"bits_prop": [
|
1290 |
-
0.
|
1291 |
-
0.
|
1292 |
],
|
1293 |
"scale_bits": 4
|
1294 |
},
|
@@ -1302,15 +1302,15 @@
|
|
1302 |
2
|
1303 |
],
|
1304 |
"bits_prop": [
|
1305 |
-
0.
|
1306 |
-
0.
|
1307 |
],
|
1308 |
"scale_bits": 4
|
1309 |
}
|
1310 |
},
|
1311 |
"model.layers.12": {
|
1312 |
-
"accuracy": 0.
|
1313 |
-
"total_bits":
|
1314 |
"q_proj": {
|
1315 |
"group_size": {
|
1316 |
"4": 128,
|
@@ -1321,8 +1321,8 @@
|
|
1321 |
2
|
1322 |
],
|
1323 |
"bits_prop": [
|
1324 |
-
0.
|
1325 |
-
0.
|
1326 |
],
|
1327 |
"scale_bits": 4
|
1328 |
},
|
@@ -1336,8 +1336,8 @@
|
|
1336 |
2
|
1337 |
],
|
1338 |
"bits_prop": [
|
1339 |
-
0.
|
1340 |
-
0.
|
1341 |
],
|
1342 |
"scale_bits": 4
|
1343 |
},
|
@@ -1351,8 +1351,8 @@
|
|
1351 |
2
|
1352 |
],
|
1353 |
"bits_prop": [
|
1354 |
-
0.
|
1355 |
-
0.
|
1356 |
],
|
1357 |
"scale_bits": 4
|
1358 |
},
|
@@ -1366,8 +1366,8 @@
|
|
1366 |
2
|
1367 |
],
|
1368 |
"bits_prop": [
|
1369 |
-
0.
|
1370 |
-
0.
|
1371 |
],
|
1372 |
"scale_bits": 4
|
1373 |
},
|
@@ -1381,8 +1381,8 @@
|
|
1381 |
2
|
1382 |
],
|
1383 |
"bits_prop": [
|
1384 |
-
0.
|
1385 |
-
0.
|
1386 |
],
|
1387 |
"scale_bits": 4
|
1388 |
},
|
@@ -1396,8 +1396,8 @@
|
|
1396 |
2
|
1397 |
],
|
1398 |
"bits_prop": [
|
1399 |
-
0.
|
1400 |
-
0.
|
1401 |
],
|
1402 |
"scale_bits": 4
|
1403 |
},
|
@@ -1411,15 +1411,15 @@
|
|
1411 |
2
|
1412 |
],
|
1413 |
"bits_prop": [
|
1414 |
-
0.
|
1415 |
-
0.
|
1416 |
],
|
1417 |
"scale_bits": 4
|
1418 |
}
|
1419 |
},
|
1420 |
"model.layers.13": {
|
1421 |
-
"accuracy": 0.
|
1422 |
-
"total_bits":
|
1423 |
"q_proj": {
|
1424 |
"group_size": {
|
1425 |
"4": 128,
|
@@ -1430,8 +1430,8 @@
|
|
1430 |
2
|
1431 |
],
|
1432 |
"bits_prop": [
|
1433 |
-
0.
|
1434 |
-
0.
|
1435 |
],
|
1436 |
"scale_bits": 4
|
1437 |
},
|
@@ -1445,8 +1445,8 @@
|
|
1445 |
2
|
1446 |
],
|
1447 |
"bits_prop": [
|
1448 |
-
0.
|
1449 |
-
0.
|
1450 |
],
|
1451 |
"scale_bits": 4
|
1452 |
},
|
@@ -1460,8 +1460,8 @@
|
|
1460 |
2
|
1461 |
],
|
1462 |
"bits_prop": [
|
1463 |
-
0.
|
1464 |
-
0.
|
1465 |
],
|
1466 |
"scale_bits": 4
|
1467 |
},
|
@@ -1490,8 +1490,8 @@
|
|
1490 |
2
|
1491 |
],
|
1492 |
"bits_prop": [
|
1493 |
-
0.
|
1494 |
-
0.
|
1495 |
],
|
1496 |
"scale_bits": 4
|
1497 |
},
|
@@ -1505,8 +1505,8 @@
|
|
1505 |
2
|
1506 |
],
|
1507 |
"bits_prop": [
|
1508 |
-
0.
|
1509 |
-
0.
|
1510 |
],
|
1511 |
"scale_bits": 4
|
1512 |
},
|
@@ -1520,15 +1520,15 @@
|
|
1520 |
2
|
1521 |
],
|
1522 |
"bits_prop": [
|
1523 |
-
0.
|
1524 |
-
0.
|
1525 |
],
|
1526 |
"scale_bits": 4
|
1527 |
}
|
1528 |
},
|
1529 |
"model.layers.14": {
|
1530 |
-
"accuracy": 0.
|
1531 |
-
"total_bits":
|
1532 |
"q_proj": {
|
1533 |
"group_size": {
|
1534 |
"4": 128,
|
@@ -1539,8 +1539,8 @@
|
|
1539 |
2
|
1540 |
],
|
1541 |
"bits_prop": [
|
1542 |
-
0.
|
1543 |
-
0.
|
1544 |
],
|
1545 |
"scale_bits": 4
|
1546 |
},
|
@@ -1554,8 +1554,8 @@
|
|
1554 |
2
|
1555 |
],
|
1556 |
"bits_prop": [
|
1557 |
-
0.
|
1558 |
-
0.
|
1559 |
],
|
1560 |
"scale_bits": 4
|
1561 |
},
|
@@ -1569,8 +1569,8 @@
|
|
1569 |
2
|
1570 |
],
|
1571 |
"bits_prop": [
|
1572 |
-
0.
|
1573 |
-
0.
|
1574 |
],
|
1575 |
"scale_bits": 4
|
1576 |
},
|
@@ -1599,8 +1599,8 @@
|
|
1599 |
2
|
1600 |
],
|
1601 |
"bits_prop": [
|
1602 |
-
0.
|
1603 |
-
0.
|
1604 |
],
|
1605 |
"scale_bits": 4
|
1606 |
},
|
@@ -1614,8 +1614,8 @@
|
|
1614 |
2
|
1615 |
],
|
1616 |
"bits_prop": [
|
1617 |
-
0.
|
1618 |
-
0.
|
1619 |
],
|
1620 |
"scale_bits": 4
|
1621 |
},
|
@@ -1629,15 +1629,15 @@
|
|
1629 |
2
|
1630 |
],
|
1631 |
"bits_prop": [
|
1632 |
-
0.
|
1633 |
-
0.
|
1634 |
],
|
1635 |
"scale_bits": 4
|
1636 |
}
|
1637 |
},
|
1638 |
"model.layers.15": {
|
1639 |
-
"accuracy": 0.
|
1640 |
-
"total_bits":
|
1641 |
"q_proj": {
|
1642 |
"group_size": {
|
1643 |
"4": 128,
|
@@ -1648,9 +1648,9 @@
|
|
1648 |
2
|
1649 |
],
|
1650 |
"bits_prop": [
|
1651 |
-
0.
|
1652 |
-
0.
|
1653 |
-
],
|
1654 |
"scale_bits": 4
|
1655 |
},
|
1656 |
"k_proj": {
|
@@ -1663,8 +1663,8 @@
|
|
1663 |
2
|
1664 |
],
|
1665 |
"bits_prop": [
|
1666 |
-
0.
|
1667 |
-
0.
|
1668 |
],
|
1669 |
"scale_bits": 4
|
1670 |
},
|
@@ -1678,8 +1678,8 @@
|
|
1678 |
2
|
1679 |
],
|
1680 |
"bits_prop": [
|
1681 |
-
0.
|
1682 |
-
0.
|
1683 |
],
|
1684 |
"scale_bits": 4
|
1685 |
},
|
@@ -1693,8 +1693,8 @@
|
|
1693 |
2
|
1694 |
],
|
1695 |
"bits_prop": [
|
1696 |
-
0.
|
1697 |
-
0.
|
1698 |
],
|
1699 |
"scale_bits": 4
|
1700 |
},
|
@@ -1708,8 +1708,8 @@
|
|
1708 |
2
|
1709 |
],
|
1710 |
"bits_prop": [
|
1711 |
-
0.
|
1712 |
-
0.
|
1713 |
],
|
1714 |
"scale_bits": 4
|
1715 |
},
|
@@ -1723,8 +1723,8 @@
|
|
1723 |
2
|
1724 |
],
|
1725 |
"bits_prop": [
|
1726 |
-
0.
|
1727 |
-
0.
|
1728 |
],
|
1729 |
"scale_bits": 4
|
1730 |
},
|
@@ -1738,15 +1738,15 @@
|
|
1738 |
2
|
1739 |
],
|
1740 |
"bits_prop": [
|
1741 |
-
0.
|
1742 |
-
0.
|
1743 |
],
|
1744 |
"scale_bits": 4
|
1745 |
}
|
1746 |
},
|
1747 |
"model.layers.16": {
|
1748 |
-
"accuracy": 0.
|
1749 |
-
"total_bits":
|
1750 |
"q_proj": {
|
1751 |
"group_size": {
|
1752 |
"4": 128,
|
@@ -1757,8 +1757,8 @@
|
|
1757 |
2
|
1758 |
],
|
1759 |
"bits_prop": [
|
1760 |
-
0.
|
1761 |
-
0.
|
1762 |
],
|
1763 |
"scale_bits": 4
|
1764 |
},
|
@@ -1772,8 +1772,8 @@
|
|
1772 |
2
|
1773 |
],
|
1774 |
"bits_prop": [
|
1775 |
-
0.
|
1776 |
-
0.
|
1777 |
],
|
1778 |
"scale_bits": 4
|
1779 |
},
|
@@ -1787,8 +1787,8 @@
|
|
1787 |
2
|
1788 |
],
|
1789 |
"bits_prop": [
|
1790 |
-
0.
|
1791 |
-
0.
|
1792 |
],
|
1793 |
"scale_bits": 4
|
1794 |
},
|
@@ -1802,8 +1802,8 @@
|
|
1802 |
2
|
1803 |
],
|
1804 |
"bits_prop": [
|
1805 |
-
0.
|
1806 |
-
0.
|
1807 |
],
|
1808 |
"scale_bits": 4
|
1809 |
},
|
@@ -1817,8 +1817,8 @@
|
|
1817 |
2
|
1818 |
],
|
1819 |
"bits_prop": [
|
1820 |
-
0.
|
1821 |
-
0.
|
1822 |
],
|
1823 |
"scale_bits": 4
|
1824 |
},
|
@@ -1832,8 +1832,8 @@
|
|
1832 |
2
|
1833 |
],
|
1834 |
"bits_prop": [
|
1835 |
-
0.
|
1836 |
-
0.
|
1837 |
],
|
1838 |
"scale_bits": 4
|
1839 |
},
|
@@ -1847,14 +1847,14 @@
|
|
1847 |
2
|
1848 |
],
|
1849 |
"bits_prop": [
|
1850 |
-
0.
|
1851 |
-
0.
|
1852 |
],
|
1853 |
"scale_bits": 4
|
1854 |
}
|
1855 |
},
|
1856 |
"model.layers.17": {
|
1857 |
-
"accuracy": 0.
|
1858 |
"total_bits": 478689536,
|
1859 |
"q_proj": {
|
1860 |
"group_size": {
|
@@ -1963,8 +1963,8 @@
|
|
1963 |
}
|
1964 |
},
|
1965 |
"model.layers.18": {
|
1966 |
-
"accuracy": 0.
|
1967 |
-
"total_bits":
|
1968 |
"q_proj": {
|
1969 |
"group_size": {
|
1970 |
"4": 128,
|
@@ -1975,8 +1975,8 @@
|
|
1975 |
2
|
1976 |
],
|
1977 |
"bits_prop": [
|
1978 |
-
0.
|
1979 |
-
0.
|
1980 |
],
|
1981 |
"scale_bits": 4
|
1982 |
},
|
@@ -1990,8 +1990,8 @@
|
|
1990 |
2
|
1991 |
],
|
1992 |
"bits_prop": [
|
1993 |
-
0.
|
1994 |
-
0.
|
1995 |
],
|
1996 |
"scale_bits": 4
|
1997 |
},
|
@@ -2020,8 +2020,8 @@
|
|
2020 |
2
|
2021 |
],
|
2022 |
"bits_prop": [
|
2023 |
-
0.
|
2024 |
-
0.
|
2025 |
],
|
2026 |
"scale_bits": 4
|
2027 |
},
|
@@ -2035,8 +2035,8 @@
|
|
2035 |
2
|
2036 |
],
|
2037 |
"bits_prop": [
|
2038 |
-
0.
|
2039 |
-
0.
|
2040 |
],
|
2041 |
"scale_bits": 4
|
2042 |
},
|
@@ -2050,8 +2050,8 @@
|
|
2050 |
2
|
2051 |
],
|
2052 |
"bits_prop": [
|
2053 |
-
0.
|
2054 |
-
0.
|
2055 |
],
|
2056 |
"scale_bits": 4
|
2057 |
},
|
@@ -2072,7 +2072,7 @@
|
|
2072 |
}
|
2073 |
},
|
2074 |
"model.layers.19": {
|
2075 |
-
"accuracy": 0.
|
2076 |
"total_bits": 478689536,
|
2077 |
"q_proj": {
|
2078 |
"group_size": {
|
@@ -2181,7 +2181,7 @@
|
|
2181 |
}
|
2182 |
},
|
2183 |
"model.layers.20": {
|
2184 |
-
"accuracy": 0.
|
2185 |
"total_bits": 478689536,
|
2186 |
"q_proj": {
|
2187 |
"group_size": {
|
@@ -2290,7 +2290,7 @@
|
|
2290 |
}
|
2291 |
},
|
2292 |
"model.layers.21": {
|
2293 |
-
"accuracy": 0.
|
2294 |
"total_bits": 478689536,
|
2295 |
"q_proj": {
|
2296 |
"group_size": {
|
@@ -2399,8 +2399,8 @@
|
|
2399 |
}
|
2400 |
},
|
2401 |
"model.layers.22": {
|
2402 |
-
"accuracy": 0.
|
2403 |
-
"total_bits":
|
2404 |
"q_proj": {
|
2405 |
"group_size": {
|
2406 |
"4": 128,
|
@@ -2411,8 +2411,8 @@
|
|
2411 |
2
|
2412 |
],
|
2413 |
"bits_prop": [
|
2414 |
-
0.
|
2415 |
-
0.
|
2416 |
],
|
2417 |
"scale_bits": 4
|
2418 |
},
|
@@ -2426,8 +2426,8 @@
|
|
2426 |
2
|
2427 |
],
|
2428 |
"bits_prop": [
|
2429 |
-
0.
|
2430 |
-
0.
|
2431 |
],
|
2432 |
"scale_bits": 4
|
2433 |
},
|
@@ -2441,8 +2441,8 @@
|
|
2441 |
2
|
2442 |
],
|
2443 |
"bits_prop": [
|
2444 |
-
0.
|
2445 |
-
0.
|
2446 |
],
|
2447 |
"scale_bits": 4
|
2448 |
},
|
@@ -2456,8 +2456,8 @@
|
|
2456 |
2
|
2457 |
],
|
2458 |
"bits_prop": [
|
2459 |
-
0.
|
2460 |
-
0.
|
2461 |
],
|
2462 |
"scale_bits": 4
|
2463 |
},
|
@@ -2471,8 +2471,8 @@
|
|
2471 |
2
|
2472 |
],
|
2473 |
"bits_prop": [
|
2474 |
-
0.
|
2475 |
-
0.
|
2476 |
],
|
2477 |
"scale_bits": 4
|
2478 |
},
|
@@ -2486,8 +2486,8 @@
|
|
2486 |
2
|
2487 |
],
|
2488 |
"bits_prop": [
|
2489 |
-
0.
|
2490 |
-
0.
|
2491 |
],
|
2492 |
"scale_bits": 4
|
2493 |
},
|
@@ -2501,15 +2501,15 @@
|
|
2501 |
2
|
2502 |
],
|
2503 |
"bits_prop": [
|
2504 |
-
0.
|
2505 |
-
0.
|
2506 |
],
|
2507 |
"scale_bits": 4
|
2508 |
}
|
2509 |
},
|
2510 |
"model.layers.23": {
|
2511 |
-
"accuracy": 0.
|
2512 |
-
"total_bits":
|
2513 |
"q_proj": {
|
2514 |
"group_size": {
|
2515 |
"4": 128,
|
@@ -2520,8 +2520,8 @@
|
|
2520 |
2
|
2521 |
],
|
2522 |
"bits_prop": [
|
2523 |
-
0.
|
2524 |
-
0.
|
2525 |
],
|
2526 |
"scale_bits": 4
|
2527 |
},
|
@@ -2535,8 +2535,8 @@
|
|
2535 |
2
|
2536 |
],
|
2537 |
"bits_prop": [
|
2538 |
-
0.
|
2539 |
-
0.
|
2540 |
],
|
2541 |
"scale_bits": 4
|
2542 |
},
|
@@ -2550,8 +2550,8 @@
|
|
2550 |
2
|
2551 |
],
|
2552 |
"bits_prop": [
|
2553 |
-
0.
|
2554 |
-
0.
|
2555 |
],
|
2556 |
"scale_bits": 4
|
2557 |
},
|
@@ -2565,8 +2565,8 @@
|
|
2565 |
2
|
2566 |
],
|
2567 |
"bits_prop": [
|
2568 |
-
0.
|
2569 |
-
0.
|
2570 |
],
|
2571 |
"scale_bits": 4
|
2572 |
},
|
@@ -2580,8 +2580,8 @@
|
|
2580 |
2
|
2581 |
],
|
2582 |
"bits_prop": [
|
2583 |
-
0.
|
2584 |
-
0.
|
2585 |
],
|
2586 |
"scale_bits": 4
|
2587 |
},
|
@@ -2595,8 +2595,8 @@
|
|
2595 |
2
|
2596 |
],
|
2597 |
"bits_prop": [
|
2598 |
-
0.
|
2599 |
-
0.
|
2600 |
],
|
2601 |
"scale_bits": 4
|
2602 |
},
|
@@ -2610,15 +2610,15 @@
|
|
2610 |
2
|
2611 |
],
|
2612 |
"bits_prop": [
|
2613 |
-
0.
|
2614 |
-
0.
|
2615 |
],
|
2616 |
"scale_bits": 4
|
2617 |
}
|
2618 |
},
|
2619 |
"model.layers.24": {
|
2620 |
-
"accuracy": 0.
|
2621 |
-
"total_bits":
|
2622 |
"q_proj": {
|
2623 |
"group_size": {
|
2624 |
"4": 128,
|
@@ -2629,8 +2629,8 @@
|
|
2629 |
2
|
2630 |
],
|
2631 |
"bits_prop": [
|
2632 |
-
0.
|
2633 |
-
0.
|
2634 |
],
|
2635 |
"scale_bits": 4
|
2636 |
},
|
@@ -2644,8 +2644,8 @@
|
|
2644 |
2
|
2645 |
],
|
2646 |
"bits_prop": [
|
2647 |
-
0.
|
2648 |
-
0.
|
2649 |
],
|
2650 |
"scale_bits": 4
|
2651 |
},
|
@@ -2659,8 +2659,8 @@
|
|
2659 |
2
|
2660 |
],
|
2661 |
"bits_prop": [
|
2662 |
-
0.
|
2663 |
-
0.
|
2664 |
],
|
2665 |
"scale_bits": 4
|
2666 |
},
|
@@ -2674,8 +2674,8 @@
|
|
2674 |
2
|
2675 |
],
|
2676 |
"bits_prop": [
|
2677 |
-
0.
|
2678 |
-
0.
|
2679 |
],
|
2680 |
"scale_bits": 4
|
2681 |
},
|
@@ -2689,8 +2689,8 @@
|
|
2689 |
2
|
2690 |
],
|
2691 |
"bits_prop": [
|
2692 |
-
0.
|
2693 |
-
0.
|
2694 |
],
|
2695 |
"scale_bits": 4
|
2696 |
},
|
@@ -2704,8 +2704,8 @@
|
|
2704 |
2
|
2705 |
],
|
2706 |
"bits_prop": [
|
2707 |
-
0.
|
2708 |
-
0.
|
2709 |
],
|
2710 |
"scale_bits": 4
|
2711 |
},
|
@@ -2719,15 +2719,15 @@
|
|
2719 |
2
|
2720 |
],
|
2721 |
"bits_prop": [
|
2722 |
-
0.
|
2723 |
-
0.
|
2724 |
],
|
2725 |
"scale_bits": 4
|
2726 |
}
|
2727 |
},
|
2728 |
"model.layers.25": {
|
2729 |
-
"accuracy": 0.
|
2730 |
-
"total_bits":
|
2731 |
"q_proj": {
|
2732 |
"group_size": {
|
2733 |
"4": 128,
|
@@ -2738,8 +2738,8 @@
|
|
2738 |
2
|
2739 |
],
|
2740 |
"bits_prop": [
|
2741 |
-
0.
|
2742 |
-
0.
|
2743 |
],
|
2744 |
"scale_bits": 4
|
2745 |
},
|
@@ -2753,8 +2753,8 @@
|
|
2753 |
2
|
2754 |
],
|
2755 |
"bits_prop": [
|
2756 |
-
0.
|
2757 |
-
0.
|
2758 |
],
|
2759 |
"scale_bits": 4
|
2760 |
},
|
@@ -2768,8 +2768,8 @@
|
|
2768 |
2
|
2769 |
],
|
2770 |
"bits_prop": [
|
2771 |
-
0.
|
2772 |
-
0.
|
2773 |
],
|
2774 |
"scale_bits": 4
|
2775 |
},
|
@@ -2783,8 +2783,8 @@
|
|
2783 |
2
|
2784 |
],
|
2785 |
"bits_prop": [
|
2786 |
-
0.
|
2787 |
-
0.
|
2788 |
],
|
2789 |
"scale_bits": 4
|
2790 |
},
|
@@ -2798,8 +2798,8 @@
|
|
2798 |
2
|
2799 |
],
|
2800 |
"bits_prop": [
|
2801 |
-
0.
|
2802 |
-
0.
|
2803 |
],
|
2804 |
"scale_bits": 4
|
2805 |
},
|
@@ -2813,8 +2813,8 @@
|
|
2813 |
2
|
2814 |
],
|
2815 |
"bits_prop": [
|
2816 |
-
0.
|
2817 |
-
0.
|
2818 |
],
|
2819 |
"scale_bits": 4
|
2820 |
},
|
@@ -2828,15 +2828,15 @@
|
|
2828 |
2
|
2829 |
],
|
2830 |
"bits_prop": [
|
2831 |
-
0.
|
2832 |
-
0.
|
2833 |
],
|
2834 |
"scale_bits": 4
|
2835 |
}
|
2836 |
},
|
2837 |
"model.layers.26": {
|
2838 |
-
"accuracy": 0.
|
2839 |
-
"total_bits":
|
2840 |
"q_proj": {
|
2841 |
"group_size": {
|
2842 |
"4": 128,
|
@@ -2847,8 +2847,8 @@
|
|
2847 |
2
|
2848 |
],
|
2849 |
"bits_prop": [
|
2850 |
-
0.
|
2851 |
-
0.
|
2852 |
],
|
2853 |
"scale_bits": 4
|
2854 |
},
|
@@ -2862,8 +2862,8 @@
|
|
2862 |
2
|
2863 |
],
|
2864 |
"bits_prop": [
|
2865 |
-
0.
|
2866 |
-
0.
|
2867 |
],
|
2868 |
"scale_bits": 4
|
2869 |
},
|
@@ -2877,8 +2877,8 @@
|
|
2877 |
2
|
2878 |
],
|
2879 |
"bits_prop": [
|
2880 |
-
0.
|
2881 |
-
0.
|
2882 |
],
|
2883 |
"scale_bits": 4
|
2884 |
},
|
@@ -2892,8 +2892,8 @@
|
|
2892 |
2
|
2893 |
],
|
2894 |
"bits_prop": [
|
2895 |
-
0.
|
2896 |
-
0.
|
2897 |
],
|
2898 |
"scale_bits": 4
|
2899 |
},
|
@@ -2907,8 +2907,8 @@
|
|
2907 |
2
|
2908 |
],
|
2909 |
"bits_prop": [
|
2910 |
-
0.
|
2911 |
-
0.
|
2912 |
],
|
2913 |
"scale_bits": 4
|
2914 |
},
|
@@ -2922,8 +2922,8 @@
|
|
2922 |
2
|
2923 |
],
|
2924 |
"bits_prop": [
|
2925 |
-
0.
|
2926 |
-
0.
|
2927 |
],
|
2928 |
"scale_bits": 4
|
2929 |
},
|
@@ -2937,14 +2937,14 @@
|
|
2937 |
2
|
2938 |
],
|
2939 |
"bits_prop": [
|
2940 |
-
0.
|
2941 |
-
0.
|
2942 |
],
|
2943 |
"scale_bits": 4
|
2944 |
}
|
2945 |
},
|
2946 |
"model.layers.27": {
|
2947 |
-
"accuracy": 0.
|
2948 |
"total_bits": 478689536,
|
2949 |
"q_proj": {
|
2950 |
"group_size": {
|
@@ -3053,7 +3053,7 @@
|
|
3053 |
}
|
3054 |
},
|
3055 |
"model.layers.28": {
|
3056 |
-
"accuracy": 0.
|
3057 |
"total_bits": 478689536,
|
3058 |
"q_proj": {
|
3059 |
"group_size": {
|
@@ -3162,8 +3162,8 @@
|
|
3162 |
}
|
3163 |
},
|
3164 |
"model.layers.29": {
|
3165 |
-
"accuracy": 0.
|
3166 |
-
"total_bits":
|
3167 |
"q_proj": {
|
3168 |
"group_size": {
|
3169 |
"4": 128,
|
@@ -3174,8 +3174,8 @@
|
|
3174 |
2
|
3175 |
],
|
3176 |
"bits_prop": [
|
3177 |
-
0.
|
3178 |
-
0.
|
3179 |
],
|
3180 |
"scale_bits": 4
|
3181 |
},
|
@@ -3189,8 +3189,8 @@
|
|
3189 |
2
|
3190 |
],
|
3191 |
"bits_prop": [
|
3192 |
-
0.
|
3193 |
-
0.
|
3194 |
],
|
3195 |
"scale_bits": 4
|
3196 |
},
|
@@ -3204,8 +3204,8 @@
|
|
3204 |
2
|
3205 |
],
|
3206 |
"bits_prop": [
|
3207 |
-
0.
|
3208 |
-
0.
|
3209 |
],
|
3210 |
"scale_bits": 4
|
3211 |
},
|
@@ -3219,8 +3219,8 @@
|
|
3219 |
2
|
3220 |
],
|
3221 |
"bits_prop": [
|
3222 |
-
0.
|
3223 |
-
0.
|
3224 |
],
|
3225 |
"scale_bits": 4
|
3226 |
},
|
@@ -3234,8 +3234,8 @@
|
|
3234 |
2
|
3235 |
],
|
3236 |
"bits_prop": [
|
3237 |
-
0.
|
3238 |
-
0.
|
3239 |
],
|
3240 |
"scale_bits": 4
|
3241 |
},
|
@@ -3249,8 +3249,8 @@
|
|
3249 |
2
|
3250 |
],
|
3251 |
"bits_prop": [
|
3252 |
-
0.
|
3253 |
-
0.
|
3254 |
],
|
3255 |
"scale_bits": 4
|
3256 |
},
|
@@ -3264,15 +3264,15 @@
|
|
3264 |
2
|
3265 |
],
|
3266 |
"bits_prop": [
|
3267 |
-
0.
|
3268 |
-
0.
|
3269 |
],
|
3270 |
"scale_bits": 4
|
3271 |
}
|
3272 |
},
|
3273 |
"model.layers.30": {
|
3274 |
-
"accuracy": 0.
|
3275 |
-
"total_bits":
|
3276 |
"q_proj": {
|
3277 |
"group_size": {
|
3278 |
"4": 128,
|
@@ -3283,8 +3283,8 @@
|
|
3283 |
2
|
3284 |
],
|
3285 |
"bits_prop": [
|
3286 |
-
0.
|
3287 |
-
0.
|
3288 |
],
|
3289 |
"scale_bits": 4
|
3290 |
},
|
@@ -3298,23 +3298,20 @@
|
|
3298 |
2
|
3299 |
],
|
3300 |
"bits_prop": [
|
3301 |
-
0.
|
3302 |
-
0.
|
3303 |
],
|
3304 |
"scale_bits": 4
|
3305 |
},
|
3306 |
"v_proj": {
|
3307 |
"group_size": {
|
3308 |
-
"4": 128
|
3309 |
-
"2": 128
|
3310 |
},
|
3311 |
"bits": [
|
3312 |
-
4
|
3313 |
-
2
|
3314 |
],
|
3315 |
"bits_prop": [
|
3316 |
-
0
|
3317 |
-
0.8
|
3318 |
],
|
3319 |
"scale_bits": 4
|
3320 |
},
|
@@ -3328,8 +3325,8 @@
|
|
3328 |
2
|
3329 |
],
|
3330 |
"bits_prop": [
|
3331 |
-
0.
|
3332 |
-
0.
|
3333 |
],
|
3334 |
"scale_bits": 4
|
3335 |
},
|
@@ -3343,8 +3340,8 @@
|
|
3343 |
2
|
3344 |
],
|
3345 |
"bits_prop": [
|
3346 |
-
0.
|
3347 |
-
0.
|
3348 |
],
|
3349 |
"scale_bits": 4
|
3350 |
},
|
@@ -3358,30 +3355,27 @@
|
|
3358 |
2
|
3359 |
],
|
3360 |
"bits_prop": [
|
3361 |
-
0.
|
3362 |
-
0.
|
3363 |
],
|
3364 |
"scale_bits": 4
|
3365 |
},
|
3366 |
"down_proj": {
|
3367 |
"group_size": {
|
3368 |
-
"4": 128
|
3369 |
-
"2": 128
|
3370 |
},
|
3371 |
"bits": [
|
3372 |
-
4
|
3373 |
-
2
|
3374 |
],
|
3375 |
"bits_prop": [
|
3376 |
-
0
|
3377 |
-
0.8
|
3378 |
],
|
3379 |
"scale_bits": 4
|
3380 |
}
|
3381 |
},
|
3382 |
"model.layers.31": {
|
3383 |
-
"accuracy": 0.
|
3384 |
-
"total_bits":
|
3385 |
"q_proj": {
|
3386 |
"group_size": {
|
3387 |
"4": 128,
|
@@ -3392,8 +3386,8 @@
|
|
3392 |
2
|
3393 |
],
|
3394 |
"bits_prop": [
|
3395 |
-
0.
|
3396 |
-
0.
|
3397 |
],
|
3398 |
"scale_bits": 4
|
3399 |
},
|
@@ -3407,23 +3401,20 @@
|
|
3407 |
2
|
3408 |
],
|
3409 |
"bits_prop": [
|
3410 |
-
0.
|
3411 |
-
0.
|
3412 |
],
|
3413 |
"scale_bits": 4
|
3414 |
},
|
3415 |
"v_proj": {
|
3416 |
"group_size": {
|
3417 |
-
"4": 128
|
3418 |
-
"2": 128
|
3419 |
},
|
3420 |
"bits": [
|
3421 |
-
4
|
3422 |
-
2
|
3423 |
],
|
3424 |
"bits_prop": [
|
3425 |
-
0
|
3426 |
-
0.95
|
3427 |
],
|
3428 |
"scale_bits": 4
|
3429 |
},
|
@@ -3437,8 +3428,8 @@
|
|
3437 |
2
|
3438 |
],
|
3439 |
"bits_prop": [
|
3440 |
-
0.
|
3441 |
-
0.
|
3442 |
],
|
3443 |
"scale_bits": 4
|
3444 |
},
|
@@ -3452,8 +3443,8 @@
|
|
3452 |
2
|
3453 |
],
|
3454 |
"bits_prop": [
|
3455 |
-
0.
|
3456 |
-
0.
|
3457 |
],
|
3458 |
"scale_bits": 4
|
3459 |
},
|
@@ -3467,26 +3458,23 @@
|
|
3467 |
2
|
3468 |
],
|
3469 |
"bits_prop": [
|
3470 |
-
0.
|
3471 |
-
0.
|
3472 |
],
|
3473 |
"scale_bits": 4
|
3474 |
},
|
3475 |
"down_proj": {
|
3476 |
"group_size": {
|
3477 |
-
"4": 128
|
3478 |
-
"2": 128
|
3479 |
},
|
3480 |
"bits": [
|
3481 |
-
4
|
3482 |
-
2
|
3483 |
],
|
3484 |
"bits_prop": [
|
3485 |
-
0
|
3486 |
-
0.98
|
3487 |
],
|
3488 |
"scale_bits": 4
|
3489 |
}
|
3490 |
}
|
3491 |
}
|
3492 |
-
}
|
|
|
1 |
{
|
2 |
"measurement": {
|
3 |
"model.layers.0": {
|
4 |
+
"accuracy": 0.8987722396850586,
|
5 |
+
"total_bits": 507787520,
|
6 |
"q_proj": {
|
7 |
"group_size": {
|
8 |
"4": 128,
|
|
|
13 |
2
|
14 |
],
|
15 |
"bits_prop": [
|
16 |
+
0.1,
|
17 |
+
0.9
|
18 |
],
|
19 |
"scale_bits": 4
|
20 |
},
|
|
|
28 |
2
|
29 |
],
|
30 |
"bits_prop": [
|
31 |
+
0.1,
|
32 |
+
0.9
|
33 |
],
|
34 |
"scale_bits": 4
|
35 |
},
|
|
|
43 |
2
|
44 |
],
|
45 |
"bits_prop": [
|
46 |
+
0.2,
|
47 |
+
0.8
|
48 |
],
|
49 |
"scale_bits": 4
|
50 |
},
|
|
|
58 |
2
|
59 |
],
|
60 |
"bits_prop": [
|
61 |
+
0.1,
|
62 |
+
0.9
|
63 |
],
|
64 |
"scale_bits": 4
|
65 |
},
|
|
|
73 |
2
|
74 |
],
|
75 |
"bits_prop": [
|
76 |
+
0.1,
|
77 |
+
0.9
|
78 |
],
|
79 |
"scale_bits": 4
|
80 |
},
|
|
|
88 |
2
|
89 |
],
|
90 |
"bits_prop": [
|
91 |
+
0.1,
|
92 |
+
0.9
|
93 |
],
|
94 |
"scale_bits": 4
|
95 |
},
|
|
|
103 |
2
|
104 |
],
|
105 |
"bits_prop": [
|
106 |
+
0.2,
|
107 |
+
0.8
|
108 |
],
|
109 |
"scale_bits": 4
|
110 |
}
|
111 |
},
|
112 |
"model.layers.1": {
|
113 |
+
"accuracy": 0.9002890586853027,
|
114 |
+
"total_bits": 478689536,
|
115 |
"q_proj": {
|
116 |
"group_size": {
|
117 |
"4": 128,
|
|
|
122 |
2
|
123 |
],
|
124 |
"bits_prop": [
|
125 |
+
0.02,
|
126 |
+
0.98
|
127 |
],
|
128 |
"scale_bits": 4
|
129 |
},
|
|
|
137 |
2
|
138 |
],
|
139 |
"bits_prop": [
|
140 |
+
0.02,
|
141 |
+
0.98
|
142 |
],
|
143 |
"scale_bits": 4
|
144 |
},
|
|
|
152 |
2
|
153 |
],
|
154 |
"bits_prop": [
|
155 |
+
0.2,
|
156 |
+
0.8
|
157 |
],
|
158 |
"scale_bits": 4
|
159 |
},
|
|
|
182 |
2
|
183 |
],
|
184 |
"bits_prop": [
|
185 |
+
0.02,
|
186 |
+
0.98
|
187 |
],
|
188 |
"scale_bits": 4
|
189 |
},
|
|
|
197 |
2
|
198 |
],
|
199 |
"bits_prop": [
|
200 |
+
0.02,
|
201 |
+
0.98
|
202 |
],
|
203 |
"scale_bits": 4
|
204 |
},
|
|
|
212 |
2
|
213 |
],
|
214 |
"bits_prop": [
|
215 |
+
0.2,
|
216 |
+
0.8
|
217 |
],
|
218 |
"scale_bits": 4
|
219 |
}
|
220 |
},
|
221 |
"model.layers.2": {
|
222 |
+
"accuracy": 0.9608855843544006,
|
223 |
"total_bits": 456407296,
|
224 |
"q_proj": {
|
225 |
"group_size": {
|
|
|
328 |
}
|
329 |
},
|
330 |
"model.layers.3": {
|
331 |
+
"accuracy": 0.9555550813674927,
|
332 |
"total_bits": 456407296,
|
333 |
"q_proj": {
|
334 |
"group_size": {
|
|
|
437 |
}
|
438 |
},
|
439 |
"model.layers.4": {
|
440 |
+
"accuracy": 0.9499313831329346,
|
441 |
"total_bits": 456407296,
|
442 |
"q_proj": {
|
443 |
"group_size": {
|
|
|
546 |
}
|
547 |
},
|
548 |
"model.layers.5": {
|
549 |
+
"accuracy": 0.94085693359375,
|
550 |
"total_bits": 456407296,
|
551 |
"q_proj": {
|
552 |
"group_size": {
|
|
|
655 |
}
|
656 |
},
|
657 |
"model.layers.6": {
|
658 |
+
"accuracy": 0.9383460283279419,
|
659 |
"total_bits": 456407296,
|
660 |
"q_proj": {
|
661 |
"group_size": {
|
|
|
764 |
}
|
765 |
},
|
766 |
"model.layers.7": {
|
767 |
+
"accuracy": 0.9287691116333008,
|
768 |
+
"total_bits": 456407296,
|
769 |
"q_proj": {
|
770 |
"group_size": {
|
771 |
"4": 128,
|
|
|
776 |
2
|
777 |
],
|
778 |
"bits_prop": [
|
779 |
+
0.01,
|
780 |
+
0.99
|
781 |
],
|
782 |
"scale_bits": 4
|
783 |
},
|
|
|
791 |
2
|
792 |
],
|
793 |
"bits_prop": [
|
794 |
+
0.01,
|
795 |
+
0.99
|
796 |
],
|
797 |
"scale_bits": 4
|
798 |
},
|
|
|
821 |
2
|
822 |
],
|
823 |
"bits_prop": [
|
824 |
+
0.02,
|
825 |
+
0.98
|
826 |
],
|
827 |
"scale_bits": 4
|
828 |
},
|
|
|
836 |
2
|
837 |
],
|
838 |
"bits_prop": [
|
839 |
+
0.01,
|
840 |
+
0.99
|
841 |
],
|
842 |
"scale_bits": 4
|
843 |
},
|
|
|
851 |
2
|
852 |
],
|
853 |
"bits_prop": [
|
854 |
+
0.01,
|
855 |
+
0.99
|
856 |
],
|
857 |
"scale_bits": 4
|
858 |
},
|
|
|
866 |
2
|
867 |
],
|
868 |
"bits_prop": [
|
869 |
+
0.02,
|
870 |
+
0.98
|
871 |
],
|
872 |
"scale_bits": 4
|
873 |
}
|
874 |
},
|
875 |
"model.layers.8": {
|
876 |
+
"accuracy": 0.9277379512786865,
|
877 |
+
"total_bits": 456407296,
|
878 |
"q_proj": {
|
879 |
"group_size": {
|
880 |
"4": 128,
|
|
|
930 |
2
|
931 |
],
|
932 |
"bits_prop": [
|
933 |
+
0.02,
|
934 |
+
0.98
|
935 |
],
|
936 |
"scale_bits": 4
|
937 |
},
|
|
|
975 |
2
|
976 |
],
|
977 |
"bits_prop": [
|
978 |
+
0.02,
|
979 |
+
0.98
|
980 |
],
|
981 |
"scale_bits": 4
|
982 |
}
|
983 |
},
|
984 |
"model.layers.9": {
|
985 |
+
"accuracy": 0.9248785972595215,
|
986 |
+
"total_bits": 456407296,
|
987 |
"q_proj": {
|
988 |
"group_size": {
|
989 |
"4": 128,
|
|
|
994 |
2
|
995 |
],
|
996 |
"bits_prop": [
|
997 |
+
0.01,
|
998 |
+
0.99
|
999 |
],
|
1000 |
"scale_bits": 4
|
1001 |
},
|
|
|
1009 |
2
|
1010 |
],
|
1011 |
"bits_prop": [
|
1012 |
+
0.01,
|
1013 |
+
0.99
|
1014 |
],
|
1015 |
"scale_bits": 4
|
1016 |
},
|
|
|
1024 |
2
|
1025 |
],
|
1026 |
"bits_prop": [
|
1027 |
+
0.05,
|
1028 |
+
0.95
|
1029 |
],
|
1030 |
"scale_bits": 4
|
1031 |
},
|
|
|
1054 |
2
|
1055 |
],
|
1056 |
"bits_prop": [
|
1057 |
+
0.01,
|
1058 |
+
0.99
|
1059 |
],
|
1060 |
"scale_bits": 4
|
1061 |
},
|
|
|
1069 |
2
|
1070 |
],
|
1071 |
"bits_prop": [
|
1072 |
+
0.01,
|
1073 |
+
0.99
|
1074 |
],
|
1075 |
"scale_bits": 4
|
1076 |
},
|
|
|
1084 |
2
|
1085 |
],
|
1086 |
"bits_prop": [
|
1087 |
+
0.02,
|
1088 |
+
0.98
|
1089 |
],
|
1090 |
"scale_bits": 4
|
1091 |
}
|
1092 |
},
|
1093 |
"model.layers.10": {
|
1094 |
+
"accuracy": 0.9232504367828369,
|
1095 |
+
"total_bits": 456407296,
|
1096 |
"q_proj": {
|
1097 |
"group_size": {
|
1098 |
"4": 128,
|
|
|
1103 |
2
|
1104 |
],
|
1105 |
"bits_prop": [
|
1106 |
+
0.01,
|
1107 |
+
0.99
|
1108 |
],
|
1109 |
"scale_bits": 4
|
1110 |
},
|
|
|
1118 |
2
|
1119 |
],
|
1120 |
"bits_prop": [
|
1121 |
+
0.01,
|
1122 |
+
0.99
|
1123 |
],
|
1124 |
"scale_bits": 4
|
1125 |
},
|
|
|
1133 |
2
|
1134 |
],
|
1135 |
"bits_prop": [
|
1136 |
+
0.05,
|
1137 |
+
0.95
|
1138 |
],
|
1139 |
"scale_bits": 4
|
1140 |
},
|
|
|
1163 |
2
|
1164 |
],
|
1165 |
"bits_prop": [
|
1166 |
+
0.01,
|
1167 |
+
0.99
|
1168 |
],
|
1169 |
"scale_bits": 4
|
1170 |
},
|
|
|
1178 |
2
|
1179 |
],
|
1180 |
"bits_prop": [
|
1181 |
+
0.01,
|
1182 |
+
0.99
|
1183 |
],
|
1184 |
"scale_bits": 4
|
1185 |
},
|
|
|
1193 |
2
|
1194 |
],
|
1195 |
"bits_prop": [
|
1196 |
+
0.02,
|
1197 |
+
0.98
|
1198 |
],
|
1199 |
"scale_bits": 4
|
1200 |
}
|
1201 |
},
|
1202 |
"model.layers.11": {
|
1203 |
+
"accuracy": 0.9223508834838867,
|
1204 |
+
"total_bits": 456407296,
|
1205 |
"q_proj": {
|
1206 |
"group_size": {
|
1207 |
"4": 128,
|
|
|
1212 |
2
|
1213 |
],
|
1214 |
"bits_prop": [
|
1215 |
+
0.01,
|
1216 |
+
0.99
|
1217 |
],
|
1218 |
"scale_bits": 4
|
1219 |
},
|
|
|
1227 |
2
|
1228 |
],
|
1229 |
"bits_prop": [
|
1230 |
+
0.01,
|
1231 |
+
0.99
|
1232 |
],
|
1233 |
"scale_bits": 4
|
1234 |
},
|
|
|
1242 |
2
|
1243 |
],
|
1244 |
"bits_prop": [
|
1245 |
+
0.05,
|
1246 |
+
0.95
|
1247 |
],
|
1248 |
"scale_bits": 4
|
1249 |
},
|
|
|
1272 |
2
|
1273 |
],
|
1274 |
"bits_prop": [
|
1275 |
+
0.01,
|
1276 |
+
0.99
|
1277 |
],
|
1278 |
"scale_bits": 4
|
1279 |
},
|
|
|
1287 |
2
|
1288 |
],
|
1289 |
"bits_prop": [
|
1290 |
+
0.01,
|
1291 |
+
0.99
|
1292 |
],
|
1293 |
"scale_bits": 4
|
1294 |
},
|
|
|
1302 |
2
|
1303 |
],
|
1304 |
"bits_prop": [
|
1305 |
+
0.02,
|
1306 |
+
0.98
|
1307 |
],
|
1308 |
"scale_bits": 4
|
1309 |
}
|
1310 |
},
|
1311 |
"model.layers.12": {
|
1312 |
+
"accuracy": 0.9154980182647705,
|
1313 |
+
"total_bits": 456407296,
|
1314 |
"q_proj": {
|
1315 |
"group_size": {
|
1316 |
"4": 128,
|
|
|
1321 |
2
|
1322 |
],
|
1323 |
"bits_prop": [
|
1324 |
+
0.01,
|
1325 |
+
0.99
|
1326 |
],
|
1327 |
"scale_bits": 4
|
1328 |
},
|
|
|
1336 |
2
|
1337 |
],
|
1338 |
"bits_prop": [
|
1339 |
+
0.01,
|
1340 |
+
0.99
|
1341 |
],
|
1342 |
"scale_bits": 4
|
1343 |
},
|
|
|
1351 |
2
|
1352 |
],
|
1353 |
"bits_prop": [
|
1354 |
+
0.05,
|
1355 |
+
0.95
|
1356 |
],
|
1357 |
"scale_bits": 4
|
1358 |
},
|
|
|
1366 |
2
|
1367 |
],
|
1368 |
"bits_prop": [
|
1369 |
+
0.02,
|
1370 |
+
0.98
|
1371 |
],
|
1372 |
"scale_bits": 4
|
1373 |
},
|
|
|
1381 |
2
|
1382 |
],
|
1383 |
"bits_prop": [
|
1384 |
+
0.01,
|
1385 |
+
0.99
|
1386 |
],
|
1387 |
"scale_bits": 4
|
1388 |
},
|
|
|
1396 |
2
|
1397 |
],
|
1398 |
"bits_prop": [
|
1399 |
+
0.01,
|
1400 |
+
0.99
|
1401 |
],
|
1402 |
"scale_bits": 4
|
1403 |
},
|
|
|
1411 |
2
|
1412 |
],
|
1413 |
"bits_prop": [
|
1414 |
+
0.02,
|
1415 |
+
0.98
|
1416 |
],
|
1417 |
"scale_bits": 4
|
1418 |
}
|
1419 |
},
|
1420 |
"model.layers.13": {
|
1421 |
+
"accuracy": 0.9131753444671631,
|
1422 |
+
"total_bits": 456407296,
|
1423 |
"q_proj": {
|
1424 |
"group_size": {
|
1425 |
"4": 128,
|
|
|
1430 |
2
|
1431 |
],
|
1432 |
"bits_prop": [
|
1433 |
+
0.01,
|
1434 |
+
0.99
|
1435 |
],
|
1436 |
"scale_bits": 4
|
1437 |
},
|
|
|
1445 |
2
|
1446 |
],
|
1447 |
"bits_prop": [
|
1448 |
+
0.01,
|
1449 |
+
0.99
|
1450 |
],
|
1451 |
"scale_bits": 4
|
1452 |
},
|
|
|
1460 |
2
|
1461 |
],
|
1462 |
"bits_prop": [
|
1463 |
+
0.05,
|
1464 |
+
0.95
|
1465 |
],
|
1466 |
"scale_bits": 4
|
1467 |
},
|
|
|
1490 |
2
|
1491 |
],
|
1492 |
"bits_prop": [
|
1493 |
+
0.01,
|
1494 |
+
0.99
|
1495 |
],
|
1496 |
"scale_bits": 4
|
1497 |
},
|
|
|
1505 |
2
|
1506 |
],
|
1507 |
"bits_prop": [
|
1508 |
+
0.01,
|
1509 |
+
0.99
|
1510 |
],
|
1511 |
"scale_bits": 4
|
1512 |
},
|
|
|
1520 |
2
|
1521 |
],
|
1522 |
"bits_prop": [
|
1523 |
+
0.02,
|
1524 |
+
0.98
|
1525 |
],
|
1526 |
"scale_bits": 4
|
1527 |
}
|
1528 |
},
|
1529 |
"model.layers.14": {
|
1530 |
+
"accuracy": 0.9053680896759033,
|
1531 |
+
"total_bits": 456407296,
|
1532 |
"q_proj": {
|
1533 |
"group_size": {
|
1534 |
"4": 128,
|
|
|
1539 |
2
|
1540 |
],
|
1541 |
"bits_prop": [
|
1542 |
+
0.01,
|
1543 |
+
0.99
|
1544 |
],
|
1545 |
"scale_bits": 4
|
1546 |
},
|
|
|
1554 |
2
|
1555 |
],
|
1556 |
"bits_prop": [
|
1557 |
+
0.01,
|
1558 |
+
0.99
|
1559 |
],
|
1560 |
"scale_bits": 4
|
1561 |
},
|
|
|
1569 |
2
|
1570 |
],
|
1571 |
"bits_prop": [
|
1572 |
+
0.05,
|
1573 |
+
0.95
|
1574 |
],
|
1575 |
"scale_bits": 4
|
1576 |
},
|
|
|
1599 |
2
|
1600 |
],
|
1601 |
"bits_prop": [
|
1602 |
+
0.01,
|
1603 |
+
0.99
|
1604 |
],
|
1605 |
"scale_bits": 4
|
1606 |
},
|
|
|
1614 |
2
|
1615 |
],
|
1616 |
"bits_prop": [
|
1617 |
+
0.01,
|
1618 |
+
0.99
|
1619 |
],
|
1620 |
"scale_bits": 4
|
1621 |
},
|
|
|
1629 |
2
|
1630 |
],
|
1631 |
"bits_prop": [
|
1632 |
+
0.02,
|
1633 |
+
0.98
|
1634 |
],
|
1635 |
"scale_bits": 4
|
1636 |
}
|
1637 |
},
|
1638 |
"model.layers.15": {
|
1639 |
+
"accuracy": 0.9041793346405029,
|
1640 |
+
"total_bits": 465844480,
|
1641 |
"q_proj": {
|
1642 |
"group_size": {
|
1643 |
"4": 128,
|
|
|
1648 |
2
|
1649 |
],
|
1650 |
"bits_prop": [
|
1651 |
+
0.01,
|
1652 |
+
0.99
|
1653 |
+
],
|
1654 |
"scale_bits": 4
|
1655 |
},
|
1656 |
"k_proj": {
|
|
|
1663 |
2
|
1664 |
],
|
1665 |
"bits_prop": [
|
1666 |
+
0.01,
|
1667 |
+
0.99
|
1668 |
],
|
1669 |
"scale_bits": 4
|
1670 |
},
|
|
|
1678 |
2
|
1679 |
],
|
1680 |
"bits_prop": [
|
1681 |
+
0.05,
|
1682 |
+
0.95
|
1683 |
],
|
1684 |
"scale_bits": 4
|
1685 |
},
|
|
|
1693 |
2
|
1694 |
],
|
1695 |
"bits_prop": [
|
1696 |
+
0.01,
|
1697 |
+
0.99
|
1698 |
],
|
1699 |
"scale_bits": 4
|
1700 |
},
|
|
|
1708 |
2
|
1709 |
],
|
1710 |
"bits_prop": [
|
1711 |
+
0.01,
|
1712 |
+
0.99
|
1713 |
],
|
1714 |
"scale_bits": 4
|
1715 |
},
|
|
|
1723 |
2
|
1724 |
],
|
1725 |
"bits_prop": [
|
1726 |
+
0.01,
|
1727 |
+
0.99
|
1728 |
],
|
1729 |
"scale_bits": 4
|
1730 |
},
|
|
|
1738 |
2
|
1739 |
],
|
1740 |
"bits_prop": [
|
1741 |
+
0.1,
|
1742 |
+
0.9
|
1743 |
],
|
1744 |
"scale_bits": 4
|
1745 |
}
|
1746 |
},
|
1747 |
"model.layers.16": {
|
1748 |
+
"accuracy": 0.9069504737854004,
|
1749 |
+
"total_bits": 478689536,
|
1750 |
"q_proj": {
|
1751 |
"group_size": {
|
1752 |
"4": 128,
|
|
|
1757 |
2
|
1758 |
],
|
1759 |
"bits_prop": [
|
1760 |
+
0.02,
|
1761 |
+
0.98
|
1762 |
],
|
1763 |
"scale_bits": 4
|
1764 |
},
|
|
|
1772 |
2
|
1773 |
],
|
1774 |
"bits_prop": [
|
1775 |
+
0.02,
|
1776 |
+
0.98
|
1777 |
],
|
1778 |
"scale_bits": 4
|
1779 |
},
|
|
|
1787 |
2
|
1788 |
],
|
1789 |
"bits_prop": [
|
1790 |
+
0.2,
|
1791 |
+
0.8
|
1792 |
],
|
1793 |
"scale_bits": 4
|
1794 |
},
|
|
|
1802 |
2
|
1803 |
],
|
1804 |
"bits_prop": [
|
1805 |
+
0.02,
|
1806 |
+
0.98
|
1807 |
],
|
1808 |
"scale_bits": 4
|
1809 |
},
|
|
|
1817 |
2
|
1818 |
],
|
1819 |
"bits_prop": [
|
1820 |
+
0.02,
|
1821 |
+
0.98
|
1822 |
],
|
1823 |
"scale_bits": 4
|
1824 |
},
|
|
|
1832 |
2
|
1833 |
],
|
1834 |
"bits_prop": [
|
1835 |
+
0.02,
|
1836 |
+
0.98
|
1837 |
],
|
1838 |
"scale_bits": 4
|
1839 |
},
|
|
|
1847 |
2
|
1848 |
],
|
1849 |
"bits_prop": [
|
1850 |
+
0.2,
|
1851 |
+
0.8
|
1852 |
],
|
1853 |
"scale_bits": 4
|
1854 |
}
|
1855 |
},
|
1856 |
"model.layers.17": {
|
1857 |
+
"accuracy": 0.9034838676452637,
|
1858 |
"total_bits": 478689536,
|
1859 |
"q_proj": {
|
1860 |
"group_size": {
|
|
|
1963 |
}
|
1964 |
},
|
1965 |
"model.layers.18": {
|
1966 |
+
"accuracy": 0.9072866439819336,
|
1967 |
+
"total_bits": 507787520,
|
1968 |
"q_proj": {
|
1969 |
"group_size": {
|
1970 |
"4": 128,
|
|
|
1975 |
2
|
1976 |
],
|
1977 |
"bits_prop": [
|
1978 |
+
0.1,
|
1979 |
+
0.9
|
1980 |
],
|
1981 |
"scale_bits": 4
|
1982 |
},
|
|
|
1990 |
2
|
1991 |
],
|
1992 |
"bits_prop": [
|
1993 |
+
0.1,
|
1994 |
+
0.9
|
1995 |
],
|
1996 |
"scale_bits": 4
|
1997 |
},
|
|
|
2020 |
2
|
2021 |
],
|
2022 |
"bits_prop": [
|
2023 |
+
0.1,
|
2024 |
+
0.9
|
2025 |
],
|
2026 |
"scale_bits": 4
|
2027 |
},
|
|
|
2035 |
2
|
2036 |
],
|
2037 |
"bits_prop": [
|
2038 |
+
0.1,
|
2039 |
+
0.9
|
2040 |
],
|
2041 |
"scale_bits": 4
|
2042 |
},
|
|
|
2050 |
2
|
2051 |
],
|
2052 |
"bits_prop": [
|
2053 |
+
0.1,
|
2054 |
+
0.9
|
2055 |
],
|
2056 |
"scale_bits": 4
|
2057 |
},
|
|
|
2072 |
}
|
2073 |
},
|
2074 |
"model.layers.19": {
|
2075 |
+
"accuracy": 0.9008588790893555,
|
2076 |
"total_bits": 478689536,
|
2077 |
"q_proj": {
|
2078 |
"group_size": {
|
|
|
2181 |
}
|
2182 |
},
|
2183 |
"model.layers.20": {
|
2184 |
+
"accuracy": 0.9053101539611816,
|
2185 |
"total_bits": 478689536,
|
2186 |
"q_proj": {
|
2187 |
"group_size": {
|
|
|
2290 |
}
|
2291 |
},
|
2292 |
"model.layers.21": {
|
2293 |
+
"accuracy": 0.9090385437011719,
|
2294 |
"total_bits": 478689536,
|
2295 |
"q_proj": {
|
2296 |
"group_size": {
|
|
|
2399 |
}
|
2400 |
},
|
2401 |
"model.layers.22": {
|
2402 |
+
"accuracy": 0.9069957733154297,
|
2403 |
+
"total_bits": 465844480,
|
2404 |
"q_proj": {
|
2405 |
"group_size": {
|
2406 |
"4": 128,
|
|
|
2411 |
2
|
2412 |
],
|
2413 |
"bits_prop": [
|
2414 |
+
0.01,
|
2415 |
+
0.99
|
2416 |
],
|
2417 |
"scale_bits": 4
|
2418 |
},
|
|
|
2426 |
2
|
2427 |
],
|
2428 |
"bits_prop": [
|
2429 |
+
0.01,
|
2430 |
+
0.99
|
2431 |
],
|
2432 |
"scale_bits": 4
|
2433 |
},
|
|
|
2441 |
2
|
2442 |
],
|
2443 |
"bits_prop": [
|
2444 |
+
0.05,
|
2445 |
+
0.95
|
2446 |
],
|
2447 |
"scale_bits": 4
|
2448 |
},
|
|
|
2456 |
2
|
2457 |
],
|
2458 |
"bits_prop": [
|
2459 |
+
0.01,
|
2460 |
+
0.99
|
2461 |
],
|
2462 |
"scale_bits": 4
|
2463 |
},
|
|
|
2471 |
2
|
2472 |
],
|
2473 |
"bits_prop": [
|
2474 |
+
0.01,
|
2475 |
+
0.99
|
2476 |
],
|
2477 |
"scale_bits": 4
|
2478 |
},
|
|
|
2486 |
2
|
2487 |
],
|
2488 |
"bits_prop": [
|
2489 |
+
0.01,
|
2490 |
+
0.99
|
2491 |
],
|
2492 |
"scale_bits": 4
|
2493 |
},
|
|
|
2501 |
2
|
2502 |
],
|
2503 |
"bits_prop": [
|
2504 |
+
0.1,
|
2505 |
+
0.9
|
2506 |
],
|
2507 |
"scale_bits": 4
|
2508 |
}
|
2509 |
},
|
2510 |
"model.layers.23": {
|
2511 |
+
"accuracy": 0.9098095893859863,
|
2512 |
+
"total_bits": 478689536,
|
2513 |
"q_proj": {
|
2514 |
"group_size": {
|
2515 |
"4": 128,
|
|
|
2520 |
2
|
2521 |
],
|
2522 |
"bits_prop": [
|
2523 |
+
0.02,
|
2524 |
+
0.98
|
2525 |
],
|
2526 |
"scale_bits": 4
|
2527 |
},
|
|
|
2535 |
2
|
2536 |
],
|
2537 |
"bits_prop": [
|
2538 |
+
0.02,
|
2539 |
+
0.98
|
2540 |
],
|
2541 |
"scale_bits": 4
|
2542 |
},
|
|
|
2550 |
2
|
2551 |
],
|
2552 |
"bits_prop": [
|
2553 |
+
0.2,
|
2554 |
+
0.8
|
2555 |
],
|
2556 |
"scale_bits": 4
|
2557 |
},
|
|
|
2565 |
2
|
2566 |
],
|
2567 |
"bits_prop": [
|
2568 |
+
0.02,
|
2569 |
+
0.98
|
2570 |
],
|
2571 |
"scale_bits": 4
|
2572 |
},
|
|
|
2580 |
2
|
2581 |
],
|
2582 |
"bits_prop": [
|
2583 |
+
0.02,
|
2584 |
+
0.98
|
2585 |
],
|
2586 |
"scale_bits": 4
|
2587 |
},
|
|
|
2595 |
2
|
2596 |
],
|
2597 |
"bits_prop": [
|
2598 |
+
0.02,
|
2599 |
+
0.98
|
2600 |
],
|
2601 |
"scale_bits": 4
|
2602 |
},
|
|
|
2610 |
2
|
2611 |
],
|
2612 |
"bits_prop": [
|
2613 |
+
0.2,
|
2614 |
+
0.8
|
2615 |
],
|
2616 |
"scale_bits": 4
|
2617 |
}
|
2618 |
},
|
2619 |
"model.layers.24": {
|
2620 |
+
"accuracy": 0.9100494384765625,
|
2621 |
+
"total_bits": 478689536,
|
2622 |
"q_proj": {
|
2623 |
"group_size": {
|
2624 |
"4": 128,
|
|
|
2629 |
2
|
2630 |
],
|
2631 |
"bits_prop": [
|
2632 |
+
0.02,
|
2633 |
+
0.98
|
2634 |
],
|
2635 |
"scale_bits": 4
|
2636 |
},
|
|
|
2644 |
2
|
2645 |
],
|
2646 |
"bits_prop": [
|
2647 |
+
0.02,
|
2648 |
+
0.98
|
2649 |
],
|
2650 |
"scale_bits": 4
|
2651 |
},
|
|
|
2659 |
2
|
2660 |
],
|
2661 |
"bits_prop": [
|
2662 |
+
0.2,
|
2663 |
+
0.8
|
2664 |
],
|
2665 |
"scale_bits": 4
|
2666 |
},
|
|
|
2674 |
2
|
2675 |
],
|
2676 |
"bits_prop": [
|
2677 |
+
0.02,
|
2678 |
+
0.98
|
2679 |
],
|
2680 |
"scale_bits": 4
|
2681 |
},
|
|
|
2689 |
2
|
2690 |
],
|
2691 |
"bits_prop": [
|
2692 |
+
0.02,
|
2693 |
+
0.98
|
2694 |
],
|
2695 |
"scale_bits": 4
|
2696 |
},
|
|
|
2704 |
2
|
2705 |
],
|
2706 |
"bits_prop": [
|
2707 |
+
0.02,
|
2708 |
+
0.98
|
2709 |
],
|
2710 |
"scale_bits": 4
|
2711 |
},
|
|
|
2719 |
2
|
2720 |
],
|
2721 |
"bits_prop": [
|
2722 |
+
0.2,
|
2723 |
+
0.8
|
2724 |
],
|
2725 |
"scale_bits": 4
|
2726 |
}
|
2727 |
},
|
2728 |
"model.layers.25": {
|
2729 |
+
"accuracy": 0.9088339805603027,
|
2730 |
+
"total_bits": 478689536,
|
2731 |
"q_proj": {
|
2732 |
"group_size": {
|
2733 |
"4": 128,
|
|
|
2738 |
2
|
2739 |
],
|
2740 |
"bits_prop": [
|
2741 |
+
0.02,
|
2742 |
+
0.98
|
2743 |
],
|
2744 |
"scale_bits": 4
|
2745 |
},
|
|
|
2753 |
2
|
2754 |
],
|
2755 |
"bits_prop": [
|
2756 |
+
0.02,
|
2757 |
+
0.98
|
2758 |
],
|
2759 |
"scale_bits": 4
|
2760 |
},
|
|
|
2768 |
2
|
2769 |
],
|
2770 |
"bits_prop": [
|
2771 |
+
0.2,
|
2772 |
+
0.8
|
2773 |
],
|
2774 |
"scale_bits": 4
|
2775 |
},
|
|
|
2783 |
2
|
2784 |
],
|
2785 |
"bits_prop": [
|
2786 |
+
0.02,
|
2787 |
+
0.98
|
2788 |
],
|
2789 |
"scale_bits": 4
|
2790 |
},
|
|
|
2798 |
2
|
2799 |
],
|
2800 |
"bits_prop": [
|
2801 |
+
0.02,
|
2802 |
+
0.98
|
2803 |
],
|
2804 |
"scale_bits": 4
|
2805 |
},
|
|
|
2813 |
2
|
2814 |
],
|
2815 |
"bits_prop": [
|
2816 |
+
0.02,
|
2817 |
+
0.98
|
2818 |
],
|
2819 |
"scale_bits": 4
|
2820 |
},
|
|
|
2828 |
2
|
2829 |
],
|
2830 |
"bits_prop": [
|
2831 |
+
0.2,
|
2832 |
+
0.8
|
2833 |
],
|
2834 |
"scale_bits": 4
|
2835 |
}
|
2836 |
},
|
2837 |
"model.layers.26": {
|
2838 |
+
"accuracy": 0.9072427749633789,
|
2839 |
+
"total_bits": 478689536,
|
2840 |
"q_proj": {
|
2841 |
"group_size": {
|
2842 |
"4": 128,
|
|
|
2847 |
2
|
2848 |
],
|
2849 |
"bits_prop": [
|
2850 |
+
0.02,
|
2851 |
+
0.98
|
2852 |
],
|
2853 |
"scale_bits": 4
|
2854 |
},
|
|
|
2862 |
2
|
2863 |
],
|
2864 |
"bits_prop": [
|
2865 |
+
0.02,
|
2866 |
+
0.98
|
2867 |
],
|
2868 |
"scale_bits": 4
|
2869 |
},
|
|
|
2877 |
2
|
2878 |
],
|
2879 |
"bits_prop": [
|
2880 |
+
0.2,
|
2881 |
+
0.8
|
2882 |
],
|
2883 |
"scale_bits": 4
|
2884 |
},
|
|
|
2892 |
2
|
2893 |
],
|
2894 |
"bits_prop": [
|
2895 |
+
0.02,
|
2896 |
+
0.98
|
2897 |
],
|
2898 |
"scale_bits": 4
|
2899 |
},
|
|
|
2907 |
2
|
2908 |
],
|
2909 |
"bits_prop": [
|
2910 |
+
0.02,
|
2911 |
+
0.98
|
2912 |
],
|
2913 |
"scale_bits": 4
|
2914 |
},
|
|
|
2922 |
2
|
2923 |
],
|
2924 |
"bits_prop": [
|
2925 |
+
0.02,
|
2926 |
+
0.98
|
2927 |
],
|
2928 |
"scale_bits": 4
|
2929 |
},
|
|
|
2937 |
2
|
2938 |
],
|
2939 |
"bits_prop": [
|
2940 |
+
0.2,
|
2941 |
+
0.8
|
2942 |
],
|
2943 |
"scale_bits": 4
|
2944 |
}
|
2945 |
},
|
2946 |
"model.layers.27": {
|
2947 |
+
"accuracy": 0.9096179008483887,
|
2948 |
"total_bits": 478689536,
|
2949 |
"q_proj": {
|
2950 |
"group_size": {
|
|
|
3053 |
}
|
3054 |
},
|
3055 |
"model.layers.28": {
|
3056 |
+
"accuracy": 0.9028897285461426,
|
3057 |
"total_bits": 478689536,
|
3058 |
"q_proj": {
|
3059 |
"group_size": {
|
|
|
3162 |
}
|
3163 |
},
|
3164 |
"model.layers.29": {
|
3165 |
+
"accuracy": 0.902318000793457,
|
3166 |
+
"total_bits": 513030400,
|
3167 |
"q_proj": {
|
3168 |
"group_size": {
|
3169 |
"4": 128,
|
|
|
3174 |
2
|
3175 |
],
|
3176 |
"bits_prop": [
|
3177 |
+
0.05,
|
3178 |
+
0.95
|
3179 |
],
|
3180 |
"scale_bits": 4
|
3181 |
},
|
|
|
3189 |
2
|
3190 |
],
|
3191 |
"bits_prop": [
|
3192 |
+
0.05,
|
3193 |
+
0.95
|
3194 |
],
|
3195 |
"scale_bits": 4
|
3196 |
},
|
|
|
3204 |
2
|
3205 |
],
|
3206 |
"bits_prop": [
|
3207 |
+
0.4,
|
3208 |
+
0.6
|
3209 |
],
|
3210 |
"scale_bits": 4
|
3211 |
},
|
|
|
3219 |
2
|
3220 |
],
|
3221 |
"bits_prop": [
|
3222 |
+
0.05,
|
3223 |
+
0.95
|
3224 |
],
|
3225 |
"scale_bits": 4
|
3226 |
},
|
|
|
3234 |
2
|
3235 |
],
|
3236 |
"bits_prop": [
|
3237 |
+
0.05,
|
3238 |
+
0.95
|
3239 |
],
|
3240 |
"scale_bits": 4
|
3241 |
},
|
|
|
3249 |
2
|
3250 |
],
|
3251 |
"bits_prop": [
|
3252 |
+
0.05,
|
3253 |
+
0.95
|
3254 |
],
|
3255 |
"scale_bits": 4
|
3256 |
},
|
|
|
3264 |
2
|
3265 |
],
|
3266 |
"bits_prop": [
|
3267 |
+
0.4,
|
3268 |
+
0.6
|
3269 |
],
|
3270 |
"scale_bits": 4
|
3271 |
}
|
3272 |
},
|
3273 |
"model.layers.30": {
|
3274 |
+
"accuracy": 0.9191799163818359,
|
3275 |
+
"total_bits": 607664384,
|
3276 |
"q_proj": {
|
3277 |
"group_size": {
|
3278 |
"4": 128,
|
|
|
3283 |
2
|
3284 |
],
|
3285 |
"bits_prop": [
|
3286 |
+
0.1,
|
3287 |
+
0.9
|
3288 |
],
|
3289 |
"scale_bits": 4
|
3290 |
},
|
|
|
3298 |
2
|
3299 |
],
|
3300 |
"bits_prop": [
|
3301 |
+
0.1,
|
3302 |
+
0.9
|
3303 |
],
|
3304 |
"scale_bits": 4
|
3305 |
},
|
3306 |
"v_proj": {
|
3307 |
"group_size": {
|
3308 |
+
"4": 128
|
|
|
3309 |
},
|
3310 |
"bits": [
|
3311 |
+
4
|
|
|
3312 |
],
|
3313 |
"bits_prop": [
|
3314 |
+
1.0
|
|
|
3315 |
],
|
3316 |
"scale_bits": 4
|
3317 |
},
|
|
|
3325 |
2
|
3326 |
],
|
3327 |
"bits_prop": [
|
3328 |
+
0.1,
|
3329 |
+
0.9
|
3330 |
],
|
3331 |
"scale_bits": 4
|
3332 |
},
|
|
|
3340 |
2
|
3341 |
],
|
3342 |
"bits_prop": [
|
3343 |
+
0.1,
|
3344 |
+
0.9
|
3345 |
],
|
3346 |
"scale_bits": 4
|
3347 |
},
|
|
|
3355 |
2
|
3356 |
],
|
3357 |
"bits_prop": [
|
3358 |
+
0.1,
|
3359 |
+
0.9
|
3360 |
],
|
3361 |
"scale_bits": 4
|
3362 |
},
|
3363 |
"down_proj": {
|
3364 |
"group_size": {
|
3365 |
+
"4": 128
|
|
|
3366 |
},
|
3367 |
"bits": [
|
3368 |
+
4
|
|
|
3369 |
],
|
3370 |
"bits_prop": [
|
3371 |
+
1.0
|
|
|
3372 |
],
|
3373 |
"scale_bits": 4
|
3374 |
}
|
3375 |
},
|
3376 |
"model.layers.31": {
|
3377 |
+
"accuracy": 0.9125514030456543,
|
3378 |
+
"total_bits": 607664384,
|
3379 |
"q_proj": {
|
3380 |
"group_size": {
|
3381 |
"4": 128,
|
|
|
3386 |
2
|
3387 |
],
|
3388 |
"bits_prop": [
|
3389 |
+
0.1,
|
3390 |
+
0.9
|
3391 |
],
|
3392 |
"scale_bits": 4
|
3393 |
},
|
|
|
3401 |
2
|
3402 |
],
|
3403 |
"bits_prop": [
|
3404 |
+
0.1,
|
3405 |
+
0.9
|
3406 |
],
|
3407 |
"scale_bits": 4
|
3408 |
},
|
3409 |
"v_proj": {
|
3410 |
"group_size": {
|
3411 |
+
"4": 128
|
|
|
3412 |
},
|
3413 |
"bits": [
|
3414 |
+
4
|
|
|
3415 |
],
|
3416 |
"bits_prop": [
|
3417 |
+
1.0
|
|
|
3418 |
],
|
3419 |
"scale_bits": 4
|
3420 |
},
|
|
|
3428 |
2
|
3429 |
],
|
3430 |
"bits_prop": [
|
3431 |
+
0.1,
|
3432 |
+
0.9
|
3433 |
],
|
3434 |
"scale_bits": 4
|
3435 |
},
|
|
|
3443 |
2
|
3444 |
],
|
3445 |
"bits_prop": [
|
3446 |
+
0.1,
|
3447 |
+
0.9
|
3448 |
],
|
3449 |
"scale_bits": 4
|
3450 |
},
|
|
|
3458 |
2
|
3459 |
],
|
3460 |
"bits_prop": [
|
3461 |
+
0.1,
|
3462 |
+
0.9
|
3463 |
],
|
3464 |
"scale_bits": 4
|
3465 |
},
|
3466 |
"down_proj": {
|
3467 |
"group_size": {
|
3468 |
+
"4": 128
|
|
|
3469 |
},
|
3470 |
"bits": [
|
3471 |
+
4
|
|
|
3472 |
],
|
3473 |
"bits_prop": [
|
3474 |
+
1.0
|
|
|
3475 |
],
|
3476 |
"scale_bits": 4
|
3477 |
}
|
3478 |
}
|
3479 |
}
|
3480 |
+
}
|