File size: 17,404 Bytes
ac8897e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity
0,bert.embeddings.word_embeddings,Embedding,weight,"[30522, 768]",23440896,23440896,0.0
1,bert.embeddings.position_embeddings,Embedding,weight,"[512, 768]",393216,393216,0.0
2,bert.embeddings.token_type_embeddings,Embedding,weight,"[2, 768]",1536,1536,0.0
3,bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0
4,bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0
5,bert.encoder.layer.0.attention.self.query,Linear,weight,"[320, 768]",245760,135168,0.44999998807907104
6,bert.encoder.layer.0.attention.self.query,Linear,bias,[320],320,256,0.19999998807907104
7,bert.encoder.layer.0.attention.self.key,Linear,weight,"[320, 768]",245760,149504,0.3916666507720947
8,bert.encoder.layer.0.attention.self.key,Linear,bias,[320],320,256,0.19999998807907104
9,bert.encoder.layer.0.attention.self.value,Linear,weight,"[320, 768]",245760,173056,0.2958332896232605
10,bert.encoder.layer.0.attention.self.value,Linear,bias,[320],320,256,0.19999998807907104
11,bert.encoder.layer.0.attention.output.dense,Linear,weight,"[768, 320]",245760,181248,0.26249998807907104
12,bert.encoder.layer.0.attention.output.dense,Linear,bias,[768],768,768,0.0
13,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
14,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
15,bert.encoder.layer.0.intermediate.dense,Linear,weight,"[185, 768]",142080,142080,0.0
16,bert.encoder.layer.0.intermediate.dense,Linear,bias,[185],185,185,0.0
17,bert.encoder.layer.0.output.dense,Linear,weight,"[768, 185]",142080,142080,0.0
18,bert.encoder.layer.0.output.dense,Linear,bias,[768],768,768,0.0
19,bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
20,bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
21,bert.encoder.layer.1.attention.self.query,Linear,weight,"[320, 768]",245760,175104,0.28749996423721313
22,bert.encoder.layer.1.attention.self.query,Linear,bias,[320],320,288,0.09999996423721313
23,bert.encoder.layer.1.attention.self.key,Linear,weight,"[320, 768]",245760,177152,0.27916663885116577
24,bert.encoder.layer.1.attention.self.key,Linear,bias,[320],320,288,0.09999996423721313
25,bert.encoder.layer.1.attention.self.value,Linear,weight,"[320, 768]",245760,166912,0.32083332538604736
26,bert.encoder.layer.1.attention.self.value,Linear,bias,[320],320,288,0.09999996423721313
27,bert.encoder.layer.1.attention.output.dense,Linear,weight,"[768, 320]",245760,167936,0.3166666030883789
28,bert.encoder.layer.1.attention.output.dense,Linear,bias,[768],768,768,0.0
29,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
30,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
31,bert.encoder.layer.1.intermediate.dense,Linear,weight,"[315, 768]",241920,241920,0.0
32,bert.encoder.layer.1.intermediate.dense,Linear,bias,[315],315,315,0.0
33,bert.encoder.layer.1.output.dense,Linear,weight,"[768, 315]",241920,241920,0.0
34,bert.encoder.layer.1.output.dense,Linear,bias,[768],768,768,0.0
35,bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
36,bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
37,bert.encoder.layer.2.attention.self.query,Linear,weight,"[576, 768]",442368,285696,0.3541666865348816
38,bert.encoder.layer.2.attention.self.query,Linear,bias,[576],576,480,0.1666666865348816
39,bert.encoder.layer.2.attention.self.key,Linear,weight,"[576, 768]",442368,297984,0.3263888955116272
40,bert.encoder.layer.2.attention.self.key,Linear,bias,[576],576,480,0.1666666865348816
41,bert.encoder.layer.2.attention.self.value,Linear,weight,"[576, 768]",442368,226304,0.4884259104728699
42,bert.encoder.layer.2.attention.self.value,Linear,bias,[576],576,384,0.3333333134651184
43,bert.encoder.layer.2.attention.output.dense,Linear,weight,"[768, 576]",442368,237568,0.4629629850387573
44,bert.encoder.layer.2.attention.output.dense,Linear,bias,[768],768,768,0.0
45,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
46,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
47,bert.encoder.layer.2.intermediate.dense,Linear,weight,"[339, 768]",260352,260352,0.0
48,bert.encoder.layer.2.intermediate.dense,Linear,bias,[339],339,339,0.0
49,bert.encoder.layer.2.output.dense,Linear,weight,"[768, 339]",260352,260352,0.0
50,bert.encoder.layer.2.output.dense,Linear,bias,[768],768,768,0.0
51,bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
52,bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
53,bert.encoder.layer.3.attention.self.query,Linear,weight,"[576, 768]",442368,277504,0.37268519401550293
54,bert.encoder.layer.3.attention.self.query,Linear,bias,[576],576,512,0.1111111044883728
55,bert.encoder.layer.3.attention.self.key,Linear,weight,"[576, 768]",442368,303104,0.31481480598449707
56,bert.encoder.layer.3.attention.self.key,Linear,bias,[576],576,512,0.1111111044883728
57,bert.encoder.layer.3.attention.self.value,Linear,weight,"[576, 768]",442368,297984,0.3263888955116272
58,bert.encoder.layer.3.attention.self.value,Linear,bias,[576],576,512,0.1111111044883728
59,bert.encoder.layer.3.attention.output.dense,Linear,weight,"[768, 576]",442368,308224,0.30324071645736694
60,bert.encoder.layer.3.attention.output.dense,Linear,bias,[768],768,768,0.0
61,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
62,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
63,bert.encoder.layer.3.intermediate.dense,Linear,weight,"[368, 768]",282624,282624,0.0
64,bert.encoder.layer.3.intermediate.dense,Linear,bias,[368],368,368,0.0
65,bert.encoder.layer.3.output.dense,Linear,weight,"[768, 368]",282624,282624,0.0
66,bert.encoder.layer.3.output.dense,Linear,bias,[768],768,768,0.0
67,bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
68,bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
69,bert.encoder.layer.4.attention.self.query,Linear,weight,"[576, 768]",442368,291840,0.3402777910232544
70,bert.encoder.layer.4.attention.self.query,Linear,bias,[576],576,544,0.055555522441864014
71,bert.encoder.layer.4.attention.self.key,Linear,weight,"[576, 768]",442368,310272,0.2986111044883728
72,bert.encoder.layer.4.attention.self.key,Linear,bias,[576],576,544,0.055555522441864014
73,bert.encoder.layer.4.attention.self.value,Linear,weight,"[576, 768]",442368,272384,0.38425928354263306
74,bert.encoder.layer.4.attention.self.value,Linear,bias,[576],576,480,0.1666666865348816
75,bert.encoder.layer.4.attention.output.dense,Linear,weight,"[768, 576]",442368,263168,0.40509259700775146
76,bert.encoder.layer.4.attention.output.dense,Linear,bias,[768],768,768,0.0
77,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
78,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
79,bert.encoder.layer.4.intermediate.dense,Linear,weight,"[386, 768]",296448,296448,0.0
80,bert.encoder.layer.4.intermediate.dense,Linear,bias,[386],386,386,0.0
81,bert.encoder.layer.4.output.dense,Linear,weight,"[768, 386]",296448,296448,0.0
82,bert.encoder.layer.4.output.dense,Linear,bias,[768],768,768,0.0
83,bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
84,bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
85,bert.encoder.layer.5.attention.self.query,Linear,weight,"[384, 768]",294912,171008,0.4201388955116272
86,bert.encoder.layer.5.attention.self.query,Linear,bias,[384],384,352,0.08333331346511841
87,bert.encoder.layer.5.attention.self.key,Linear,weight,"[384, 768]",294912,205824,0.3020833134651184
88,bert.encoder.layer.5.attention.self.key,Linear,bias,[384],384,352,0.08333331346511841
89,bert.encoder.layer.5.attention.self.value,Linear,weight,"[384, 768]",294912,217088,0.2638888955116272
90,bert.encoder.layer.5.attention.self.value,Linear,bias,[384],384,384,0.0
91,bert.encoder.layer.5.attention.output.dense,Linear,weight,"[768, 384]",294912,223232,0.243055522441864
92,bert.encoder.layer.5.attention.output.dense,Linear,bias,[768],768,768,0.0
93,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
94,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
95,bert.encoder.layer.5.intermediate.dense,Linear,weight,"[336, 768]",258048,258048,0.0
96,bert.encoder.layer.5.intermediate.dense,Linear,bias,[336],336,336,0.0
97,bert.encoder.layer.5.output.dense,Linear,weight,"[768, 336]",258048,258048,0.0
98,bert.encoder.layer.5.output.dense,Linear,bias,[768],768,768,0.0
99,bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
100,bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
101,bert.encoder.layer.6.attention.self.query,Linear,weight,"[448, 768]",344064,192512,0.4404761791229248
102,bert.encoder.layer.6.attention.self.query,Linear,bias,[448],448,416,0.07142853736877441
103,bert.encoder.layer.6.attention.self.key,Linear,weight,"[448, 768]",344064,224256,0.3482142686843872
104,bert.encoder.layer.6.attention.self.key,Linear,bias,[448],448,416,0.07142853736877441
105,bert.encoder.layer.6.attention.self.value,Linear,weight,"[448, 768]",344064,209920,0.3898809552192688
106,bert.encoder.layer.6.attention.self.value,Linear,bias,[448],448,352,0.21428567171096802
107,bert.encoder.layer.6.attention.output.dense,Linear,weight,"[768, 448]",344064,199680,0.4196428656578064
108,bert.encoder.layer.6.attention.output.dense,Linear,bias,[768],768,768,0.0
109,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
110,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
111,bert.encoder.layer.6.intermediate.dense,Linear,weight,"[280, 768]",215040,215040,0.0
112,bert.encoder.layer.6.intermediate.dense,Linear,bias,[280],280,280,0.0
113,bert.encoder.layer.6.output.dense,Linear,weight,"[768, 280]",215040,215040,0.0
114,bert.encoder.layer.6.output.dense,Linear,bias,[768],768,768,0.0
115,bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
116,bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
117,bert.encoder.layer.7.attention.self.query,Linear,weight,"[448, 768]",344064,201728,0.413690447807312
118,bert.encoder.layer.7.attention.self.query,Linear,bias,[448],448,416,0.07142853736877441
119,bert.encoder.layer.7.attention.self.key,Linear,weight,"[448, 768]",344064,237568,0.3095238208770752
120,bert.encoder.layer.7.attention.self.key,Linear,bias,[448],448,416,0.07142853736877441
121,bert.encoder.layer.7.attention.self.value,Linear,weight,"[448, 768]",344064,218112,0.3660714030265808
122,bert.encoder.layer.7.attention.self.value,Linear,bias,[448],448,352,0.21428567171096802
123,bert.encoder.layer.7.attention.output.dense,Linear,weight,"[768, 448]",344064,202752,0.4107142686843872
124,bert.encoder.layer.7.attention.output.dense,Linear,bias,[768],768,768,0.0
125,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
126,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
127,bert.encoder.layer.7.intermediate.dense,Linear,weight,"[211, 768]",162048,162048,0.0
128,bert.encoder.layer.7.intermediate.dense,Linear,bias,[211],211,211,0.0
129,bert.encoder.layer.7.output.dense,Linear,weight,"[768, 211]",162048,162048,0.0
130,bert.encoder.layer.7.output.dense,Linear,bias,[768],768,768,0.0
131,bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
132,bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
133,bert.encoder.layer.8.attention.self.query,Linear,weight,"[448, 768]",344064,186368,0.4583333134651184
134,bert.encoder.layer.8.attention.self.query,Linear,bias,[448],448,416,0.07142853736877441
135,bert.encoder.layer.8.attention.self.key,Linear,weight,"[448, 768]",344064,197632,0.425595223903656
136,bert.encoder.layer.8.attention.self.key,Linear,bias,[448],448,416,0.07142853736877441
137,bert.encoder.layer.8.attention.self.value,Linear,weight,"[448, 768]",344064,154624,0.550595223903656
138,bert.encoder.layer.8.attention.self.value,Linear,bias,[448],448,288,0.3571428060531616
139,bert.encoder.layer.8.attention.output.dense,Linear,weight,"[768, 448]",344064,148480,0.5684523582458496
140,bert.encoder.layer.8.attention.output.dense,Linear,bias,[768],768,768,0.0
141,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
142,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
143,bert.encoder.layer.8.intermediate.dense,Linear,weight,"[108, 768]",82944,82944,0.0
144,bert.encoder.layer.8.intermediate.dense,Linear,bias,[108],108,108,0.0
145,bert.encoder.layer.8.output.dense,Linear,weight,"[768, 108]",82944,82944,0.0
146,bert.encoder.layer.8.output.dense,Linear,bias,[768],768,768,0.0
147,bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
148,bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
149,bert.encoder.layer.9.attention.self.query,Linear,weight,"[320, 768]",245760,144384,0.41249996423721313
150,bert.encoder.layer.9.attention.self.query,Linear,bias,[320],320,288,0.09999996423721313
151,bert.encoder.layer.9.attention.self.key,Linear,weight,"[320, 768]",245760,155648,0.36666661500930786
152,bert.encoder.layer.9.attention.self.key,Linear,bias,[320],320,288,0.09999996423721313
153,bert.encoder.layer.9.attention.self.value,Linear,weight,"[320, 768]",245760,63488,0.7416666746139526
154,bert.encoder.layer.9.attention.self.value,Linear,bias,[320],320,160,0.5
155,bert.encoder.layer.9.attention.output.dense,Linear,weight,"[768, 320]",245760,65536,0.7333333492279053
156,bert.encoder.layer.9.attention.output.dense,Linear,bias,[768],768,704,0.08333331346511841
157,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
158,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
159,bert.encoder.layer.9.intermediate.dense,Linear,weight,"[53, 768]",40704,40704,5.960464477539063e-08
160,bert.encoder.layer.9.intermediate.dense,Linear,bias,[53],53,53,0.0
161,bert.encoder.layer.9.output.dense,Linear,weight,"[768, 53]",40704,40704,5.960464477539063e-08
162,bert.encoder.layer.9.output.dense,Linear,bias,[768],768,768,0.0
163,bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
164,bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
165,bert.encoder.layer.10.attention.self.query,Linear,weight,"[384, 768]",294912,158720,0.461805522441864
166,bert.encoder.layer.10.attention.self.query,Linear,bias,[384],384,320,0.16666662693023682
167,bert.encoder.layer.10.attention.self.key,Linear,weight,"[384, 768]",294912,158720,0.461805522441864
168,bert.encoder.layer.10.attention.self.key,Linear,bias,[384],384,320,0.16666662693023682
169,bert.encoder.layer.10.attention.self.value,Linear,weight,"[384, 768]",294912,77824,0.7361111044883728
170,bert.encoder.layer.10.attention.self.value,Linear,bias,[384],384,192,0.5
171,bert.encoder.layer.10.attention.output.dense,Linear,weight,"[768, 384]",294912,78848,0.7326388955116272
172,bert.encoder.layer.10.attention.output.dense,Linear,bias,[768],768,736,0.041666626930236816
173,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
174,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
175,bert.encoder.layer.10.intermediate.dense,Linear,weight,"[86, 768]",66048,66048,0.0
176,bert.encoder.layer.10.intermediate.dense,Linear,bias,[86],86,86,0.0
177,bert.encoder.layer.10.output.dense,Linear,weight,"[768, 86]",66048,66048,0.0
178,bert.encoder.layer.10.output.dense,Linear,bias,[768],768,768,0.0
179,bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
180,bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
181,bert.encoder.layer.11.attention.self.query,Linear,weight,"[384, 768]",294912,107520,0.6354166269302368
182,bert.encoder.layer.11.attention.self.query,Linear,bias,[384],384,256,0.3333333134651184
183,bert.encoder.layer.11.attention.self.key,Linear,weight,"[384, 768]",294912,118784,0.5972222089767456
184,bert.encoder.layer.11.attention.self.key,Linear,bias,[384],384,256,0.3333333134651184
185,bert.encoder.layer.11.attention.self.value,Linear,weight,"[384, 768]",294912,62464,0.7881944179534912
186,bert.encoder.layer.11.attention.self.value,Linear,bias,[384],384,192,0.5
187,bert.encoder.layer.11.attention.output.dense,Linear,weight,"[768, 384]",294912,54272,0.8159722089767456
188,bert.encoder.layer.11.attention.output.dense,Linear,bias,[768],768,672,0.125
189,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
190,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
191,bert.encoder.layer.11.intermediate.dense,Linear,weight,"[105, 768]",80640,80640,0.0
192,bert.encoder.layer.11.intermediate.dense,Linear,bias,[105],105,105,0.0
193,bert.encoder.layer.11.output.dense,Linear,weight,"[768, 105]",80640,80640,0.0
194,bert.encoder.layer.11.output.dense,Linear,bias,[768],768,768,0.0
195,bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
196,bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
197,qa_outputs,Linear,weight,"[2, 768]",1536,1536,0.0
198,qa_outputs,Linear,bias,[2],2,2,0.0
|