XP_layer_wise_sparsity_global_rate_0.00.md · vuiseng9/bert-base-squadv1-block-pruning-hybrid-filled-lt at main

	layer_id	layer_type	param_type	shape	nparam	nnz	sparsity
0	bert.embeddings.word_embeddings	Embedding	weight	[30522, 768]	23440896	23440896	0
1	bert.embeddings.position_embeddings	Embedding	weight	[512, 768]	393216	393216	0
2	bert.embeddings.token_type_embeddings	Embedding	weight	[2, 768]	1536	1536	0
3	bert.embeddings.LayerNorm	LayerNorm	weight	[768]	768	768	0
4	bert.embeddings.LayerNorm	LayerNorm	bias	[768]	768	768	0
5	bert.encoder.layer.0.attention.self.query	Linear	weight	[320, 768]	245760	245760	0
6	bert.encoder.layer.0.attention.self.query	Linear	bias	[320]	320	320	0
7	bert.encoder.layer.0.attention.self.key	Linear	weight	[320, 768]	245760	245760	0
8	bert.encoder.layer.0.attention.self.key	Linear	bias	[320]	320	320	0
9	bert.encoder.layer.0.attention.self.value	Linear	weight	[320, 768]	245760	245760	0
10	bert.encoder.layer.0.attention.self.value	Linear	bias	[320]	320	320	0
11	bert.encoder.layer.0.attention.output.dense	Linear	weight	[768, 320]	245760	245760	0
12	bert.encoder.layer.0.attention.output.dense	Linear	bias	[768]	768	768	0
13	bert.encoder.layer.0.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
14	bert.encoder.layer.0.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
15	bert.encoder.layer.0.intermediate.dense	Linear	weight	[185, 768]	142080	142080	0
16	bert.encoder.layer.0.intermediate.dense	Linear	bias	[185]	185	185	0
17	bert.encoder.layer.0.output.dense	Linear	weight	[768, 185]	142080	142080	0
18	bert.encoder.layer.0.output.dense	Linear	bias	[768]	768	768	0
19	bert.encoder.layer.0.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
20	bert.encoder.layer.0.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
21	bert.encoder.layer.1.attention.self.query	Linear	weight	[320, 768]	245760	245760	0
22	bert.encoder.layer.1.attention.self.query	Linear	bias	[320]	320	320	0
23	bert.encoder.layer.1.attention.self.key	Linear	weight	[320, 768]	245760	245760	0
24	bert.encoder.layer.1.attention.self.key	Linear	bias	[320]	320	320	0
25	bert.encoder.layer.1.attention.self.value	Linear	weight	[320, 768]	245760	245760	0
26	bert.encoder.layer.1.attention.self.value	Linear	bias	[320]	320	320	0
27	bert.encoder.layer.1.attention.output.dense	Linear	weight	[768, 320]	245760	245760	0
28	bert.encoder.layer.1.attention.output.dense	Linear	bias	[768]	768	768	0
29	bert.encoder.layer.1.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
30	bert.encoder.layer.1.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
31	bert.encoder.layer.1.intermediate.dense	Linear	weight	[315, 768]	241920	241920	0
32	bert.encoder.layer.1.intermediate.dense	Linear	bias	[315]	315	315	0
33	bert.encoder.layer.1.output.dense	Linear	weight	[768, 315]	241920	241920	0
34	bert.encoder.layer.1.output.dense	Linear	bias	[768]	768	768	0
35	bert.encoder.layer.1.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
36	bert.encoder.layer.1.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
37	bert.encoder.layer.2.attention.self.query	Linear	weight	[576, 768]	442368	442368	0
38	bert.encoder.layer.2.attention.self.query	Linear	bias	[576]	576	576	0
39	bert.encoder.layer.2.attention.self.key	Linear	weight	[576, 768]	442368	442368	0
40	bert.encoder.layer.2.attention.self.key	Linear	bias	[576]	576	576	0
41	bert.encoder.layer.2.attention.self.value	Linear	weight	[576, 768]	442368	442368	0
42	bert.encoder.layer.2.attention.self.value	Linear	bias	[576]	576	576	0
43	bert.encoder.layer.2.attention.output.dense	Linear	weight	[768, 576]	442368	442368	0
44	bert.encoder.layer.2.attention.output.dense	Linear	bias	[768]	768	768	0
45	bert.encoder.layer.2.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
46	bert.encoder.layer.2.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
47	bert.encoder.layer.2.intermediate.dense	Linear	weight	[339, 768]	260352	260352	0
48	bert.encoder.layer.2.intermediate.dense	Linear	bias	[339]	339	339	0
49	bert.encoder.layer.2.output.dense	Linear	weight	[768, 339]	260352	260352	0
50	bert.encoder.layer.2.output.dense	Linear	bias	[768]	768	768	0
51	bert.encoder.layer.2.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
52	bert.encoder.layer.2.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
53	bert.encoder.layer.3.attention.self.query	Linear	weight	[576, 768]	442368	442368	0
54	bert.encoder.layer.3.attention.self.query	Linear	bias	[576]	576	576	0
55	bert.encoder.layer.3.attention.self.key	Linear	weight	[576, 768]	442368	442368	0
56	bert.encoder.layer.3.attention.self.key	Linear	bias	[576]	576	576	0
57	bert.encoder.layer.3.attention.self.value	Linear	weight	[576, 768]	442368	442368	0
58	bert.encoder.layer.3.attention.self.value	Linear	bias	[576]	576	576	0
59	bert.encoder.layer.3.attention.output.dense	Linear	weight	[768, 576]	442368	442368	0
60	bert.encoder.layer.3.attention.output.dense	Linear	bias	[768]	768	768	0
61	bert.encoder.layer.3.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
62	bert.encoder.layer.3.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
63	bert.encoder.layer.3.intermediate.dense	Linear	weight	[368, 768]	282624	282624	0
64	bert.encoder.layer.3.intermediate.dense	Linear	bias	[368]	368	368	0
65	bert.encoder.layer.3.output.dense	Linear	weight	[768, 368]	282624	282624	0
66	bert.encoder.layer.3.output.dense	Linear	bias	[768]	768	768	0
67	bert.encoder.layer.3.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
68	bert.encoder.layer.3.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
69	bert.encoder.layer.4.attention.self.query	Linear	weight	[576, 768]	442368	442368	0
70	bert.encoder.layer.4.attention.self.query	Linear	bias	[576]	576	576	0
71	bert.encoder.layer.4.attention.self.key	Linear	weight	[576, 768]	442368	442368	0
72	bert.encoder.layer.4.attention.self.key	Linear	bias	[576]	576	576	0
73	bert.encoder.layer.4.attention.self.value	Linear	weight	[576, 768]	442368	442368	0
74	bert.encoder.layer.4.attention.self.value	Linear	bias	[576]	576	576	0
75	bert.encoder.layer.4.attention.output.dense	Linear	weight	[768, 576]	442368	442368	0
76	bert.encoder.layer.4.attention.output.dense	Linear	bias	[768]	768	768	0
77	bert.encoder.layer.4.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
78	bert.encoder.layer.4.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
79	bert.encoder.layer.4.intermediate.dense	Linear	weight	[386, 768]	296448	296448	0
80	bert.encoder.layer.4.intermediate.dense	Linear	bias	[386]	386	386	0
81	bert.encoder.layer.4.output.dense	Linear	weight	[768, 386]	296448	296448	0
82	bert.encoder.layer.4.output.dense	Linear	bias	[768]	768	768	0
83	bert.encoder.layer.4.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
84	bert.encoder.layer.4.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
85	bert.encoder.layer.5.attention.self.query	Linear	weight	[384, 768]	294912	294912	0
86	bert.encoder.layer.5.attention.self.query	Linear	bias	[384]	384	384	0
87	bert.encoder.layer.5.attention.self.key	Linear	weight	[384, 768]	294912	294912	0
88	bert.encoder.layer.5.attention.self.key	Linear	bias	[384]	384	384	0
89	bert.encoder.layer.5.attention.self.value	Linear	weight	[384, 768]	294912	294912	0
90	bert.encoder.layer.5.attention.self.value	Linear	bias	[384]	384	384	0
91	bert.encoder.layer.5.attention.output.dense	Linear	weight	[768, 384]	294912	294912	0
92	bert.encoder.layer.5.attention.output.dense	Linear	bias	[768]	768	768	0
93	bert.encoder.layer.5.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
94	bert.encoder.layer.5.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
95	bert.encoder.layer.5.intermediate.dense	Linear	weight	[336, 768]	258048	258048	0
96	bert.encoder.layer.5.intermediate.dense	Linear	bias	[336]	336	336	0
97	bert.encoder.layer.5.output.dense	Linear	weight	[768, 336]	258048	258048	0
98	bert.encoder.layer.5.output.dense	Linear	bias	[768]	768	768	0
99	bert.encoder.layer.5.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
100	bert.encoder.layer.5.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
101	bert.encoder.layer.6.attention.self.query	Linear	weight	[448, 768]	344064	344064	0
102	bert.encoder.layer.6.attention.self.query	Linear	bias	[448]	448	448	0
103	bert.encoder.layer.6.attention.self.key	Linear	weight	[448, 768]	344064	344064	0
104	bert.encoder.layer.6.attention.self.key	Linear	bias	[448]	448	448	0
105	bert.encoder.layer.6.attention.self.value	Linear	weight	[448, 768]	344064	344064	0
106	bert.encoder.layer.6.attention.self.value	Linear	bias	[448]	448	448	0
107	bert.encoder.layer.6.attention.output.dense	Linear	weight	[768, 448]	344064	344064	0
108	bert.encoder.layer.6.attention.output.dense	Linear	bias	[768]	768	768	0
109	bert.encoder.layer.6.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
110	bert.encoder.layer.6.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
111	bert.encoder.layer.6.intermediate.dense	Linear	weight	[280, 768]	215040	215040	0
112	bert.encoder.layer.6.intermediate.dense	Linear	bias	[280]	280	280	0
113	bert.encoder.layer.6.output.dense	Linear	weight	[768, 280]	215040	215040	0
114	bert.encoder.layer.6.output.dense	Linear	bias	[768]	768	768	0
115	bert.encoder.layer.6.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
116	bert.encoder.layer.6.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
117	bert.encoder.layer.7.attention.self.query	Linear	weight	[448, 768]	344064	344064	0
118	bert.encoder.layer.7.attention.self.query	Linear	bias	[448]	448	448	0
119	bert.encoder.layer.7.attention.self.key	Linear	weight	[448, 768]	344064	344064	0
120	bert.encoder.layer.7.attention.self.key	Linear	bias	[448]	448	448	0
121	bert.encoder.layer.7.attention.self.value	Linear	weight	[448, 768]	344064	344064	0
122	bert.encoder.layer.7.attention.self.value	Linear	bias	[448]	448	448	0
123	bert.encoder.layer.7.attention.output.dense	Linear	weight	[768, 448]	344064	344064	0
124	bert.encoder.layer.7.attention.output.dense	Linear	bias	[768]	768	768	0
125	bert.encoder.layer.7.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
126	bert.encoder.layer.7.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
127	bert.encoder.layer.7.intermediate.dense	Linear	weight	[211, 768]	162048	162048	0
128	bert.encoder.layer.7.intermediate.dense	Linear	bias	[211]	211	211	0
129	bert.encoder.layer.7.output.dense	Linear	weight	[768, 211]	162048	162048	0
130	bert.encoder.layer.7.output.dense	Linear	bias	[768]	768	768	0
131	bert.encoder.layer.7.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
132	bert.encoder.layer.7.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
133	bert.encoder.layer.8.attention.self.query	Linear	weight	[448, 768]	344064	344064	0
134	bert.encoder.layer.8.attention.self.query	Linear	bias	[448]	448	448	0
135	bert.encoder.layer.8.attention.self.key	Linear	weight	[448, 768]	344064	344064	0
136	bert.encoder.layer.8.attention.self.key	Linear	bias	[448]	448	448	0
137	bert.encoder.layer.8.attention.self.value	Linear	weight	[448, 768]	344064	344064	0
138	bert.encoder.layer.8.attention.self.value	Linear	bias	[448]	448	448	0
139	bert.encoder.layer.8.attention.output.dense	Linear	weight	[768, 448]	344064	344064	0
140	bert.encoder.layer.8.attention.output.dense	Linear	bias	[768]	768	768	0
141	bert.encoder.layer.8.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
142	bert.encoder.layer.8.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
143	bert.encoder.layer.8.intermediate.dense	Linear	weight	[108, 768]	82944	82944	0
144	bert.encoder.layer.8.intermediate.dense	Linear	bias	[108]	108	108	0
145	bert.encoder.layer.8.output.dense	Linear	weight	[768, 108]	82944	82944	0
146	bert.encoder.layer.8.output.dense	Linear	bias	[768]	768	768	0
147	bert.encoder.layer.8.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
148	bert.encoder.layer.8.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
149	bert.encoder.layer.9.attention.self.query	Linear	weight	[320, 768]	245760	245760	0
150	bert.encoder.layer.9.attention.self.query	Linear	bias	[320]	320	320	0
151	bert.encoder.layer.9.attention.self.key	Linear	weight	[320, 768]	245760	245760	0
152	bert.encoder.layer.9.attention.self.key	Linear	bias	[320]	320	320	0
153	bert.encoder.layer.9.attention.self.value	Linear	weight	[320, 768]	245760	245760	0
154	bert.encoder.layer.9.attention.self.value	Linear	bias	[320]	320	320	0
155	bert.encoder.layer.9.attention.output.dense	Linear	weight	[768, 320]	245760	245760	0
156	bert.encoder.layer.9.attention.output.dense	Linear	bias	[768]	768	768	0
157	bert.encoder.layer.9.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
158	bert.encoder.layer.9.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
159	bert.encoder.layer.9.intermediate.dense	Linear	weight	[53, 768]	40704	40704	5.96046e-08
160	bert.encoder.layer.9.intermediate.dense	Linear	bias	[53]	53	53	0
161	bert.encoder.layer.9.output.dense	Linear	weight	[768, 53]	40704	40704	5.96046e-08
162	bert.encoder.layer.9.output.dense	Linear	bias	[768]	768	768	0
163	bert.encoder.layer.9.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
164	bert.encoder.layer.9.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
165	bert.encoder.layer.10.attention.self.query	Linear	weight	[384, 768]	294912	294912	0
166	bert.encoder.layer.10.attention.self.query	Linear	bias	[384]	384	384	0
167	bert.encoder.layer.10.attention.self.key	Linear	weight	[384, 768]	294912	294912	0
168	bert.encoder.layer.10.attention.self.key	Linear	bias	[384]	384	384	0
169	bert.encoder.layer.10.attention.self.value	Linear	weight	[384, 768]	294912	294912	0
170	bert.encoder.layer.10.attention.self.value	Linear	bias	[384]	384	384	0
171	bert.encoder.layer.10.attention.output.dense	Linear	weight	[768, 384]	294912	294912	0
172	bert.encoder.layer.10.attention.output.dense	Linear	bias	[768]	768	768	0
173	bert.encoder.layer.10.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
174	bert.encoder.layer.10.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
175	bert.encoder.layer.10.intermediate.dense	Linear	weight	[86, 768]	66048	66048	0
176	bert.encoder.layer.10.intermediate.dense	Linear	bias	[86]	86	86	0
177	bert.encoder.layer.10.output.dense	Linear	weight	[768, 86]	66048	66048	0
178	bert.encoder.layer.10.output.dense	Linear	bias	[768]	768	768	0
179	bert.encoder.layer.10.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
180	bert.encoder.layer.10.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
181	bert.encoder.layer.11.attention.self.query	Linear	weight	[384, 768]	294912	294912	0
182	bert.encoder.layer.11.attention.self.query	Linear	bias	[384]	384	384	0
183	bert.encoder.layer.11.attention.self.key	Linear	weight	[384, 768]	294912	294912	0
184	bert.encoder.layer.11.attention.self.key	Linear	bias	[384]	384	384	0
185	bert.encoder.layer.11.attention.self.value	Linear	weight	[384, 768]	294912	294912	0
186	bert.encoder.layer.11.attention.self.value	Linear	bias	[384]	384	384	0
187	bert.encoder.layer.11.attention.output.dense	Linear	weight	[768, 384]	294912	294912	0
188	bert.encoder.layer.11.attention.output.dense	Linear	bias	[768]	768	768	0
189	bert.encoder.layer.11.attention.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
190	bert.encoder.layer.11.attention.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
191	bert.encoder.layer.11.intermediate.dense	Linear	weight	[105, 768]	80640	80640	0
192	bert.encoder.layer.11.intermediate.dense	Linear	bias	[105]	105	105	0
193	bert.encoder.layer.11.output.dense	Linear	weight	[768, 105]	80640	80640	0
194	bert.encoder.layer.11.output.dense	Linear	bias	[768]	768	768	0
195	bert.encoder.layer.11.output.LayerNorm	LayerNorm	weight	[768]	768	768	0
196	bert.encoder.layer.11.output.LayerNorm	LayerNorm	bias	[768]	768	768	0
197	qa_outputs	Linear	weight	[2, 768]	1536	1536	0
198	qa_outputs	Linear	bias	[2]	2	2	0