d-matrix committed on
Commit
9a5ba8a
·
verified ·
1 Parent(s): 0ffcde2

Delete configs/BASIC.yaml

Browse files
Files changed (1) hide show
  1. configs/BASIC.yaml +0 -1072
configs/BASIC.yaml DELETED
@@ -1,1072 +0,0 @@
1
- model:
2
- lm_head:
3
- accum_format: SAME
4
- approximation_function: NONE
5
- input_format: SAME
6
- instance: Linear
7
- output_format: SAME
8
- weight_format: SAME
9
- weight_sparseness: DENSE
10
- model.decoder.final_layer_norm:
11
- approximation_function: NONE
12
- bias_format: SAME
13
- input_format: SAME
14
- instance: LayerNorm
15
- output_format: SAME
16
- weight_format: SAME
17
- model.decoder.layers.0.activation_fn:
18
- approximation_function: NONE
19
- input_format: SAME
20
- instance: ReLU
21
- output_format: SAME
22
- model.decoder.layers.0.dropout:
23
- approximation_function: NONE
24
- input_format: SAME
25
- instance: Dropout
26
- output_format: SAME
27
- model.decoder.layers.0.fc1:
28
- accum_format: SAME
29
- approximation_function: NONE
30
- bias_format: SAME
31
- input_format: BFP[8|8]{64,-1}(SN)
32
- instance: Linear
33
- output_format: SAME
34
- weight_format: BFP[8|8]{64,-1}(SN)
35
- weight_sparseness: DENSE
36
- model.decoder.layers.0.fc2:
37
- accum_format: SAME
38
- approximation_function: NONE
39
- bias_format: SAME
40
- input_format: BFP[8|8]{64,-1}(SN)
41
- instance: Linear
42
- output_format: SAME
43
- weight_format: BFP[8|8]{64,-1}(SN)
44
- weight_sparseness: DENSE
45
- model.decoder.layers.0.final_layer_norm:
46
- approximation_function: NONE
47
- bias_format: SAME
48
- input_format: SAME
49
- instance: LayerNorm
50
- output_format: SAME
51
- weight_format: SAME
52
- model.decoder.layers.0.self_attn.dropout:
53
- approximation_function: NONE
54
- input_format: SAME
55
- instance: Dropout
56
- output_format: BFP[8|8]{64,-1}(SN)
57
- model.decoder.layers.0.self_attn.k_proj:
58
- accum_format: SAME
59
- approximation_function: NONE
60
- bias_format: SAME
61
- input_format: BFP[8|8]{64,-1}(SN)
62
- instance: Linear
63
- output_format: BFP[8|8]{64,-1}(SN)
64
- weight_format: BFP[8|8]{64,-1}(SN)
65
- weight_sparseness: DENSE
66
- model.decoder.layers.0.self_attn.out_proj:
67
- accum_format: SAME
68
- approximation_function: NONE
69
- bias_format: SAME
70
- input_format: BFP[8|8]{64,-1}(SN)
71
- instance: Linear
72
- output_format: SAME
73
- weight_format: BFP[8|8]{64,-1}(SN)
74
- weight_sparseness: DENSE
75
- model.decoder.layers.0.self_attn.q_proj:
76
- accum_format: SAME
77
- approximation_function: NONE
78
- bias_format: SAME
79
- input_format: BFP[8|8]{64,-1}(SN)
80
- instance: Linear
81
- output_format: BFP[8|8]{64,-1}(SN)
82
- weight_format: BFP[8|8]{64,-1}(SN)
83
- weight_sparseness: DENSE
84
- model.decoder.layers.0.self_attn.softmax:
85
- approximation_function: NONE
86
- input_format: SAME
87
- instance: Softmax
88
- output_format: SAME
89
- model.decoder.layers.0.self_attn.v_proj:
90
- accum_format: SAME
91
- approximation_function: NONE
92
- bias_format: SAME
93
- input_format: BFP[8|8]{64,-1}(SN)
94
- instance: Linear
95
- output_format: BFP[8|8]{64,-1}(SN)
96
- weight_format: BFP[8|8]{64,-1}(SN)
97
- weight_sparseness: DENSE
98
- model.decoder.layers.0.self_attn_layer_norm:
99
- approximation_function: NONE
100
- bias_format: SAME
101
- input_format: SAME
102
- instance: LayerNorm
103
- output_format: SAME
104
- weight_format: SAME
105
- model.decoder.layers.1.activation_fn:
106
- approximation_function: NONE
107
- input_format: SAME
108
- instance: ReLU
109
- output_format: SAME
110
- model.decoder.layers.1.dropout:
111
- approximation_function: NONE
112
- input_format: SAME
113
- instance: Dropout
114
- output_format: SAME
115
- model.decoder.layers.1.fc1:
116
- accum_format: SAME
117
- approximation_function: NONE
118
- bias_format: SAME
119
- input_format: BFP[8|8]{64,-1}(SN)
120
- instance: Linear
121
- output_format: SAME
122
- weight_format: BFP[8|8]{64,-1}(SN)
123
- weight_sparseness: DENSE
124
- model.decoder.layers.1.fc2:
125
- accum_format: SAME
126
- approximation_function: NONE
127
- bias_format: SAME
128
- input_format: BFP[8|8]{64,-1}(SN)
129
- instance: Linear
130
- output_format: SAME
131
- weight_format: BFP[8|8]{64,-1}(SN)
132
- weight_sparseness: DENSE
133
- model.decoder.layers.1.final_layer_norm:
134
- approximation_function: NONE
135
- bias_format: SAME
136
- input_format: SAME
137
- instance: LayerNorm
138
- output_format: SAME
139
- weight_format: SAME
140
- model.decoder.layers.1.self_attn.dropout:
141
- approximation_function: NONE
142
- input_format: SAME
143
- instance: Dropout
144
- output_format: BFP[8|8]{64,-1}(SN)
145
- model.decoder.layers.1.self_attn.k_proj:
146
- accum_format: SAME
147
- approximation_function: NONE
148
- bias_format: SAME
149
- input_format: BFP[8|8]{64,-1}(SN)
150
- instance: Linear
151
- output_format: BFP[8|8]{64,-1}(SN)
152
- weight_format: BFP[8|8]{64,-1}(SN)
153
- weight_sparseness: DENSE
154
- model.decoder.layers.1.self_attn.out_proj:
155
- accum_format: SAME
156
- approximation_function: NONE
157
- bias_format: SAME
158
- input_format: BFP[8|8]{64,-1}(SN)
159
- instance: Linear
160
- output_format: SAME
161
- weight_format: BFP[8|8]{64,-1}(SN)
162
- weight_sparseness: DENSE
163
- model.decoder.layers.1.self_attn.q_proj:
164
- accum_format: SAME
165
- approximation_function: NONE
166
- bias_format: SAME
167
- input_format: BFP[8|8]{64,-1}(SN)
168
- instance: Linear
169
- output_format: BFP[8|8]{64,-1}(SN)
170
- weight_format: BFP[8|8]{64,-1}(SN)
171
- weight_sparseness: DENSE
172
- model.decoder.layers.1.self_attn.softmax:
173
- approximation_function: NONE
174
- input_format: SAME
175
- instance: Softmax
176
- output_format: SAME
177
- model.decoder.layers.1.self_attn.v_proj:
178
- accum_format: SAME
179
- approximation_function: NONE
180
- bias_format: SAME
181
- input_format: BFP[8|8]{64,-1}(SN)
182
- instance: Linear
183
- output_format: BFP[8|8]{64,-1}(SN)
184
- weight_format: BFP[8|8]{64,-1}(SN)
185
- weight_sparseness: DENSE
186
- model.decoder.layers.1.self_attn_layer_norm:
187
- approximation_function: NONE
188
- bias_format: SAME
189
- input_format: SAME
190
- instance: LayerNorm
191
- output_format: SAME
192
- weight_format: SAME
193
- model.decoder.layers.10.activation_fn:
194
- approximation_function: NONE
195
- input_format: SAME
196
- instance: ReLU
197
- output_format: SAME
198
- model.decoder.layers.10.dropout:
199
- approximation_function: NONE
200
- input_format: SAME
201
- instance: Dropout
202
- output_format: SAME
203
- model.decoder.layers.10.fc1:
204
- accum_format: SAME
205
- approximation_function: NONE
206
- bias_format: SAME
207
- input_format: BFP[8|8]{64,-1}(SN)
208
- instance: Linear
209
- output_format: SAME
210
- weight_format: BFP[8|8]{64,-1}(SN)
211
- weight_sparseness: DENSE
212
- model.decoder.layers.10.fc2:
213
- accum_format: SAME
214
- approximation_function: NONE
215
- bias_format: SAME
216
- input_format: BFP[8|8]{64,-1}(SN)
217
- instance: Linear
218
- output_format: SAME
219
- weight_format: BFP[8|8]{64,-1}(SN)
220
- weight_sparseness: DENSE
221
- model.decoder.layers.10.final_layer_norm:
222
- approximation_function: NONE
223
- bias_format: SAME
224
- input_format: SAME
225
- instance: LayerNorm
226
- output_format: SAME
227
- weight_format: SAME
228
- model.decoder.layers.10.self_attn.dropout:
229
- approximation_function: NONE
230
- input_format: SAME
231
- instance: Dropout
232
- output_format: BFP[8|8]{64,-1}(SN)
233
- model.decoder.layers.10.self_attn.k_proj:
234
- accum_format: SAME
235
- approximation_function: NONE
236
- bias_format: SAME
237
- input_format: BFP[8|8]{64,-1}(SN)
238
- instance: Linear
239
- output_format: BFP[8|8]{64,-1}(SN)
240
- weight_format: BFP[8|8]{64,-1}(SN)
241
- weight_sparseness: DENSE
242
- model.decoder.layers.10.self_attn.out_proj:
243
- accum_format: SAME
244
- approximation_function: NONE
245
- bias_format: SAME
246
- input_format: BFP[8|8]{64,-1}(SN)
247
- instance: Linear
248
- output_format: SAME
249
- weight_format: BFP[8|8]{64,-1}(SN)
250
- weight_sparseness: DENSE
251
- model.decoder.layers.10.self_attn.q_proj:
252
- accum_format: SAME
253
- approximation_function: NONE
254
- bias_format: SAME
255
- input_format: BFP[8|8]{64,-1}(SN)
256
- instance: Linear
257
- output_format: BFP[8|8]{64,-1}(SN)
258
- weight_format: BFP[8|8]{64,-1}(SN)
259
- weight_sparseness: DENSE
260
- model.decoder.layers.10.self_attn.softmax:
261
- approximation_function: NONE
262
- input_format: SAME
263
- instance: Softmax
264
- output_format: SAME
265
- model.decoder.layers.10.self_attn.v_proj:
266
- accum_format: SAME
267
- approximation_function: NONE
268
- bias_format: SAME
269
- input_format: BFP[8|8]{64,-1}(SN)
270
- instance: Linear
271
- output_format: BFP[8|8]{64,-1}(SN)
272
- weight_format: BFP[8|8]{64,-1}(SN)
273
- weight_sparseness: DENSE
274
- model.decoder.layers.10.self_attn_layer_norm:
275
- approximation_function: NONE
276
- bias_format: SAME
277
- input_format: SAME
278
- instance: LayerNorm
279
- output_format: SAME
280
- weight_format: SAME
281
- model.decoder.layers.11.activation_fn:
282
- approximation_function: NONE
283
- input_format: SAME
284
- instance: ReLU
285
- output_format: SAME
286
- model.decoder.layers.11.dropout:
287
- approximation_function: NONE
288
- input_format: SAME
289
- instance: Dropout
290
- output_format: SAME
291
- model.decoder.layers.11.fc1:
292
- accum_format: SAME
293
- approximation_function: NONE
294
- bias_format: SAME
295
- input_format: BFP[8|8]{64,-1}(SN)
296
- instance: Linear
297
- output_format: SAME
298
- weight_format: BFP[8|8]{64,-1}(SN)
299
- weight_sparseness: DENSE
300
- model.decoder.layers.11.fc2:
301
- accum_format: SAME
302
- approximation_function: NONE
303
- bias_format: SAME
304
- input_format: BFP[8|8]{64,-1}(SN)
305
- instance: Linear
306
- output_format: SAME
307
- weight_format: BFP[8|8]{64,-1}(SN)
308
- weight_sparseness: DENSE
309
- model.decoder.layers.11.final_layer_norm:
310
- approximation_function: NONE
311
- bias_format: SAME
312
- input_format: SAME
313
- instance: LayerNorm
314
- output_format: SAME
315
- weight_format: SAME
316
- model.decoder.layers.11.self_attn.dropout:
317
- approximation_function: NONE
318
- input_format: SAME
319
- instance: Dropout
320
- output_format: BFP[8|8]{64,-1}(SN)
321
- model.decoder.layers.11.self_attn.k_proj:
322
- accum_format: SAME
323
- approximation_function: NONE
324
- bias_format: SAME
325
- input_format: BFP[8|8]{64,-1}(SN)
326
- instance: Linear
327
- output_format: BFP[8|8]{64,-1}(SN)
328
- weight_format: BFP[8|8]{64,-1}(SN)
329
- weight_sparseness: DENSE
330
- model.decoder.layers.11.self_attn.out_proj:
331
- accum_format: SAME
332
- approximation_function: NONE
333
- bias_format: SAME
334
- input_format: BFP[8|8]{64,-1}(SN)
335
- instance: Linear
336
- output_format: SAME
337
- weight_format: BFP[8|8]{64,-1}(SN)
338
- weight_sparseness: DENSE
339
- model.decoder.layers.11.self_attn.q_proj:
340
- accum_format: SAME
341
- approximation_function: NONE
342
- bias_format: SAME
343
- input_format: BFP[8|8]{64,-1}(SN)
344
- instance: Linear
345
- output_format: BFP[8|8]{64,-1}(SN)
346
- weight_format: BFP[8|8]{64,-1}(SN)
347
- weight_sparseness: DENSE
348
- model.decoder.layers.11.self_attn.softmax:
349
- approximation_function: NONE
350
- input_format: SAME
351
- instance: Softmax
352
- output_format: SAME
353
- model.decoder.layers.11.self_attn.v_proj:
354
- accum_format: SAME
355
- approximation_function: NONE
356
- bias_format: SAME
357
- input_format: BFP[8|8]{64,-1}(SN)
358
- instance: Linear
359
- output_format: BFP[8|8]{64,-1}(SN)
360
- weight_format: BFP[8|8]{64,-1}(SN)
361
- weight_sparseness: DENSE
362
- model.decoder.layers.11.self_attn_layer_norm:
363
- approximation_function: NONE
364
- bias_format: SAME
365
- input_format: SAME
366
- instance: LayerNorm
367
- output_format: SAME
368
- weight_format: SAME
369
- model.decoder.layers.2.activation_fn:
370
- approximation_function: NONE
371
- input_format: SAME
372
- instance: ReLU
373
- output_format: SAME
374
- model.decoder.layers.2.dropout:
375
- approximation_function: NONE
376
- input_format: SAME
377
- instance: Dropout
378
- output_format: SAME
379
- model.decoder.layers.2.fc1:
380
- accum_format: SAME
381
- approximation_function: NONE
382
- bias_format: SAME
383
- input_format: BFP[8|8]{64,-1}(SN)
384
- instance: Linear
385
- output_format: SAME
386
- weight_format: BFP[8|8]{64,-1}(SN)
387
- weight_sparseness: DENSE
388
- model.decoder.layers.2.fc2:
389
- accum_format: SAME
390
- approximation_function: NONE
391
- bias_format: SAME
392
- input_format: BFP[8|8]{64,-1}(SN)
393
- instance: Linear
394
- output_format: SAME
395
- weight_format: BFP[8|8]{64,-1}(SN)
396
- weight_sparseness: DENSE
397
- model.decoder.layers.2.final_layer_norm:
398
- approximation_function: NONE
399
- bias_format: SAME
400
- input_format: SAME
401
- instance: LayerNorm
402
- output_format: SAME
403
- weight_format: SAME
404
- model.decoder.layers.2.self_attn.dropout:
405
- approximation_function: NONE
406
- input_format: SAME
407
- instance: Dropout
408
- output_format: BFP[8|8]{64,-1}(SN)
409
- model.decoder.layers.2.self_attn.k_proj:
410
- accum_format: SAME
411
- approximation_function: NONE
412
- bias_format: SAME
413
- input_format: BFP[8|8]{64,-1}(SN)
414
- instance: Linear
415
- output_format: BFP[8|8]{64,-1}(SN)
416
- weight_format: BFP[8|8]{64,-1}(SN)
417
- weight_sparseness: DENSE
418
- model.decoder.layers.2.self_attn.out_proj:
419
- accum_format: SAME
420
- approximation_function: NONE
421
- bias_format: SAME
422
- input_format: BFP[8|8]{64,-1}(SN)
423
- instance: Linear
424
- output_format: SAME
425
- weight_format: BFP[8|8]{64,-1}(SN)
426
- weight_sparseness: DENSE
427
- model.decoder.layers.2.self_attn.q_proj:
428
- accum_format: SAME
429
- approximation_function: NONE
430
- bias_format: SAME
431
- input_format: BFP[8|8]{64,-1}(SN)
432
- instance: Linear
433
- output_format: BFP[8|8]{64,-1}(SN)
434
- weight_format: BFP[8|8]{64,-1}(SN)
435
- weight_sparseness: DENSE
436
- model.decoder.layers.2.self_attn.softmax:
437
- approximation_function: NONE
438
- input_format: SAME
439
- instance: Softmax
440
- output_format: SAME
441
- model.decoder.layers.2.self_attn.v_proj:
442
- accum_format: SAME
443
- approximation_function: NONE
444
- bias_format: SAME
445
- input_format: BFP[8|8]{64,-1}(SN)
446
- instance: Linear
447
- output_format: BFP[8|8]{64,-1}(SN)
448
- weight_format: BFP[8|8]{64,-1}(SN)
449
- weight_sparseness: DENSE
450
- model.decoder.layers.2.self_attn_layer_norm:
451
- approximation_function: NONE
452
- bias_format: SAME
453
- input_format: SAME
454
- instance: LayerNorm
455
- output_format: SAME
456
- weight_format: SAME
457
- model.decoder.layers.3.activation_fn:
458
- approximation_function: NONE
459
- input_format: SAME
460
- instance: ReLU
461
- output_format: SAME
462
- model.decoder.layers.3.dropout:
463
- approximation_function: NONE
464
- input_format: SAME
465
- instance: Dropout
466
- output_format: SAME
467
- model.decoder.layers.3.fc1:
468
- accum_format: SAME
469
- approximation_function: NONE
470
- bias_format: SAME
471
- input_format: BFP[8|8]{64,-1}(SN)
472
- instance: Linear
473
- output_format: SAME
474
- weight_format: BFP[8|8]{64,-1}(SN)
475
- weight_sparseness: DENSE
476
- model.decoder.layers.3.fc2:
477
- accum_format: SAME
478
- approximation_function: NONE
479
- bias_format: SAME
480
- input_format: BFP[8|8]{64,-1}(SN)
481
- instance: Linear
482
- output_format: SAME
483
- weight_format: BFP[8|8]{64,-1}(SN)
484
- weight_sparseness: DENSE
485
- model.decoder.layers.3.final_layer_norm:
486
- approximation_function: NONE
487
- bias_format: SAME
488
- input_format: SAME
489
- instance: LayerNorm
490
- output_format: SAME
491
- weight_format: SAME
492
- model.decoder.layers.3.self_attn.dropout:
493
- approximation_function: NONE
494
- input_format: SAME
495
- instance: Dropout
496
- output_format: BFP[8|8]{64,-1}(SN)
497
- model.decoder.layers.3.self_attn.k_proj:
498
- accum_format: SAME
499
- approximation_function: NONE
500
- bias_format: SAME
501
- input_format: BFP[8|8]{64,-1}(SN)
502
- instance: Linear
503
- output_format: BFP[8|8]{64,-1}(SN)
504
- weight_format: BFP[8|8]{64,-1}(SN)
505
- weight_sparseness: DENSE
506
- model.decoder.layers.3.self_attn.out_proj:
507
- accum_format: SAME
508
- approximation_function: NONE
509
- bias_format: SAME
510
- input_format: BFP[8|8]{64,-1}(SN)
511
- instance: Linear
512
- output_format: SAME
513
- weight_format: BFP[8|8]{64,-1}(SN)
514
- weight_sparseness: DENSE
515
- model.decoder.layers.3.self_attn.q_proj:
516
- accum_format: SAME
517
- approximation_function: NONE
518
- bias_format: SAME
519
- input_format: BFP[8|8]{64,-1}(SN)
520
- instance: Linear
521
- output_format: BFP[8|8]{64,-1}(SN)
522
- weight_format: BFP[8|8]{64,-1}(SN)
523
- weight_sparseness: DENSE
524
- model.decoder.layers.3.self_attn.softmax:
525
- approximation_function: NONE
526
- input_format: SAME
527
- instance: Softmax
528
- output_format: SAME
529
- model.decoder.layers.3.self_attn.v_proj:
530
- accum_format: SAME
531
- approximation_function: NONE
532
- bias_format: SAME
533
- input_format: BFP[8|8]{64,-1}(SN)
534
- instance: Linear
535
- output_format: BFP[8|8]{64,-1}(SN)
536
- weight_format: BFP[8|8]{64,-1}(SN)
537
- weight_sparseness: DENSE
538
- model.decoder.layers.3.self_attn_layer_norm:
539
- approximation_function: NONE
540
- bias_format: SAME
541
- input_format: SAME
542
- instance: LayerNorm
543
- output_format: SAME
544
- weight_format: SAME
545
- model.decoder.layers.4.activation_fn:
546
- approximation_function: NONE
547
- input_format: SAME
548
- instance: ReLU
549
- output_format: SAME
550
- model.decoder.layers.4.dropout:
551
- approximation_function: NONE
552
- input_format: SAME
553
- instance: Dropout
554
- output_format: SAME
555
- model.decoder.layers.4.fc1:
556
- accum_format: SAME
557
- approximation_function: NONE
558
- bias_format: SAME
559
- input_format: BFP[8|8]{64,-1}(SN)
560
- instance: Linear
561
- output_format: SAME
562
- weight_format: BFP[8|8]{64,-1}(SN)
563
- weight_sparseness: DENSE
564
- model.decoder.layers.4.fc2:
565
- accum_format: SAME
566
- approximation_function: NONE
567
- bias_format: SAME
568
- input_format: BFP[8|8]{64,-1}(SN)
569
- instance: Linear
570
- output_format: SAME
571
- weight_format: BFP[8|8]{64,-1}(SN)
572
- weight_sparseness: DENSE
573
- model.decoder.layers.4.final_layer_norm:
574
- approximation_function: NONE
575
- bias_format: SAME
576
- input_format: SAME
577
- instance: LayerNorm
578
- output_format: SAME
579
- weight_format: SAME
580
- model.decoder.layers.4.self_attn.dropout:
581
- approximation_function: NONE
582
- input_format: SAME
583
- instance: Dropout
584
- output_format: BFP[8|8]{64,-1}(SN)
585
- model.decoder.layers.4.self_attn.k_proj:
586
- accum_format: SAME
587
- approximation_function: NONE
588
- bias_format: SAME
589
- input_format: BFP[8|8]{64,-1}(SN)
590
- instance: Linear
591
- output_format: BFP[8|8]{64,-1}(SN)
592
- weight_format: BFP[8|8]{64,-1}(SN)
593
- weight_sparseness: DENSE
594
- model.decoder.layers.4.self_attn.out_proj:
595
- accum_format: SAME
596
- approximation_function: NONE
597
- bias_format: SAME
598
- input_format: BFP[8|8]{64,-1}(SN)
599
- instance: Linear
600
- output_format: SAME
601
- weight_format: BFP[8|8]{64,-1}(SN)
602
- weight_sparseness: DENSE
603
- model.decoder.layers.4.self_attn.q_proj:
604
- accum_format: SAME
605
- approximation_function: NONE
606
- bias_format: SAME
607
- input_format: BFP[8|8]{64,-1}(SN)
608
- instance: Linear
609
- output_format: BFP[8|8]{64,-1}(SN)
610
- weight_format: BFP[8|8]{64,-1}(SN)
611
- weight_sparseness: DENSE
612
- model.decoder.layers.4.self_attn.softmax:
613
- approximation_function: NONE
614
- input_format: SAME
615
- instance: Softmax
616
- output_format: SAME
617
- model.decoder.layers.4.self_attn.v_proj:
618
- accum_format: SAME
619
- approximation_function: NONE
620
- bias_format: SAME
621
- input_format: BFP[8|8]{64,-1}(SN)
622
- instance: Linear
623
- output_format: BFP[8|8]{64,-1}(SN)
624
- weight_format: BFP[8|8]{64,-1}(SN)
625
- weight_sparseness: DENSE
626
- model.decoder.layers.4.self_attn_layer_norm:
627
- approximation_function: NONE
628
- bias_format: SAME
629
- input_format: SAME
630
- instance: LayerNorm
631
- output_format: SAME
632
- weight_format: SAME
633
- model.decoder.layers.5.activation_fn:
634
- approximation_function: NONE
635
- input_format: SAME
636
- instance: ReLU
637
- output_format: SAME
638
- model.decoder.layers.5.dropout:
639
- approximation_function: NONE
640
- input_format: SAME
641
- instance: Dropout
642
- output_format: SAME
643
- model.decoder.layers.5.fc1:
644
- accum_format: SAME
645
- approximation_function: NONE
646
- bias_format: SAME
647
- input_format: BFP[8|8]{64,-1}(SN)
648
- instance: Linear
649
- output_format: SAME
650
- weight_format: BFP[8|8]{64,-1}(SN)
651
- weight_sparseness: DENSE
652
- model.decoder.layers.5.fc2:
653
- accum_format: SAME
654
- approximation_function: NONE
655
- bias_format: SAME
656
- input_format: BFP[8|8]{64,-1}(SN)
657
- instance: Linear
658
- output_format: SAME
659
- weight_format: BFP[8|8]{64,-1}(SN)
660
- weight_sparseness: DENSE
661
- model.decoder.layers.5.final_layer_norm:
662
- approximation_function: NONE
663
- bias_format: SAME
664
- input_format: SAME
665
- instance: LayerNorm
666
- output_format: SAME
667
- weight_format: SAME
668
- model.decoder.layers.5.self_attn.dropout:
669
- approximation_function: NONE
670
- input_format: SAME
671
- instance: Dropout
672
- output_format: BFP[8|8]{64,-1}(SN)
673
- model.decoder.layers.5.self_attn.k_proj:
674
- accum_format: SAME
675
- approximation_function: NONE
676
- bias_format: SAME
677
- input_format: BFP[8|8]{64,-1}(SN)
678
- instance: Linear
679
- output_format: BFP[8|8]{64,-1}(SN)
680
- weight_format: BFP[8|8]{64,-1}(SN)
681
- weight_sparseness: DENSE
682
- model.decoder.layers.5.self_attn.out_proj:
683
- accum_format: SAME
684
- approximation_function: NONE
685
- bias_format: SAME
686
- input_format: BFP[8|8]{64,-1}(SN)
687
- instance: Linear
688
- output_format: SAME
689
- weight_format: BFP[8|8]{64,-1}(SN)
690
- weight_sparseness: DENSE
691
- model.decoder.layers.5.self_attn.q_proj:
692
- accum_format: SAME
693
- approximation_function: NONE
694
- bias_format: SAME
695
- input_format: BFP[8|8]{64,-1}(SN)
696
- instance: Linear
697
- output_format: BFP[8|8]{64,-1}(SN)
698
- weight_format: BFP[8|8]{64,-1}(SN)
699
- weight_sparseness: DENSE
700
- model.decoder.layers.5.self_attn.softmax:
701
- approximation_function: NONE
702
- input_format: SAME
703
- instance: Softmax
704
- output_format: SAME
705
- model.decoder.layers.5.self_attn.v_proj:
706
- accum_format: SAME
707
- approximation_function: NONE
708
- bias_format: SAME
709
- input_format: BFP[8|8]{64,-1}(SN)
710
- instance: Linear
711
- output_format: BFP[8|8]{64,-1}(SN)
712
- weight_format: BFP[8|8]{64,-1}(SN)
713
- weight_sparseness: DENSE
714
- model.decoder.layers.5.self_attn_layer_norm:
715
- approximation_function: NONE
716
- bias_format: SAME
717
- input_format: SAME
718
- instance: LayerNorm
719
- output_format: SAME
720
- weight_format: SAME
721
- model.decoder.layers.6.activation_fn:
722
- approximation_function: NONE
723
- input_format: SAME
724
- instance: ReLU
725
- output_format: SAME
726
- model.decoder.layers.6.dropout:
727
- approximation_function: NONE
728
- input_format: SAME
729
- instance: Dropout
730
- output_format: SAME
731
- model.decoder.layers.6.fc1:
732
- accum_format: SAME
733
- approximation_function: NONE
734
- bias_format: SAME
735
- input_format: BFP[8|8]{64,-1}(SN)
736
- instance: Linear
737
- output_format: SAME
738
- weight_format: BFP[8|8]{64,-1}(SN)
739
- weight_sparseness: DENSE
740
- model.decoder.layers.6.fc2:
741
- accum_format: SAME
742
- approximation_function: NONE
743
- bias_format: SAME
744
- input_format: BFP[8|8]{64,-1}(SN)
745
- instance: Linear
746
- output_format: SAME
747
- weight_format: BFP[8|8]{64,-1}(SN)
748
- weight_sparseness: DENSE
749
- model.decoder.layers.6.final_layer_norm:
750
- approximation_function: NONE
751
- bias_format: SAME
752
- input_format: SAME
753
- instance: LayerNorm
754
- output_format: SAME
755
- weight_format: SAME
756
- model.decoder.layers.6.self_attn.dropout:
757
- approximation_function: NONE
758
- input_format: SAME
759
- instance: Dropout
760
- output_format: BFP[8|8]{64,-1}(SN)
761
- model.decoder.layers.6.self_attn.k_proj:
762
- accum_format: SAME
763
- approximation_function: NONE
764
- bias_format: SAME
765
- input_format: BFP[8|8]{64,-1}(SN)
766
- instance: Linear
767
- output_format: BFP[8|8]{64,-1}(SN)
768
- weight_format: BFP[8|8]{64,-1}(SN)
769
- weight_sparseness: DENSE
770
- model.decoder.layers.6.self_attn.out_proj:
771
- accum_format: SAME
772
- approximation_function: NONE
773
- bias_format: SAME
774
- input_format: BFP[8|8]{64,-1}(SN)
775
- instance: Linear
776
- output_format: SAME
777
- weight_format: BFP[8|8]{64,-1}(SN)
778
- weight_sparseness: DENSE
779
- model.decoder.layers.6.self_attn.q_proj:
780
- accum_format: SAME
781
- approximation_function: NONE
782
- bias_format: SAME
783
- input_format: BFP[8|8]{64,-1}(SN)
784
- instance: Linear
785
- output_format: BFP[8|8]{64,-1}(SN)
786
- weight_format: BFP[8|8]{64,-1}(SN)
787
- weight_sparseness: DENSE
788
- model.decoder.layers.6.self_attn.softmax:
789
- approximation_function: NONE
790
- input_format: SAME
791
- instance: Softmax
792
- output_format: SAME
793
- model.decoder.layers.6.self_attn.v_proj:
794
- accum_format: SAME
795
- approximation_function: NONE
796
- bias_format: SAME
797
- input_format: BFP[8|8]{64,-1}(SN)
798
- instance: Linear
799
- output_format: BFP[8|8]{64,-1}(SN)
800
- weight_format: BFP[8|8]{64,-1}(SN)
801
- weight_sparseness: DENSE
802
- model.decoder.layers.6.self_attn_layer_norm:
803
- approximation_function: NONE
804
- bias_format: SAME
805
- input_format: SAME
806
- instance: LayerNorm
807
- output_format: SAME
808
- weight_format: SAME
809
- model.decoder.layers.7.activation_fn:
810
- approximation_function: NONE
811
- input_format: SAME
812
- instance: ReLU
813
- output_format: SAME
814
- model.decoder.layers.7.dropout:
815
- approximation_function: NONE
816
- input_format: SAME
817
- instance: Dropout
818
- output_format: SAME
819
- model.decoder.layers.7.fc1:
820
- accum_format: SAME
821
- approximation_function: NONE
822
- bias_format: SAME
823
- input_format: BFP[8|8]{64,-1}(SN)
824
- instance: Linear
825
- output_format: SAME
826
- weight_format: BFP[8|8]{64,-1}(SN)
827
- weight_sparseness: DENSE
828
- model.decoder.layers.7.fc2:
829
- accum_format: SAME
830
- approximation_function: NONE
831
- bias_format: SAME
832
- input_format: BFP[8|8]{64,-1}(SN)
833
- instance: Linear
834
- output_format: SAME
835
- weight_format: BFP[8|8]{64,-1}(SN)
836
- weight_sparseness: DENSE
837
- model.decoder.layers.7.final_layer_norm:
838
- approximation_function: NONE
839
- bias_format: SAME
840
- input_format: SAME
841
- instance: LayerNorm
842
- output_format: SAME
843
- weight_format: SAME
844
- model.decoder.layers.7.self_attn.dropout:
845
- approximation_function: NONE
846
- input_format: SAME
847
- instance: Dropout
848
- output_format: BFP[8|8]{64,-1}(SN)
849
- model.decoder.layers.7.self_attn.k_proj:
850
- accum_format: SAME
851
- approximation_function: NONE
852
- bias_format: SAME
853
- input_format: BFP[8|8]{64,-1}(SN)
854
- instance: Linear
855
- output_format: BFP[8|8]{64,-1}(SN)
856
- weight_format: BFP[8|8]{64,-1}(SN)
857
- weight_sparseness: DENSE
858
- model.decoder.layers.7.self_attn.out_proj:
859
- accum_format: SAME
860
- approximation_function: NONE
861
- bias_format: SAME
862
- input_format: BFP[8|8]{64,-1}(SN)
863
- instance: Linear
864
- output_format: SAME
865
- weight_format: BFP[8|8]{64,-1}(SN)
866
- weight_sparseness: DENSE
867
- model.decoder.layers.7.self_attn.q_proj:
868
- accum_format: SAME
869
- approximation_function: NONE
870
- bias_format: SAME
871
- input_format: BFP[8|8]{64,-1}(SN)
872
- instance: Linear
873
- output_format: BFP[8|8]{64,-1}(SN)
874
- weight_format: BFP[8|8]{64,-1}(SN)
875
- weight_sparseness: DENSE
876
- model.decoder.layers.7.self_attn.softmax:
877
- approximation_function: NONE
878
- input_format: SAME
879
- instance: Softmax
880
- output_format: SAME
881
- model.decoder.layers.7.self_attn.v_proj:
882
- accum_format: SAME
883
- approximation_function: NONE
884
- bias_format: SAME
885
- input_format: BFP[8|8]{64,-1}(SN)
886
- instance: Linear
887
- output_format: BFP[8|8]{64,-1}(SN)
888
- weight_format: BFP[8|8]{64,-1}(SN)
889
- weight_sparseness: DENSE
890
- model.decoder.layers.7.self_attn_layer_norm:
891
- approximation_function: NONE
892
- bias_format: SAME
893
- input_format: SAME
894
- instance: LayerNorm
895
- output_format: SAME
896
- weight_format: SAME
897
- model.decoder.layers.8.activation_fn:
898
- approximation_function: NONE
899
- input_format: SAME
900
- instance: ReLU
901
- output_format: SAME
902
- model.decoder.layers.8.dropout:
903
- approximation_function: NONE
904
- input_format: SAME
905
- instance: Dropout
906
- output_format: SAME
907
- model.decoder.layers.8.fc1:
908
- accum_format: SAME
909
- approximation_function: NONE
910
- bias_format: SAME
911
- input_format: BFP[8|8]{64,-1}(SN)
912
- instance: Linear
913
- output_format: SAME
914
- weight_format: BFP[8|8]{64,-1}(SN)
915
- weight_sparseness: DENSE
916
- model.decoder.layers.8.fc2:
917
- accum_format: SAME
918
- approximation_function: NONE
919
- bias_format: SAME
920
- input_format: BFP[8|8]{64,-1}(SN)
921
- instance: Linear
922
- output_format: SAME
923
- weight_format: BFP[8|8]{64,-1}(SN)
924
- weight_sparseness: DENSE
925
- model.decoder.layers.8.final_layer_norm:
926
- approximation_function: NONE
927
- bias_format: SAME
928
- input_format: SAME
929
- instance: LayerNorm
930
- output_format: SAME
931
- weight_format: SAME
932
- model.decoder.layers.8.self_attn.dropout:
933
- approximation_function: NONE
934
- input_format: SAME
935
- instance: Dropout
936
- output_format: BFP[8|8]{64,-1}(SN)
937
- model.decoder.layers.8.self_attn.k_proj:
938
- accum_format: SAME
939
- approximation_function: NONE
940
- bias_format: SAME
941
- input_format: BFP[8|8]{64,-1}(SN)
942
- instance: Linear
943
- output_format: BFP[8|8]{64,-1}(SN)
944
- weight_format: BFP[8|8]{64,-1}(SN)
945
- weight_sparseness: DENSE
946
- model.decoder.layers.8.self_attn.out_proj:
947
- accum_format: SAME
948
- approximation_function: NONE
949
- bias_format: SAME
950
- input_format: BFP[8|8]{64,-1}(SN)
951
- instance: Linear
952
- output_format: SAME
953
- weight_format: BFP[8|8]{64,-1}(SN)
954
- weight_sparseness: DENSE
955
- model.decoder.layers.8.self_attn.q_proj:
956
- accum_format: SAME
957
- approximation_function: NONE
958
- bias_format: SAME
959
- input_format: BFP[8|8]{64,-1}(SN)
960
- instance: Linear
961
- output_format: BFP[8|8]{64,-1}(SN)
962
- weight_format: BFP[8|8]{64,-1}(SN)
963
- weight_sparseness: DENSE
964
- model.decoder.layers.8.self_attn.softmax:
965
- approximation_function: NONE
966
- input_format: SAME
967
- instance: Softmax
968
- output_format: SAME
969
- model.decoder.layers.8.self_attn.v_proj:
970
- accum_format: SAME
971
- approximation_function: NONE
972
- bias_format: SAME
973
- input_format: BFP[8|8]{64,-1}(SN)
974
- instance: Linear
975
- output_format: BFP[8|8]{64,-1}(SN)
976
- weight_format: BFP[8|8]{64,-1}(SN)
977
- weight_sparseness: DENSE
978
- model.decoder.layers.8.self_attn_layer_norm:
979
- approximation_function: NONE
980
- bias_format: SAME
981
- input_format: SAME
982
- instance: LayerNorm
983
- output_format: SAME
984
- weight_format: SAME
985
- model.decoder.layers.9.activation_fn:
986
- approximation_function: NONE
987
- input_format: SAME
988
- instance: ReLU
989
- output_format: SAME
990
- model.decoder.layers.9.dropout:
991
- approximation_function: NONE
992
- input_format: SAME
993
- instance: Dropout
994
- output_format: SAME
995
- model.decoder.layers.9.fc1:
996
- accum_format: SAME
997
- approximation_function: NONE
998
- bias_format: SAME
999
- input_format: BFP[8|8]{64,-1}(SN)
1000
- instance: Linear
1001
- output_format: SAME
1002
- weight_format: BFP[8|8]{64,-1}(SN)
1003
- weight_sparseness: DENSE
1004
- model.decoder.layers.9.fc2:
1005
- accum_format: SAME
1006
- approximation_function: NONE
1007
- bias_format: SAME
1008
- input_format: BFP[8|8]{64,-1}(SN)
1009
- instance: Linear
1010
- output_format: SAME
1011
- weight_format: BFP[8|8]{64,-1}(SN)
1012
- weight_sparseness: DENSE
1013
- model.decoder.layers.9.final_layer_norm:
1014
- approximation_function: NONE
1015
- bias_format: SAME
1016
- input_format: SAME
1017
- instance: LayerNorm
1018
- output_format: SAME
1019
- weight_format: SAME
1020
- model.decoder.layers.9.self_attn.dropout:
1021
- approximation_function: NONE
1022
- input_format: SAME
1023
- instance: Dropout
1024
- output_format: BFP[8|8]{64,-1}(SN)
1025
- model.decoder.layers.9.self_attn.k_proj:
1026
- accum_format: SAME
1027
- approximation_function: NONE
1028
- bias_format: SAME
1029
- input_format: BFP[8|8]{64,-1}(SN)
1030
- instance: Linear
1031
- output_format: BFP[8|8]{64,-1}(SN)
1032
- weight_format: BFP[8|8]{64,-1}(SN)
1033
- weight_sparseness: DENSE
1034
- model.decoder.layers.9.self_attn.out_proj:
1035
- accum_format: SAME
1036
- approximation_function: NONE
1037
- bias_format: SAME
1038
- input_format: BFP[8|8]{64,-1}(SN)
1039
- instance: Linear
1040
- output_format: SAME
1041
- weight_format: BFP[8|8]{64,-1}(SN)
1042
- weight_sparseness: DENSE
1043
- model.decoder.layers.9.self_attn.q_proj:
1044
- accum_format: SAME
1045
- approximation_function: NONE
1046
- bias_format: SAME
1047
- input_format: BFP[8|8]{64,-1}(SN)
1048
- instance: Linear
1049
- output_format: BFP[8|8]{64,-1}(SN)
1050
- weight_format: BFP[8|8]{64,-1}(SN)
1051
- weight_sparseness: DENSE
1052
- model.decoder.layers.9.self_attn.softmax:
1053
- approximation_function: NONE
1054
- input_format: SAME
1055
- instance: Softmax
1056
- output_format: SAME
1057
- model.decoder.layers.9.self_attn.v_proj:
1058
- accum_format: SAME
1059
- approximation_function: NONE
1060
- bias_format: SAME
1061
- input_format: BFP[8|8]{64,-1}(SN)
1062
- instance: Linear
1063
- output_format: BFP[8|8]{64,-1}(SN)
1064
- weight_format: BFP[8|8]{64,-1}(SN)
1065
- weight_sparseness: DENSE
1066
- model.decoder.layers.9.self_attn_layer_norm:
1067
- approximation_function: NONE
1068
- bias_format: SAME
1069
- input_format: SAME
1070
- instance: LayerNorm
1071
- output_format: SAME
1072
- weight_format: SAME