pere committed on
Commit
c90c1b7
1 Parent(s): c070a09

large in one shard

Browse files
pytorch_model-00002-of-00002.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4dc921fe35459b4c2d0da49f6f14e3aeccb8e7eac26cf73ac162e854e89f508e
3
- size 1180724112
 
 
 
 
pytorch_model-00001-of-00002.bin → pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1f74e8e89a719bb75630eb1535c03ee1c1fae738e70e4a8dacc7d8d6fe5f0d7
3
- size 4993672129
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1682ff604dd299a54f3678b026874b678f86577e3fca047f51803d52b4655fd
3
+ size 6174380865
pytorch_model.bin.index.json CHANGED
@@ -330,10 +330,10 @@
330
  "model.decoder.layers.20.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
331
  "model.decoder.layers.20.fc1.bias": "pytorch_model-00001-of-00002.bin",
332
  "model.decoder.layers.20.fc1.weight": "pytorch_model-00001-of-00002.bin",
333
- "model.decoder.layers.20.fc2.bias": "pytorch_model-00002-of-00002.bin",
334
- "model.decoder.layers.20.fc2.weight": "pytorch_model-00002-of-00002.bin",
335
- "model.decoder.layers.20.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
336
- "model.decoder.layers.20.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
337
  "model.decoder.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
338
  "model.decoder.layers.20.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
339
  "model.decoder.layers.20.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
@@ -343,222 +343,222 @@
343
  "model.decoder.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
344
  "model.decoder.layers.20.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
345
  "model.decoder.layers.20.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
346
- "model.decoder.layers.21.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
347
- "model.decoder.layers.21.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
348
- "model.decoder.layers.21.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
349
- "model.decoder.layers.21.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
350
- "model.decoder.layers.21.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
351
- "model.decoder.layers.21.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
352
- "model.decoder.layers.21.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
353
- "model.decoder.layers.21.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
354
- "model.decoder.layers.21.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
355
- "model.decoder.layers.21.fc1.bias": "pytorch_model-00002-of-00002.bin",
356
- "model.decoder.layers.21.fc1.weight": "pytorch_model-00002-of-00002.bin",
357
- "model.decoder.layers.21.fc2.bias": "pytorch_model-00002-of-00002.bin",
358
- "model.decoder.layers.21.fc2.weight": "pytorch_model-00002-of-00002.bin",
359
- "model.decoder.layers.21.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
360
- "model.decoder.layers.21.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
361
- "model.decoder.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
362
- "model.decoder.layers.21.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
363
- "model.decoder.layers.21.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
364
- "model.decoder.layers.21.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
365
- "model.decoder.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
366
- "model.decoder.layers.21.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
367
- "model.decoder.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
368
- "model.decoder.layers.21.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
369
- "model.decoder.layers.21.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
370
- "model.decoder.layers.22.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
371
- "model.decoder.layers.22.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
372
- "model.decoder.layers.22.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
373
- "model.decoder.layers.22.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
374
- "model.decoder.layers.22.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
375
- "model.decoder.layers.22.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
376
- "model.decoder.layers.22.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
377
- "model.decoder.layers.22.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
378
- "model.decoder.layers.22.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
379
- "model.decoder.layers.22.fc1.bias": "pytorch_model-00002-of-00002.bin",
380
- "model.decoder.layers.22.fc1.weight": "pytorch_model-00002-of-00002.bin",
381
- "model.decoder.layers.22.fc2.bias": "pytorch_model-00002-of-00002.bin",
382
- "model.decoder.layers.22.fc2.weight": "pytorch_model-00002-of-00002.bin",
383
- "model.decoder.layers.22.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
384
- "model.decoder.layers.22.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
385
- "model.decoder.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
386
- "model.decoder.layers.22.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
387
- "model.decoder.layers.22.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
388
- "model.decoder.layers.22.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
389
- "model.decoder.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
390
- "model.decoder.layers.22.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
391
- "model.decoder.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
392
- "model.decoder.layers.22.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
393
- "model.decoder.layers.22.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
394
- "model.decoder.layers.23.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
395
- "model.decoder.layers.23.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
396
- "model.decoder.layers.23.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
397
- "model.decoder.layers.23.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
398
- "model.decoder.layers.23.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
399
- "model.decoder.layers.23.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
400
- "model.decoder.layers.23.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
401
- "model.decoder.layers.23.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
402
- "model.decoder.layers.23.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
403
- "model.decoder.layers.23.fc1.bias": "pytorch_model-00002-of-00002.bin",
404
- "model.decoder.layers.23.fc1.weight": "pytorch_model-00002-of-00002.bin",
405
- "model.decoder.layers.23.fc2.bias": "pytorch_model-00002-of-00002.bin",
406
- "model.decoder.layers.23.fc2.weight": "pytorch_model-00002-of-00002.bin",
407
- "model.decoder.layers.23.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
408
- "model.decoder.layers.23.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
409
- "model.decoder.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
410
- "model.decoder.layers.23.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
411
- "model.decoder.layers.23.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
412
- "model.decoder.layers.23.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
413
- "model.decoder.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
414
- "model.decoder.layers.23.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
415
- "model.decoder.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
416
- "model.decoder.layers.23.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
417
- "model.decoder.layers.23.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
418
- "model.decoder.layers.24.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
419
- "model.decoder.layers.24.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
420
- "model.decoder.layers.24.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
421
- "model.decoder.layers.24.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
422
- "model.decoder.layers.24.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
423
- "model.decoder.layers.24.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
424
- "model.decoder.layers.24.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
425
- "model.decoder.layers.24.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
426
- "model.decoder.layers.24.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
427
- "model.decoder.layers.24.fc1.bias": "pytorch_model-00002-of-00002.bin",
428
- "model.decoder.layers.24.fc1.weight": "pytorch_model-00002-of-00002.bin",
429
- "model.decoder.layers.24.fc2.bias": "pytorch_model-00002-of-00002.bin",
430
- "model.decoder.layers.24.fc2.weight": "pytorch_model-00002-of-00002.bin",
431
- "model.decoder.layers.24.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
432
- "model.decoder.layers.24.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
433
- "model.decoder.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
434
- "model.decoder.layers.24.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
435
- "model.decoder.layers.24.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
436
- "model.decoder.layers.24.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
437
- "model.decoder.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
438
- "model.decoder.layers.24.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
439
- "model.decoder.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
440
- "model.decoder.layers.24.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
441
- "model.decoder.layers.24.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
442
- "model.decoder.layers.25.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
443
- "model.decoder.layers.25.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
444
- "model.decoder.layers.25.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
445
- "model.decoder.layers.25.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
446
- "model.decoder.layers.25.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
447
- "model.decoder.layers.25.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
448
- "model.decoder.layers.25.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
449
- "model.decoder.layers.25.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
450
- "model.decoder.layers.25.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
451
- "model.decoder.layers.25.fc1.bias": "pytorch_model-00002-of-00002.bin",
452
- "model.decoder.layers.25.fc1.weight": "pytorch_model-00002-of-00002.bin",
453
- "model.decoder.layers.25.fc2.bias": "pytorch_model-00002-of-00002.bin",
454
- "model.decoder.layers.25.fc2.weight": "pytorch_model-00002-of-00002.bin",
455
- "model.decoder.layers.25.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
456
- "model.decoder.layers.25.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
457
- "model.decoder.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
458
- "model.decoder.layers.25.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
459
- "model.decoder.layers.25.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
460
- "model.decoder.layers.25.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
461
- "model.decoder.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
462
- "model.decoder.layers.25.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
463
- "model.decoder.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
464
- "model.decoder.layers.25.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
465
- "model.decoder.layers.25.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
466
- "model.decoder.layers.26.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
467
- "model.decoder.layers.26.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
468
- "model.decoder.layers.26.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
469
- "model.decoder.layers.26.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
470
- "model.decoder.layers.26.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
471
- "model.decoder.layers.26.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
472
- "model.decoder.layers.26.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
473
- "model.decoder.layers.26.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
474
- "model.decoder.layers.26.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
475
- "model.decoder.layers.26.fc1.bias": "pytorch_model-00002-of-00002.bin",
476
- "model.decoder.layers.26.fc1.weight": "pytorch_model-00002-of-00002.bin",
477
- "model.decoder.layers.26.fc2.bias": "pytorch_model-00002-of-00002.bin",
478
- "model.decoder.layers.26.fc2.weight": "pytorch_model-00002-of-00002.bin",
479
- "model.decoder.layers.26.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
480
- "model.decoder.layers.26.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
481
- "model.decoder.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
482
- "model.decoder.layers.26.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
483
- "model.decoder.layers.26.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
484
- "model.decoder.layers.26.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
485
- "model.decoder.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
486
- "model.decoder.layers.26.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
487
- "model.decoder.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
488
- "model.decoder.layers.26.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
489
- "model.decoder.layers.26.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
490
- "model.decoder.layers.27.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
491
- "model.decoder.layers.27.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
492
- "model.decoder.layers.27.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
493
- "model.decoder.layers.27.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
494
- "model.decoder.layers.27.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
495
- "model.decoder.layers.27.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
496
- "model.decoder.layers.27.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
497
- "model.decoder.layers.27.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
498
- "model.decoder.layers.27.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
499
- "model.decoder.layers.27.fc1.bias": "pytorch_model-00002-of-00002.bin",
500
- "model.decoder.layers.27.fc1.weight": "pytorch_model-00002-of-00002.bin",
501
- "model.decoder.layers.27.fc2.bias": "pytorch_model-00002-of-00002.bin",
502
- "model.decoder.layers.27.fc2.weight": "pytorch_model-00002-of-00002.bin",
503
- "model.decoder.layers.27.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
504
- "model.decoder.layers.27.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
505
- "model.decoder.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
506
- "model.decoder.layers.27.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
507
- "model.decoder.layers.27.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
508
- "model.decoder.layers.27.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
509
- "model.decoder.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
510
- "model.decoder.layers.27.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
511
- "model.decoder.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
512
- "model.decoder.layers.27.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
513
- "model.decoder.layers.27.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
514
- "model.decoder.layers.28.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
515
- "model.decoder.layers.28.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
516
- "model.decoder.layers.28.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
517
- "model.decoder.layers.28.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
518
- "model.decoder.layers.28.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
519
- "model.decoder.layers.28.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
520
- "model.decoder.layers.28.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
521
- "model.decoder.layers.28.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
522
- "model.decoder.layers.28.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
523
- "model.decoder.layers.28.fc1.bias": "pytorch_model-00002-of-00002.bin",
524
- "model.decoder.layers.28.fc1.weight": "pytorch_model-00002-of-00002.bin",
525
- "model.decoder.layers.28.fc2.bias": "pytorch_model-00002-of-00002.bin",
526
- "model.decoder.layers.28.fc2.weight": "pytorch_model-00002-of-00002.bin",
527
- "model.decoder.layers.28.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
528
- "model.decoder.layers.28.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
529
- "model.decoder.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
530
- "model.decoder.layers.28.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
531
- "model.decoder.layers.28.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
532
- "model.decoder.layers.28.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
533
- "model.decoder.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
534
- "model.decoder.layers.28.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
535
- "model.decoder.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
536
- "model.decoder.layers.28.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
537
- "model.decoder.layers.28.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
538
- "model.decoder.layers.29.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
539
- "model.decoder.layers.29.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
540
- "model.decoder.layers.29.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
541
- "model.decoder.layers.29.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
542
- "model.decoder.layers.29.encoder_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
543
- "model.decoder.layers.29.encoder_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
544
- "model.decoder.layers.29.encoder_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
545
- "model.decoder.layers.29.encoder_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
546
- "model.decoder.layers.29.encoder_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
547
- "model.decoder.layers.29.fc1.bias": "pytorch_model-00002-of-00002.bin",
548
- "model.decoder.layers.29.fc1.weight": "pytorch_model-00002-of-00002.bin",
549
- "model.decoder.layers.29.fc2.bias": "pytorch_model-00002-of-00002.bin",
550
- "model.decoder.layers.29.fc2.weight": "pytorch_model-00002-of-00002.bin",
551
- "model.decoder.layers.29.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
552
- "model.decoder.layers.29.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
553
- "model.decoder.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
554
- "model.decoder.layers.29.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
555
- "model.decoder.layers.29.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
556
- "model.decoder.layers.29.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
557
- "model.decoder.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
558
- "model.decoder.layers.29.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
559
- "model.decoder.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
560
- "model.decoder.layers.29.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
561
- "model.decoder.layers.29.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
562
  "model.decoder.layers.3.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
563
  "model.decoder.layers.3.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
564
  "model.decoder.layers.3.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
@@ -583,7 +583,7 @@
583
  "model.decoder.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
584
  "model.decoder.layers.3.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
585
  "model.decoder.layers.3.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
586
- "model.decoder.layers.30.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
587
  "model.decoder.layers.30.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
588
  "model.decoder.layers.30.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
589
  "model.decoder.layers.30.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
@@ -598,15 +598,15 @@
598
  "model.decoder.layers.30.fc2.weight": "pytorch_model-00002-of-00002.bin",
599
  "model.decoder.layers.30.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
600
  "model.decoder.layers.30.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
601
- "model.decoder.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
602
- "model.decoder.layers.30.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
603
- "model.decoder.layers.30.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
604
- "model.decoder.layers.30.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
605
- "model.decoder.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
606
- "model.decoder.layers.30.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
607
- "model.decoder.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
608
- "model.decoder.layers.30.self_attn_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
609
- "model.decoder.layers.30.self_attn_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
610
  "model.decoder.layers.31.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
611
  "model.decoder.layers.31.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
612
  "model.decoder.layers.31.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
 
330
  "model.decoder.layers.20.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
331
  "model.decoder.layers.20.fc1.bias": "pytorch_model-00001-of-00002.bin",
332
  "model.decoder.layers.20.fc1.weight": "pytorch_model-00001-of-00002.bin",
333
+ "model.decoder.layers.20.fc2.bias": "pytorch_model-00001-of-00002.bin",
334
+ "model.decoder.layers.20.fc2.weight": "pytorch_model-00001-of-00002.bin",
335
+ "model.decoder.layers.20.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
336
+ "model.decoder.layers.20.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
337
  "model.decoder.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
338
  "model.decoder.layers.20.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
339
  "model.decoder.layers.20.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
 
343
  "model.decoder.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
344
  "model.decoder.layers.20.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
345
  "model.decoder.layers.20.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
346
+ "model.decoder.layers.21.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
347
+ "model.decoder.layers.21.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
348
+ "model.decoder.layers.21.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
349
+ "model.decoder.layers.21.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
350
+ "model.decoder.layers.21.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
351
+ "model.decoder.layers.21.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
352
+ "model.decoder.layers.21.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
353
+ "model.decoder.layers.21.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
354
+ "model.decoder.layers.21.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
355
+ "model.decoder.layers.21.fc1.bias": "pytorch_model-00001-of-00002.bin",
356
+ "model.decoder.layers.21.fc1.weight": "pytorch_model-00001-of-00002.bin",
357
+ "model.decoder.layers.21.fc2.bias": "pytorch_model-00001-of-00002.bin",
358
+ "model.decoder.layers.21.fc2.weight": "pytorch_model-00001-of-00002.bin",
359
+ "model.decoder.layers.21.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
360
+ "model.decoder.layers.21.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
361
+ "model.decoder.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
362
+ "model.decoder.layers.21.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
363
+ "model.decoder.layers.21.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
364
+ "model.decoder.layers.21.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
365
+ "model.decoder.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
366
+ "model.decoder.layers.21.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
367
+ "model.decoder.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
368
+ "model.decoder.layers.21.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
369
+ "model.decoder.layers.21.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
370
+ "model.decoder.layers.22.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
371
+ "model.decoder.layers.22.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
372
+ "model.decoder.layers.22.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
373
+ "model.decoder.layers.22.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
374
+ "model.decoder.layers.22.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
375
+ "model.decoder.layers.22.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
376
+ "model.decoder.layers.22.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
377
+ "model.decoder.layers.22.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
378
+ "model.decoder.layers.22.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
379
+ "model.decoder.layers.22.fc1.bias": "pytorch_model-00001-of-00002.bin",
380
+ "model.decoder.layers.22.fc1.weight": "pytorch_model-00001-of-00002.bin",
381
+ "model.decoder.layers.22.fc2.bias": "pytorch_model-00001-of-00002.bin",
382
+ "model.decoder.layers.22.fc2.weight": "pytorch_model-00001-of-00002.bin",
383
+ "model.decoder.layers.22.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
384
+ "model.decoder.layers.22.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
385
+ "model.decoder.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
386
+ "model.decoder.layers.22.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
387
+ "model.decoder.layers.22.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
388
+ "model.decoder.layers.22.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
389
+ "model.decoder.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
390
+ "model.decoder.layers.22.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
391
+ "model.decoder.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
392
+ "model.decoder.layers.22.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
393
+ "model.decoder.layers.22.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
394
+ "model.decoder.layers.23.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
395
+ "model.decoder.layers.23.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
396
+ "model.decoder.layers.23.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
397
+ "model.decoder.layers.23.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
398
+ "model.decoder.layers.23.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
399
+ "model.decoder.layers.23.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
400
+ "model.decoder.layers.23.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
401
+ "model.decoder.layers.23.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
402
+ "model.decoder.layers.23.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
403
+ "model.decoder.layers.23.fc1.bias": "pytorch_model-00001-of-00002.bin",
404
+ "model.decoder.layers.23.fc1.weight": "pytorch_model-00001-of-00002.bin",
405
+ "model.decoder.layers.23.fc2.bias": "pytorch_model-00001-of-00002.bin",
406
+ "model.decoder.layers.23.fc2.weight": "pytorch_model-00001-of-00002.bin",
407
+ "model.decoder.layers.23.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
408
+ "model.decoder.layers.23.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
409
+ "model.decoder.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
410
+ "model.decoder.layers.23.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
411
+ "model.decoder.layers.23.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
412
+ "model.decoder.layers.23.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
413
+ "model.decoder.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
414
+ "model.decoder.layers.23.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
415
+ "model.decoder.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
416
+ "model.decoder.layers.23.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
417
+ "model.decoder.layers.23.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
418
+ "model.decoder.layers.24.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
419
+ "model.decoder.layers.24.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
420
+ "model.decoder.layers.24.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
421
+ "model.decoder.layers.24.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
422
+ "model.decoder.layers.24.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
423
+ "model.decoder.layers.24.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
424
+ "model.decoder.layers.24.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
425
+ "model.decoder.layers.24.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
426
+ "model.decoder.layers.24.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
427
+ "model.decoder.layers.24.fc1.bias": "pytorch_model-00001-of-00002.bin",
428
+ "model.decoder.layers.24.fc1.weight": "pytorch_model-00001-of-00002.bin",
429
+ "model.decoder.layers.24.fc2.bias": "pytorch_model-00001-of-00002.bin",
430
+ "model.decoder.layers.24.fc2.weight": "pytorch_model-00001-of-00002.bin",
431
+ "model.decoder.layers.24.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
432
+ "model.decoder.layers.24.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
433
+ "model.decoder.layers.24.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
434
+ "model.decoder.layers.24.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
435
+ "model.decoder.layers.24.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
436
+ "model.decoder.layers.24.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
437
+ "model.decoder.layers.24.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
438
+ "model.decoder.layers.24.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
439
+ "model.decoder.layers.24.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
440
+ "model.decoder.layers.24.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
441
+ "model.decoder.layers.24.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
442
+ "model.decoder.layers.25.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
443
+ "model.decoder.layers.25.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
444
+ "model.decoder.layers.25.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
445
+ "model.decoder.layers.25.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
446
+ "model.decoder.layers.25.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
447
+ "model.decoder.layers.25.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
448
+ "model.decoder.layers.25.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
449
+ "model.decoder.layers.25.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
450
+ "model.decoder.layers.25.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
451
+ "model.decoder.layers.25.fc1.bias": "pytorch_model-00001-of-00002.bin",
452
+ "model.decoder.layers.25.fc1.weight": "pytorch_model-00001-of-00002.bin",
453
+ "model.decoder.layers.25.fc2.bias": "pytorch_model-00001-of-00002.bin",
454
+ "model.decoder.layers.25.fc2.weight": "pytorch_model-00001-of-00002.bin",
455
+ "model.decoder.layers.25.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
456
+ "model.decoder.layers.25.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
457
+ "model.decoder.layers.25.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
458
+ "model.decoder.layers.25.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
459
+ "model.decoder.layers.25.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
460
+ "model.decoder.layers.25.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
461
+ "model.decoder.layers.25.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
462
+ "model.decoder.layers.25.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
463
+ "model.decoder.layers.25.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
464
+ "model.decoder.layers.25.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
465
+ "model.decoder.layers.25.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
466
+ "model.decoder.layers.26.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
467
+ "model.decoder.layers.26.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
468
+ "model.decoder.layers.26.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
469
+ "model.decoder.layers.26.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
470
+ "model.decoder.layers.26.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
471
+ "model.decoder.layers.26.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
472
+ "model.decoder.layers.26.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
473
+ "model.decoder.layers.26.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
474
+ "model.decoder.layers.26.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
475
+ "model.decoder.layers.26.fc1.bias": "pytorch_model-00001-of-00002.bin",
476
+ "model.decoder.layers.26.fc1.weight": "pytorch_model-00001-of-00002.bin",
477
+ "model.decoder.layers.26.fc2.bias": "pytorch_model-00001-of-00002.bin",
478
+ "model.decoder.layers.26.fc2.weight": "pytorch_model-00001-of-00002.bin",
479
+ "model.decoder.layers.26.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
480
+ "model.decoder.layers.26.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
481
+ "model.decoder.layers.26.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
482
+ "model.decoder.layers.26.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
483
+ "model.decoder.layers.26.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
484
+ "model.decoder.layers.26.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
485
+ "model.decoder.layers.26.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
486
+ "model.decoder.layers.26.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
487
+ "model.decoder.layers.26.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
488
+ "model.decoder.layers.26.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
489
+ "model.decoder.layers.26.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
490
+ "model.decoder.layers.27.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
491
+ "model.decoder.layers.27.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
492
+ "model.decoder.layers.27.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
493
+ "model.decoder.layers.27.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
494
+ "model.decoder.layers.27.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
495
+ "model.decoder.layers.27.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
496
+ "model.decoder.layers.27.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
497
+ "model.decoder.layers.27.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
498
+ "model.decoder.layers.27.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
499
+ "model.decoder.layers.27.fc1.bias": "pytorch_model-00001-of-00002.bin",
500
+ "model.decoder.layers.27.fc1.weight": "pytorch_model-00001-of-00002.bin",
501
+ "model.decoder.layers.27.fc2.bias": "pytorch_model-00001-of-00002.bin",
502
+ "model.decoder.layers.27.fc2.weight": "pytorch_model-00001-of-00002.bin",
503
+ "model.decoder.layers.27.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
504
+ "model.decoder.layers.27.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
505
+ "model.decoder.layers.27.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
506
+ "model.decoder.layers.27.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
507
+ "model.decoder.layers.27.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
508
+ "model.decoder.layers.27.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
509
+ "model.decoder.layers.27.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
510
+ "model.decoder.layers.27.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
511
+ "model.decoder.layers.27.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
512
+ "model.decoder.layers.27.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
513
+ "model.decoder.layers.27.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
514
+ "model.decoder.layers.28.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
515
+ "model.decoder.layers.28.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
516
+ "model.decoder.layers.28.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
517
+ "model.decoder.layers.28.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
518
+ "model.decoder.layers.28.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
519
+ "model.decoder.layers.28.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
520
+ "model.decoder.layers.28.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
521
+ "model.decoder.layers.28.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
522
+ "model.decoder.layers.28.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
523
+ "model.decoder.layers.28.fc1.bias": "pytorch_model-00001-of-00002.bin",
524
+ "model.decoder.layers.28.fc1.weight": "pytorch_model-00001-of-00002.bin",
525
+ "model.decoder.layers.28.fc2.bias": "pytorch_model-00001-of-00002.bin",
526
+ "model.decoder.layers.28.fc2.weight": "pytorch_model-00001-of-00002.bin",
527
+ "model.decoder.layers.28.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
528
+ "model.decoder.layers.28.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
529
+ "model.decoder.layers.28.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
530
+ "model.decoder.layers.28.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
531
+ "model.decoder.layers.28.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
532
+ "model.decoder.layers.28.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
533
+ "model.decoder.layers.28.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
534
+ "model.decoder.layers.28.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
535
+ "model.decoder.layers.28.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
536
+ "model.decoder.layers.28.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
537
+ "model.decoder.layers.28.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
538
+ "model.decoder.layers.29.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
539
+ "model.decoder.layers.29.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
540
+ "model.decoder.layers.29.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
541
+ "model.decoder.layers.29.encoder_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
542
+ "model.decoder.layers.29.encoder_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
543
+ "model.decoder.layers.29.encoder_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
544
+ "model.decoder.layers.29.encoder_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
545
+ "model.decoder.layers.29.encoder_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
546
+ "model.decoder.layers.29.encoder_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
547
+ "model.decoder.layers.29.fc1.bias": "pytorch_model-00001-of-00002.bin",
548
+ "model.decoder.layers.29.fc1.weight": "pytorch_model-00001-of-00002.bin",
549
+ "model.decoder.layers.29.fc2.bias": "pytorch_model-00001-of-00002.bin",
550
+ "model.decoder.layers.29.fc2.weight": "pytorch_model-00001-of-00002.bin",
551
+ "model.decoder.layers.29.final_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
552
+ "model.decoder.layers.29.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
553
+ "model.decoder.layers.29.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
554
+ "model.decoder.layers.29.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
555
+ "model.decoder.layers.29.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
556
+ "model.decoder.layers.29.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
557
+ "model.decoder.layers.29.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
558
+ "model.decoder.layers.29.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
559
+ "model.decoder.layers.29.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
560
+ "model.decoder.layers.29.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
561
+ "model.decoder.layers.29.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
562
  "model.decoder.layers.3.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
563
  "model.decoder.layers.3.encoder_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
564
  "model.decoder.layers.3.encoder_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
 
583
  "model.decoder.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
584
  "model.decoder.layers.3.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
585
  "model.decoder.layers.3.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
586
+ "model.decoder.layers.30.encoder_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
587
  "model.decoder.layers.30.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
588
  "model.decoder.layers.30.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
589
  "model.decoder.layers.30.encoder_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
 
598
  "model.decoder.layers.30.fc2.weight": "pytorch_model-00002-of-00002.bin",
599
  "model.decoder.layers.30.final_layer_norm.bias": "pytorch_model-00002-of-00002.bin",
600
  "model.decoder.layers.30.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
601
+ "model.decoder.layers.30.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
602
+ "model.decoder.layers.30.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin",
603
+ "model.decoder.layers.30.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
604
+ "model.decoder.layers.30.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
605
+ "model.decoder.layers.30.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
606
+ "model.decoder.layers.30.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
607
+ "model.decoder.layers.30.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
608
+ "model.decoder.layers.30.self_attn_layer_norm.bias": "pytorch_model-00001-of-00002.bin",
609
+ "model.decoder.layers.30.self_attn_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
610
  "model.decoder.layers.31.encoder_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
611
  "model.decoder.layers.31.encoder_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
612
  "model.decoder.layers.31.encoder_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",