fsicoli committed on
Commit
6e324a6
1 Parent(s): e275270

Upload 11 files

Browse files
config.json CHANGED
@@ -43,7 +43,7 @@
43
  "num_mel_bins": 128,
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
- "torch_dtype": "float16",
47
  "transformers_version": "4.37.0.dev0",
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
 
43
  "num_mel_bins": 128,
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
+ "torch_dtype": "float32",
47
  "transformers_version": "4.37.0.dev0",
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:156e0fd71ba8d6d1a308460a813997db7d653c1f820bd2d5995abd49692f14a1
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f878789c48bcae8bdb738a21db184b61ea25d207190b7a28b1886fd661820964
3
  size 4993448880
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c05a1c27b9a9a440db979127057f5f66ec41e211bf68bb2ea3a227c4403c8e60
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cebf488a1c5179e1630e18b09c98e0fc49f633401cd2860996acc46e8ce31123
3
  size 1180663192
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4da10ec2e0bf47d4b44db41bb1bfc93e007a4873e0e2ff5dbc5e667cb0168b4f
3
  size 12333660476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b881cb7e883293384811c41e69a3af5ab73194ee3fd9c7fc959db40efbb8dce
3
  size 12333660476
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f584fc46b466d88dd39394bd540717b3bc1053ecc01185f34166047c3442833a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473904b1f2366db08d341e1d5587ca85e45deb227516e6d83bdef880288fcb69
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd2b4288adefddaec779eef5814fc8113ed6f29d36bcf1119544624d9b84c141
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a7fed81325cf3d8746c8c8e29722d1ee71a0fee7a7fbd34629d55e97abe774b
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5161290322580645,
5
  "eval_steps": 1000,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -505,6 +505,255 @@
505
  "eval_steps_per_second": 0.006,
506
  "eval_wer": 0.10521978021978022,
507
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  }
509
  ],
510
  "logging_steps": 25,
@@ -512,7 +761,7 @@
512
  "num_input_tokens_seen": 0,
513
  "num_train_epochs": 2,
514
  "save_steps": 1000,
515
- "total_flos": 5.435997290496e+19,
516
  "train_batch_size": 8,
517
  "trial_name": null,
518
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7741935483870968,
5
  "eval_steps": 1000,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
505
  "eval_steps_per_second": 0.006,
506
  "eval_wer": 0.10521978021978022,
507
  "step": 2000
508
+ },
509
+ {
510
+ "epoch": 0.52,
511
+ "learning_rate": 9.923333333333332e-07,
512
+ "loss": 0.1202,
513
+ "step": 2025
514
+ },
515
+ {
516
+ "epoch": 0.53,
517
+ "learning_rate": 9.84e-07,
518
+ "loss": 0.186,
519
+ "step": 2050
520
+ },
521
+ {
522
+ "epoch": 0.54,
523
+ "learning_rate": 9.756666666666666e-07,
524
+ "loss": 0.1106,
525
+ "step": 2075
526
+ },
527
+ {
528
+ "epoch": 0.54,
529
+ "learning_rate": 9.673333333333332e-07,
530
+ "loss": 0.1926,
531
+ "step": 2100
532
+ },
533
+ {
534
+ "epoch": 0.55,
535
+ "learning_rate": 9.589999999999998e-07,
536
+ "loss": 0.1178,
537
+ "step": 2125
538
+ },
539
+ {
540
+ "epoch": 0.55,
541
+ "learning_rate": 9.506666666666667e-07,
542
+ "loss": 0.1744,
543
+ "step": 2150
544
+ },
545
+ {
546
+ "epoch": 0.56,
547
+ "learning_rate": 9.423333333333333e-07,
548
+ "loss": 0.1263,
549
+ "step": 2175
550
+ },
551
+ {
552
+ "epoch": 0.57,
553
+ "learning_rate": 9.34e-07,
554
+ "loss": 0.1818,
555
+ "step": 2200
556
+ },
557
+ {
558
+ "epoch": 0.57,
559
+ "learning_rate": 9.256666666666666e-07,
560
+ "loss": 0.1466,
561
+ "step": 2225
562
+ },
563
+ {
564
+ "epoch": 0.58,
565
+ "learning_rate": 9.173333333333333e-07,
566
+ "loss": 0.1357,
567
+ "step": 2250
568
+ },
569
+ {
570
+ "epoch": 0.59,
571
+ "learning_rate": 9.09e-07,
572
+ "loss": 0.148,
573
+ "step": 2275
574
+ },
575
+ {
576
+ "epoch": 0.59,
577
+ "learning_rate": 9.006666666666666e-07,
578
+ "loss": 0.1684,
579
+ "step": 2300
580
+ },
581
+ {
582
+ "epoch": 0.6,
583
+ "learning_rate": 8.923333333333333e-07,
584
+ "loss": 0.1368,
585
+ "step": 2325
586
+ },
587
+ {
588
+ "epoch": 0.61,
589
+ "learning_rate": 8.839999999999999e-07,
590
+ "loss": 0.188,
591
+ "step": 2350
592
+ },
593
+ {
594
+ "epoch": 0.61,
595
+ "learning_rate": 8.756666666666666e-07,
596
+ "loss": 0.1299,
597
+ "step": 2375
598
+ },
599
+ {
600
+ "epoch": 0.62,
601
+ "learning_rate": 8.673333333333332e-07,
602
+ "loss": 0.1461,
603
+ "step": 2400
604
+ },
605
+ {
606
+ "epoch": 0.63,
607
+ "learning_rate": 8.59e-07,
608
+ "loss": 0.1569,
609
+ "step": 2425
610
+ },
611
+ {
612
+ "epoch": 0.63,
613
+ "learning_rate": 8.506666666666667e-07,
614
+ "loss": 0.1527,
615
+ "step": 2450
616
+ },
617
+ {
618
+ "epoch": 0.64,
619
+ "learning_rate": 8.423333333333334e-07,
620
+ "loss": 0.1041,
621
+ "step": 2475
622
+ },
623
+ {
624
+ "epoch": 0.65,
625
+ "learning_rate": 8.34e-07,
626
+ "loss": 0.157,
627
+ "step": 2500
628
+ },
629
+ {
630
+ "epoch": 0.65,
631
+ "learning_rate": 8.256666666666666e-07,
632
+ "loss": 0.1364,
633
+ "step": 2525
634
+ },
635
+ {
636
+ "epoch": 0.66,
637
+ "learning_rate": 8.173333333333333e-07,
638
+ "loss": 0.1685,
639
+ "step": 2550
640
+ },
641
+ {
642
+ "epoch": 0.66,
643
+ "learning_rate": 8.09e-07,
644
+ "loss": 0.1344,
645
+ "step": 2575
646
+ },
647
+ {
648
+ "epoch": 0.67,
649
+ "learning_rate": 8.006666666666666e-07,
650
+ "loss": 0.1589,
651
+ "step": 2600
652
+ },
653
+ {
654
+ "epoch": 0.68,
655
+ "learning_rate": 7.923333333333333e-07,
656
+ "loss": 0.1334,
657
+ "step": 2625
658
+ },
659
+ {
660
+ "epoch": 0.68,
661
+ "learning_rate": 7.84e-07,
662
+ "loss": 0.1297,
663
+ "step": 2650
664
+ },
665
+ {
666
+ "epoch": 0.69,
667
+ "learning_rate": 7.756666666666665e-07,
668
+ "loss": 0.1326,
669
+ "step": 2675
670
+ },
671
+ {
672
+ "epoch": 0.7,
673
+ "learning_rate": 7.673333333333332e-07,
674
+ "loss": 0.179,
675
+ "step": 2700
676
+ },
677
+ {
678
+ "epoch": 0.7,
679
+ "learning_rate": 7.59e-07,
680
+ "loss": 0.1242,
681
+ "step": 2725
682
+ },
683
+ {
684
+ "epoch": 0.71,
685
+ "learning_rate": 7.506666666666667e-07,
686
+ "loss": 0.1714,
687
+ "step": 2750
688
+ },
689
+ {
690
+ "epoch": 0.72,
691
+ "learning_rate": 7.423333333333333e-07,
692
+ "loss": 0.1503,
693
+ "step": 2775
694
+ },
695
+ {
696
+ "epoch": 0.72,
697
+ "learning_rate": 7.34e-07,
698
+ "loss": 0.131,
699
+ "step": 2800
700
+ },
701
+ {
702
+ "epoch": 0.73,
703
+ "learning_rate": 7.256666666666667e-07,
704
+ "loss": 0.118,
705
+ "step": 2825
706
+ },
707
+ {
708
+ "epoch": 0.74,
709
+ "learning_rate": 7.173333333333333e-07,
710
+ "loss": 0.1729,
711
+ "step": 2850
712
+ },
713
+ {
714
+ "epoch": 0.74,
715
+ "learning_rate": 7.089999999999999e-07,
716
+ "loss": 0.1193,
717
+ "step": 2875
718
+ },
719
+ {
720
+ "epoch": 0.75,
721
+ "learning_rate": 7.006666666666666e-07,
722
+ "loss": 0.1723,
723
+ "step": 2900
724
+ },
725
+ {
726
+ "epoch": 0.75,
727
+ "learning_rate": 6.923333333333333e-07,
728
+ "loss": 0.1393,
729
+ "step": 2925
730
+ },
731
+ {
732
+ "epoch": 0.76,
733
+ "learning_rate": 6.84e-07,
734
+ "loss": 0.1259,
735
+ "step": 2950
736
+ },
737
+ {
738
+ "epoch": 0.77,
739
+ "learning_rate": 6.756666666666666e-07,
740
+ "loss": 0.1228,
741
+ "step": 2975
742
+ },
743
+ {
744
+ "epoch": 0.77,
745
+ "learning_rate": 6.673333333333334e-07,
746
+ "loss": 0.1794,
747
+ "step": 3000
748
+ },
749
+ {
750
+ "epoch": 0.77,
751
+ "eval_loss": 0.13788852095603943,
752
+ "eval_runtime": 198193.9708,
753
+ "eval_samples_per_second": 0.047,
754
+ "eval_steps_per_second": 0.006,
755
+ "eval_wer": 0.09970911441499677,
756
+ "step": 3000
757
  }
758
  ],
759
  "logging_steps": 25,
 
761
  "num_input_tokens_seen": 0,
762
  "num_train_epochs": 2,
763
  "save_steps": 1000,
764
+ "total_flos": 8.153995935744e+19,
765
  "train_batch_size": 8,
766
  "trial_name": null,
767
  "trial_params": null