warmestman commited on
Commit
1397219
1 Parent(s): 447a425

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80b1f8d231b5ff1859039320fa0188cee6c8c45459e38193a22bc1676daee37c
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:374ed8949342602163ff342ab341b7f2396c86df086fc325532b8011637eade8
3
  size 4993448880
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9006daf5d78d90bf22b0c813d9e776720f90a688af4a7bd80cbc7dc7c09b019
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa46570533cc904339e6f587cf429f834efd131ac812971f1bbe438d6109678
3
  size 1180663192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba2a236d039fcf4f5348b184a304acbdf17c1575f2f90a02947dc19886042b45
3
  size 3095446256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:401fa0df85ea07bc908110453d152ebafc1f512885a204e1dfe675e1f118de6f
3
  size 3095446256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad76ae298f288aa3230881b9dfb28684fc083c10f565ddf65a09a40de5f4dc0c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80e397e2074aa93e5dab4d712d0187ee9f61d0160362667daac2e2f5af3c9baa
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:318a20b90d087d5ab98fe55e815e26cf3c50d3ca88ad22e2ca83eebaaef1c1d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b9dc452a973f46fe6cf13a85f3a4e2b3a33ff360bd385917df50f48d4a73a2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 38.2982234200539,
3
  "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-2000",
4
- "epoch": 11.976047904191617,
5
  "eval_steps": 1000,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -505,6 +505,255 @@
505
  "eval_steps_per_second": 0.092,
506
  "eval_wer": 38.2982234200539,
507
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  }
509
  ],
510
  "logging_steps": 25,
@@ -512,7 +761,7 @@
512
  "num_input_tokens_seen": 0,
513
  "num_train_epochs": 60,
514
  "save_steps": 1000,
515
- "total_flos": 1.0860782836580352e+20,
516
  "train_batch_size": 16,
517
  "trial_name": null,
518
  "trial_params": null
 
1
  {
2
  "best_metric": 38.2982234200539,
3
  "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-2000",
4
+ "epoch": 17.964071856287426,
5
  "eval_steps": 1000,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
505
  "eval_steps_per_second": 0.092,
506
  "eval_wer": 38.2982234200539,
507
  "step": 2000
508
+ },
509
+ {
510
+ "epoch": 12.13,
511
+ "learning_rate": 8.394736842105263e-05,
512
+ "loss": 0.0152,
513
+ "step": 2025
514
+ },
515
+ {
516
+ "epoch": 12.28,
517
+ "learning_rate": 8.36842105263158e-05,
518
+ "loss": 0.0127,
519
+ "step": 2050
520
+ },
521
+ {
522
+ "epoch": 12.43,
523
+ "learning_rate": 8.342105263157896e-05,
524
+ "loss": 0.0162,
525
+ "step": 2075
526
+ },
527
+ {
528
+ "epoch": 12.57,
529
+ "learning_rate": 8.315789473684212e-05,
530
+ "loss": 0.016,
531
+ "step": 2100
532
+ },
533
+ {
534
+ "epoch": 12.72,
535
+ "learning_rate": 8.289473684210527e-05,
536
+ "loss": 0.0146,
537
+ "step": 2125
538
+ },
539
+ {
540
+ "epoch": 12.87,
541
+ "learning_rate": 8.263157894736843e-05,
542
+ "loss": 0.0202,
543
+ "step": 2150
544
+ },
545
+ {
546
+ "epoch": 13.02,
547
+ "learning_rate": 8.236842105263158e-05,
548
+ "loss": 0.0208,
549
+ "step": 2175
550
+ },
551
+ {
552
+ "epoch": 13.17,
553
+ "learning_rate": 8.210526315789474e-05,
554
+ "loss": 0.0218,
555
+ "step": 2200
556
+ },
557
+ {
558
+ "epoch": 13.32,
559
+ "learning_rate": 8.18421052631579e-05,
560
+ "loss": 0.0191,
561
+ "step": 2225
562
+ },
563
+ {
564
+ "epoch": 13.47,
565
+ "learning_rate": 8.157894736842105e-05,
566
+ "loss": 0.0182,
567
+ "step": 2250
568
+ },
569
+ {
570
+ "epoch": 13.62,
571
+ "learning_rate": 8.131578947368421e-05,
572
+ "loss": 0.02,
573
+ "step": 2275
574
+ },
575
+ {
576
+ "epoch": 13.77,
577
+ "learning_rate": 8.105263157894737e-05,
578
+ "loss": 0.0227,
579
+ "step": 2300
580
+ },
581
+ {
582
+ "epoch": 13.92,
583
+ "learning_rate": 8.078947368421052e-05,
584
+ "loss": 0.0215,
585
+ "step": 2325
586
+ },
587
+ {
588
+ "epoch": 14.07,
589
+ "learning_rate": 8.052631578947368e-05,
590
+ "loss": 0.0207,
591
+ "step": 2350
592
+ },
593
+ {
594
+ "epoch": 14.22,
595
+ "learning_rate": 8.026315789473685e-05,
596
+ "loss": 0.0174,
597
+ "step": 2375
598
+ },
599
+ {
600
+ "epoch": 14.37,
601
+ "learning_rate": 8e-05,
602
+ "loss": 0.0163,
603
+ "step": 2400
604
+ },
605
+ {
606
+ "epoch": 14.52,
607
+ "learning_rate": 7.973684210526316e-05,
608
+ "loss": 0.0161,
609
+ "step": 2425
610
+ },
611
+ {
612
+ "epoch": 14.67,
613
+ "learning_rate": 7.947368421052632e-05,
614
+ "loss": 0.014,
615
+ "step": 2450
616
+ },
617
+ {
618
+ "epoch": 14.82,
619
+ "learning_rate": 7.921052631578948e-05,
620
+ "loss": 0.0157,
621
+ "step": 2475
622
+ },
623
+ {
624
+ "epoch": 14.97,
625
+ "learning_rate": 7.894736842105263e-05,
626
+ "loss": 0.0171,
627
+ "step": 2500
628
+ },
629
+ {
630
+ "epoch": 15.12,
631
+ "learning_rate": 7.868421052631579e-05,
632
+ "loss": 0.0111,
633
+ "step": 2525
634
+ },
635
+ {
636
+ "epoch": 15.27,
637
+ "learning_rate": 7.842105263157895e-05,
638
+ "loss": 0.0119,
639
+ "step": 2550
640
+ },
641
+ {
642
+ "epoch": 15.42,
643
+ "learning_rate": 7.81578947368421e-05,
644
+ "loss": 0.0136,
645
+ "step": 2575
646
+ },
647
+ {
648
+ "epoch": 15.57,
649
+ "learning_rate": 7.789473684210526e-05,
650
+ "loss": 0.0148,
651
+ "step": 2600
652
+ },
653
+ {
654
+ "epoch": 15.72,
655
+ "learning_rate": 7.763157894736843e-05,
656
+ "loss": 0.0123,
657
+ "step": 2625
658
+ },
659
+ {
660
+ "epoch": 15.87,
661
+ "learning_rate": 7.736842105263159e-05,
662
+ "loss": 0.0108,
663
+ "step": 2650
664
+ },
665
+ {
666
+ "epoch": 16.02,
667
+ "learning_rate": 7.710526315789474e-05,
668
+ "loss": 0.013,
669
+ "step": 2675
670
+ },
671
+ {
672
+ "epoch": 16.17,
673
+ "learning_rate": 7.68421052631579e-05,
674
+ "loss": 0.0106,
675
+ "step": 2700
676
+ },
677
+ {
678
+ "epoch": 16.32,
679
+ "learning_rate": 7.657894736842105e-05,
680
+ "loss": 0.0123,
681
+ "step": 2725
682
+ },
683
+ {
684
+ "epoch": 16.47,
685
+ "learning_rate": 7.631578947368422e-05,
686
+ "loss": 0.0152,
687
+ "step": 2750
688
+ },
689
+ {
690
+ "epoch": 16.62,
691
+ "learning_rate": 7.605263157894738e-05,
692
+ "loss": 0.0126,
693
+ "step": 2775
694
+ },
695
+ {
696
+ "epoch": 16.77,
697
+ "learning_rate": 7.578947368421054e-05,
698
+ "loss": 0.0137,
699
+ "step": 2800
700
+ },
701
+ {
702
+ "epoch": 16.92,
703
+ "learning_rate": 7.55263157894737e-05,
704
+ "loss": 0.0163,
705
+ "step": 2825
706
+ },
707
+ {
708
+ "epoch": 17.07,
709
+ "learning_rate": 7.526315789473685e-05,
710
+ "loss": 0.0132,
711
+ "step": 2850
712
+ },
713
+ {
714
+ "epoch": 17.22,
715
+ "learning_rate": 7.500000000000001e-05,
716
+ "loss": 0.011,
717
+ "step": 2875
718
+ },
719
+ {
720
+ "epoch": 17.37,
721
+ "learning_rate": 7.473684210526316e-05,
722
+ "loss": 0.0094,
723
+ "step": 2900
724
+ },
725
+ {
726
+ "epoch": 17.51,
727
+ "learning_rate": 7.447368421052632e-05,
728
+ "loss": 0.0094,
729
+ "step": 2925
730
+ },
731
+ {
732
+ "epoch": 17.66,
733
+ "learning_rate": 7.421052631578948e-05,
734
+ "loss": 0.0107,
735
+ "step": 2950
736
+ },
737
+ {
738
+ "epoch": 17.81,
739
+ "learning_rate": 7.394736842105263e-05,
740
+ "loss": 0.011,
741
+ "step": 2975
742
+ },
743
+ {
744
+ "epoch": 17.96,
745
+ "learning_rate": 7.368421052631579e-05,
746
+ "loss": 0.012,
747
+ "step": 3000
748
+ },
749
+ {
750
+ "epoch": 17.96,
751
+ "eval_loss": 0.5328223705291748,
752
+ "eval_runtime": 578.3593,
753
+ "eval_samples_per_second": 0.724,
754
+ "eval_steps_per_second": 0.092,
755
+ "eval_wer": 38.54023431054397,
756
+ "step": 3000
757
  }
758
  ],
759
  "logging_steps": 25,
 
761
  "num_input_tokens_seen": 0,
762
  "num_train_epochs": 60,
763
  "save_steps": 1000,
764
+ "total_flos": 1.6290664630124544e+20,
765
  "train_batch_size": 16,
766
  "trial_name": null,
767
  "trial_params": null