SaladSlayer00 committed on
Commit
c55c0dd
·
1 Parent(s): f2bcdf8

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1b1c2533249bd83cd4dc058e8c9ac7a86cb809728991da29e3e26d9567e4925
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce4b8b5a646a747bf1c54d4e55c15da75e5472d5f29c289eff679a65ba3d219
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b92ccd1bf0adff93fc0579be75a0b6901f7257675e1c39f5344bc054c1bfcab5
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:415af2ff1829c3edaf1a17ba6db5d45417fa3e84cc95554423a2869871551398
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ad752088b229d78039d00fa98cec499de1d448da781bc7460fcfe8880b39ae1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:979cd4e52f730e90cd87d98931f6627f47d79df2a678fcfc674113ffa4af0794
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f181a0117afba799e29e5c923c7c87a52316143822f05b0b57dfd50707300da
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ff56efc76c16a3b9a712527179ae61c8d6dfccc7e3a53f8c421d6329adacfbb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 96.06429012981661,
3
  "best_model_checkpoint": "./another_local/checkpoint-1500",
4
- "epoch": 7.987220447284345,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -652,6 +652,135 @@
652
  "eval_steps_per_second": 0.104,
653
  "eval_wer": 126.81331135380178,
654
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  }
656
  ],
657
  "logging_steps": 25,
@@ -659,7 +788,7 @@
659
  "num_input_tokens_seen": 0,
660
  "num_train_epochs": 13,
661
  "save_steps": 500,
662
- "total_flos": 1.152725523038208e+19,
663
  "trial_name": null,
664
  "trial_params": null
665
  }
 
1
  {
2
  "best_metric": 96.06429012981661,
3
  "best_model_checkpoint": "./another_local/checkpoint-1500",
4
+ "epoch": 9.584664536741213,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
652
  "eval_steps_per_second": 0.104,
653
  "eval_wer": 126.81331135380178,
654
  "step": 2500
655
+ },
656
+ {
657
+ "epoch": 8.07,
658
+ "learning_rate": 4.225714285714286e-06,
659
+ "loss": 0.0036,
660
+ "step": 2525
661
+ },
662
+ {
663
+ "epoch": 8.15,
664
+ "learning_rate": 4.154285714285714e-06,
665
+ "loss": 0.0037,
666
+ "step": 2550
667
+ },
668
+ {
669
+ "epoch": 8.23,
670
+ "learning_rate": 4.082857142857143e-06,
671
+ "loss": 0.0025,
672
+ "step": 2575
673
+ },
674
+ {
675
+ "epoch": 8.31,
676
+ "learning_rate": 4.011428571428571e-06,
677
+ "loss": 0.0031,
678
+ "step": 2600
679
+ },
680
+ {
681
+ "epoch": 8.39,
682
+ "learning_rate": 3.94e-06,
683
+ "loss": 0.0022,
684
+ "step": 2625
685
+ },
686
+ {
687
+ "epoch": 8.47,
688
+ "learning_rate": 3.8685714285714286e-06,
689
+ "loss": 0.0034,
690
+ "step": 2650
691
+ },
692
+ {
693
+ "epoch": 8.55,
694
+ "learning_rate": 3.7971428571428576e-06,
695
+ "loss": 0.0021,
696
+ "step": 2675
697
+ },
698
+ {
699
+ "epoch": 8.63,
700
+ "learning_rate": 3.7257142857142857e-06,
701
+ "loss": 0.0032,
702
+ "step": 2700
703
+ },
704
+ {
705
+ "epoch": 8.71,
706
+ "learning_rate": 3.6542857142857148e-06,
707
+ "loss": 0.0028,
708
+ "step": 2725
709
+ },
710
+ {
711
+ "epoch": 8.79,
712
+ "learning_rate": 3.582857142857143e-06,
713
+ "loss": 0.0032,
714
+ "step": 2750
715
+ },
716
+ {
717
+ "epoch": 8.87,
718
+ "learning_rate": 3.511428571428572e-06,
719
+ "loss": 0.0035,
720
+ "step": 2775
721
+ },
722
+ {
723
+ "epoch": 8.95,
724
+ "learning_rate": 3.44e-06,
725
+ "loss": 0.0023,
726
+ "step": 2800
727
+ },
728
+ {
729
+ "epoch": 9.03,
730
+ "learning_rate": 3.3685714285714287e-06,
731
+ "loss": 0.0029,
732
+ "step": 2825
733
+ },
734
+ {
735
+ "epoch": 9.11,
736
+ "learning_rate": 3.2971428571428577e-06,
737
+ "loss": 0.0021,
738
+ "step": 2850
739
+ },
740
+ {
741
+ "epoch": 9.19,
742
+ "learning_rate": 3.225714285714286e-06,
743
+ "loss": 0.0022,
744
+ "step": 2875
745
+ },
746
+ {
747
+ "epoch": 9.27,
748
+ "learning_rate": 3.154285714285715e-06,
749
+ "loss": 0.0057,
750
+ "step": 2900
751
+ },
752
+ {
753
+ "epoch": 9.35,
754
+ "learning_rate": 3.082857142857143e-06,
755
+ "loss": 0.0026,
756
+ "step": 2925
757
+ },
758
+ {
759
+ "epoch": 9.42,
760
+ "learning_rate": 3.0114285714285716e-06,
761
+ "loss": 0.0019,
762
+ "step": 2950
763
+ },
764
+ {
765
+ "epoch": 9.5,
766
+ "learning_rate": 2.9400000000000002e-06,
767
+ "loss": 0.0017,
768
+ "step": 2975
769
+ },
770
+ {
771
+ "epoch": 9.58,
772
+ "learning_rate": 2.868571428571429e-06,
773
+ "loss": 0.0047,
774
+ "step": 3000
775
+ },
776
+ {
777
+ "epoch": 9.58,
778
+ "eval_loss": 0.5272812247276306,
779
+ "eval_runtime": 2446.9046,
780
+ "eval_samples_per_second": 0.817,
781
+ "eval_steps_per_second": 0.102,
782
+ "eval_wer": 133.89655882958996,
783
+ "step": 3000
784
  }
785
  ],
786
  "logging_steps": 25,
 
788
  "num_input_tokens_seen": 0,
789
  "num_train_epochs": 13,
790
  "save_steps": 500,
791
+ "total_flos": 1.383132106653696e+19,
792
  "trial_name": null,
793
  "trial_params": null
794
  }