mattbonnell committed on
Commit
24703c3
·
verified ·
1 Parent(s): 171a67f

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/global_step5000/mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5467b935ec325fda382ba7be58d9846e82411a637dbf0162457c720f6d2141f6
3
  size 197282509
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328948a5685842864ea90aeb6e72e35c4fb1984f9ccbe9ebc5cc86a3f280e7ae
3
  size 197282509
last-checkpoint/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55aecdf548d0b2fbd926ca370af21b493bf58ed5c47467aac68e118ec7876741
3
  size 180416968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b5640b6a947d8dcd778cda3693cf669f3c9c4f5507f133b19896fd2b4af7b8e
3
  size 180416968
last-checkpoint/global_step5000/zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9463cda9fd971946f5151307a49742837360fbed2efc585bea470fb9b66bc7ce
3
  size 180416776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ab85cb6919593e8f3dcb8ba26f9b650e6436dadfa8ca31fb5aec718ffbcf04
3
  size 180416776
last-checkpoint/global_step5000/zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf82e9de1f5f836e92de1c6a58bc2de7c442a0a178f5e9666537ee9f99ad2179
3
  size 180416776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:504679429bee97de4bf3ece1bc0c64196a54e3efca1e5fe6be84138af136212d
3
  size 180416776
last-checkpoint/global_step5000/zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c487dd63e0d4140f7eb8f70bd099931a3b2a4f447dd6d9cf793e237458f2d8b
3
  size 180416904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:310a42ec7eeca453fa38e7ef810c5d269004592fcb009002f72fb822411d0a9b
3
  size 180416904
last-checkpoint/global_step5000/zero_pp_rank_4_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8796c462fea248b71f7212bd82a7c45a337ffa2495509f61da21c9ad595ac7b
3
  size 180416712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc7eec7d93e7bfa621225a3951f0f72babf971d1ca5851365e70a4ef4ee4bbb
3
  size 180416712
last-checkpoint/global_step5000/zero_pp_rank_5_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5522d3140a3edd862bd834ca5fdf5213f11e2aa57d3ff149573973ecbf206f93
3
  size 180417096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c8b3e85dacd27b251744d96d452e2b5b48a1c6ffa06cb019b24ffb1e96fd61
3
  size 180417096
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step4500
 
1
+ global_step5000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bbb1e794b148b1b42f92bc9c4dfc4cd5a2cf32376e01890b4ae777f57465c7
3
  size 188836816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57d4fd4308febebec1afff358cf12ab56256227add3b2957e331f5cd2e0691a4
3
  size 188836816
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:421cd6e4a65ff9c7e93e4b75b9c1c13d3a796a419cd06737593a73ba6fca3b7b
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3f3878f9de7b43bbfd935e88b0d1db5f297e106844e639e3ee8f61d23b71537
3
  size 15536
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:508a0d515d107151d0948a4d35861fa4137e62d74f00c61e276574c299276fcb
3
- size 15472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dc69792353f98f3975f2146c88ff60332193593dccd2ba5cecd781b9997d67
3
+ size 15536
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f445b982cca1aeb8d581190f301eefc39b0a1c4ac9055b6f8cdd45d1da30a032
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d105c1a1aa212e4c659fcde3507788ba2d1192d170b13840512005d89c43c0
3
  size 15536
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:356c5e4b8181f20ab94fc32d456a51114d201213aeb27294c1576e64f5169e93
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b13c4c348b0a86bf4854b80c140b765c791fc26576cf4cc28641b09a92b2c2
3
  size 15536
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a140a4b40ff61742ca719f7ef71c9359203b4a0ab0dbc20a37ad643681b576a0
3
- size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7cca33316c4c01433fe48556b2852d35fc05fe7ebfbd4c36c5bfb4e6f315283
3
+ size 15472
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e43129c492e613eb0252ccbd1ceb0d0167f34d7db7c0297504eaa701b56ef655
3
- size 15600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a8ec020afb0b24788f834efe44fce3daac72b42efaf574b93244a2db48d417
3
+ size 15536
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5d9765e8c902aa7b57efe625b396d0dc8762d66aa57bdc5ac3096900203736d
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2464b12e1ca0e0289d4336d5d6478dcc4211982d590e1532eea5a3218a3c1d09
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 121.62162162162163,
5
  "eval_steps": 1500,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -655,6 +655,76 @@
655
  "eval_steps_per_second": 0.286,
656
  "eval_wer": 0.18653152196985778,
657
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  }
659
  ],
660
  "logging_steps": 50,
@@ -674,7 +744,7 @@
674
  "attributes": {}
675
  }
676
  },
677
- "total_flos": 1.2632422256120483e+20,
678
  "train_batch_size": 64,
679
  "trial_name": null,
680
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 135.13513513513513,
5
  "eval_steps": 1500,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
655
  "eval_steps_per_second": 0.286,
656
  "eval_wer": 0.18653152196985778,
657
  "step": 4500
658
+ },
659
+ {
660
+ "epoch": 122.97297297297297,
661
+ "grad_norm": 0.5193214416503906,
662
+ "learning_rate": 0.0001,
663
+ "loss": 0.0361,
664
+ "step": 4550
665
+ },
666
+ {
667
+ "epoch": 124.32432432432432,
668
+ "grad_norm": 0.3041287362575531,
669
+ "learning_rate": 0.0001,
670
+ "loss": 0.0335,
671
+ "step": 4600
672
+ },
673
+ {
674
+ "epoch": 125.67567567567568,
675
+ "grad_norm": 0.44249922037124634,
676
+ "learning_rate": 0.0001,
677
+ "loss": 0.034,
678
+ "step": 4650
679
+ },
680
+ {
681
+ "epoch": 127.02702702702703,
682
+ "grad_norm": 0.357164204120636,
683
+ "learning_rate": 0.0001,
684
+ "loss": 0.0326,
685
+ "step": 4700
686
+ },
687
+ {
688
+ "epoch": 128.3783783783784,
689
+ "grad_norm": 0.30578091740608215,
690
+ "learning_rate": 0.0001,
691
+ "loss": 0.0307,
692
+ "step": 4750
693
+ },
694
+ {
695
+ "epoch": 129.72972972972974,
696
+ "grad_norm": 0.4774022102355957,
697
+ "learning_rate": 0.0001,
698
+ "loss": 0.0321,
699
+ "step": 4800
700
+ },
701
+ {
702
+ "epoch": 131.0810810810811,
703
+ "grad_norm": 0.3393169343471527,
704
+ "learning_rate": 0.0001,
705
+ "loss": 0.0336,
706
+ "step": 4850
707
+ },
708
+ {
709
+ "epoch": 132.43243243243242,
710
+ "grad_norm": 0.42481565475463867,
711
+ "learning_rate": 0.0001,
712
+ "loss": 0.0317,
713
+ "step": 4900
714
+ },
715
+ {
716
+ "epoch": 133.78378378378378,
717
+ "grad_norm": 0.45170778036117554,
718
+ "learning_rate": 0.0001,
719
+ "loss": 0.0309,
720
+ "step": 4950
721
+ },
722
+ {
723
+ "epoch": 135.13513513513513,
724
+ "grad_norm": 0.44404086470603943,
725
+ "learning_rate": 0.0001,
726
+ "loss": 0.0331,
727
+ "step": 5000
728
  }
729
  ],
730
  "logging_steps": 50,
 
744
  "attributes": {}
745
  }
746
  },
747
+ "total_flos": 1.4035163536540303e+20,
748
  "train_batch_size": 64,
749
  "trial_name": null,
750
  "trial_params": null