Training in progress, step 5000, checkpoint
Browse files- last-checkpoint/global_step5000/mp_rank_00_model_states.pt +1 -1
- last-checkpoint/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/global_step5000/zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/global_step5000/zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/global_step5000/zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/global_step5000/zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/global_step5000/zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/global_step5000/mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 197282509
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:328948a5685842864ea90aeb6e72e35c4fb1984f9ccbe9ebc5cc86a3f280e7ae
|
3 |
size 197282509
|
last-checkpoint/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180416968
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5640b6a947d8dcd778cda3693cf669f3c9c4f5507f133b19896fd2b4af7b8e
|
3 |
size 180416968
|
last-checkpoint/global_step5000/zero_pp_rank_1_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180416776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22ab85cb6919593e8f3dcb8ba26f9b650e6436dadfa8ca31fb5aec718ffbcf04
|
3 |
size 180416776
|
last-checkpoint/global_step5000/zero_pp_rank_2_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180416776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:504679429bee97de4bf3ece1bc0c64196a54e3efca1e5fe6be84138af136212d
|
3 |
size 180416776
|
last-checkpoint/global_step5000/zero_pp_rank_3_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180416904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:310a42ec7eeca453fa38e7ef810c5d269004592fcb009002f72fb822411d0a9b
|
3 |
size 180416904
|
last-checkpoint/global_step5000/zero_pp_rank_4_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180416712
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bc7eec7d93e7bfa621225a3951f0f72babf971d1ca5851365e70a4ef4ee4bbb
|
3 |
size 180416712
|
last-checkpoint/global_step5000/zero_pp_rank_5_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180417096
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92c8b3e85dacd27b251744d96d452e2b5b48a1c6ffa06cb019b24ffb1e96fd61
|
3 |
size 180417096
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step5000
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 188836816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57d4fd4308febebec1afff358cf12ab56256227add3b2957e331f5cd2e0691a4
|
3 |
size 188836816
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3f3878f9de7b43bbfd935e88b0d1db5f297e106844e639e3ee8f61d23b71537
|
3 |
size 15536
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08dc69792353f98f3975f2146c88ff60332193593dccd2ba5cecd781b9997d67
|
3 |
+
size 15536
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3d105c1a1aa212e4c659fcde3507788ba2d1192d170b13840512005d89c43c0
|
3 |
size 15536
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67b13c4c348b0a86bf4854b80c140b765c791fc26576cf4cc28641b09a92b2c2
|
3 |
size 15536
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7cca33316c4c01433fe48556b2852d35fc05fe7ebfbd4c36c5bfb4e6f315283
|
3 |
+
size 15472
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67a8ec020afb0b24788f834efe44fce3daac72b42efaf574b93244a2db48d417
|
3 |
+
size 15536
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2464b12e1ca0e0289d4336d5d6478dcc4211982d590e1532eea5a3218a3c1d09
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -655,6 +655,76 @@
|
|
655 |
"eval_steps_per_second": 0.286,
|
656 |
"eval_wer": 0.18653152196985778,
|
657 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
658 |
}
|
659 |
],
|
660 |
"logging_steps": 50,
|
@@ -674,7 +744,7 @@
|
|
674 |
"attributes": {}
|
675 |
}
|
676 |
},
|
677 |
-
"total_flos": 1.
|
678 |
"train_batch_size": 64,
|
679 |
"trial_name": null,
|
680 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 135.13513513513513,
|
5 |
"eval_steps": 1500,
|
6 |
+
"global_step": 5000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
655 |
"eval_steps_per_second": 0.286,
|
656 |
"eval_wer": 0.18653152196985778,
|
657 |
"step": 4500
|
658 |
+
},
|
659 |
+
{
|
660 |
+
"epoch": 122.97297297297297,
|
661 |
+
"grad_norm": 0.5193214416503906,
|
662 |
+
"learning_rate": 0.0001,
|
663 |
+
"loss": 0.0361,
|
664 |
+
"step": 4550
|
665 |
+
},
|
666 |
+
{
|
667 |
+
"epoch": 124.32432432432432,
|
668 |
+
"grad_norm": 0.3041287362575531,
|
669 |
+
"learning_rate": 0.0001,
|
670 |
+
"loss": 0.0335,
|
671 |
+
"step": 4600
|
672 |
+
},
|
673 |
+
{
|
674 |
+
"epoch": 125.67567567567568,
|
675 |
+
"grad_norm": 0.44249922037124634,
|
676 |
+
"learning_rate": 0.0001,
|
677 |
+
"loss": 0.034,
|
678 |
+
"step": 4650
|
679 |
+
},
|
680 |
+
{
|
681 |
+
"epoch": 127.02702702702703,
|
682 |
+
"grad_norm": 0.357164204120636,
|
683 |
+
"learning_rate": 0.0001,
|
684 |
+
"loss": 0.0326,
|
685 |
+
"step": 4700
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"epoch": 128.3783783783784,
|
689 |
+
"grad_norm": 0.30578091740608215,
|
690 |
+
"learning_rate": 0.0001,
|
691 |
+
"loss": 0.0307,
|
692 |
+
"step": 4750
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 129.72972972972974,
|
696 |
+
"grad_norm": 0.4774022102355957,
|
697 |
+
"learning_rate": 0.0001,
|
698 |
+
"loss": 0.0321,
|
699 |
+
"step": 4800
|
700 |
+
},
|
701 |
+
{
|
702 |
+
"epoch": 131.0810810810811,
|
703 |
+
"grad_norm": 0.3393169343471527,
|
704 |
+
"learning_rate": 0.0001,
|
705 |
+
"loss": 0.0336,
|
706 |
+
"step": 4850
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"epoch": 132.43243243243242,
|
710 |
+
"grad_norm": 0.42481565475463867,
|
711 |
+
"learning_rate": 0.0001,
|
712 |
+
"loss": 0.0317,
|
713 |
+
"step": 4900
|
714 |
+
},
|
715 |
+
{
|
716 |
+
"epoch": 133.78378378378378,
|
717 |
+
"grad_norm": 0.45170778036117554,
|
718 |
+
"learning_rate": 0.0001,
|
719 |
+
"loss": 0.0309,
|
720 |
+
"step": 4950
|
721 |
+
},
|
722 |
+
{
|
723 |
+
"epoch": 135.13513513513513,
|
724 |
+
"grad_norm": 0.44404086470603943,
|
725 |
+
"learning_rate": 0.0001,
|
726 |
+
"loss": 0.0331,
|
727 |
+
"step": 5000
|
728 |
}
|
729 |
],
|
730 |
"logging_steps": 50,
|
|
|
744 |
"attributes": {}
|
745 |
}
|
746 |
},
|
747 |
+
"total_flos": 1.4035163536540303e+20,
|
748 |
"train_batch_size": 64,
|
749 |
"trial_name": null,
|
750 |
"trial_params": null
|