Training in progress, step 14000, checkpoint
Browse files- last-checkpoint/global_step14000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step14000/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step14000/zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step14000/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step14000/zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step14000/zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step14000/zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/global_step14000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02bebdd5de7c1d9d90fd26218dd96282e121d09b9159d013a3569c5ae2c27fb4
|
3 |
+
size 197282509
|
last-checkpoint/global_step14000/zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32ae4fc9e42b351442985cc36fc81d83793b3e473a6f6865a668706310202596
|
3 |
+
size 180416968
|
last-checkpoint/global_step14000/zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22e28d2d81cadfbfa12f1daa7195939246db3a1882547be253c1065d67b53a35
|
3 |
+
size 180416776
|
last-checkpoint/global_step14000/zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2177a4547d04661db0774eed72bb7cb123a6920000ee365b2db13d38253268a
|
3 |
+
size 180416776
|
last-checkpoint/global_step14000/zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5df125d700c5947f620d5a131df8d5d4a4008f3e5cef4ffb68cae3654ce17846
|
3 |
+
size 180416904
|
last-checkpoint/global_step14000/zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c6054940157be9517d6c0fb4a1b3db1fd611b318ec63baaec52c87627def3b2
|
3 |
+
size 180416712
|
last-checkpoint/global_step14000/zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fba5f1db40a076f08bcec8c4e7a63747bfc2c5b8fae2ba648851d7414b9049d
|
3 |
+
size 180417096
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step14000
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 188836816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc76e91f7c430955a9c99095113fcd1babc25289a48cfc4ca0d735607be3435b
|
3 |
size 188836816
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1693ed0aa550d028c5296d44f56e801f84a1280db6a97f42dfe34e34110ae4f
|
3 |
+
size 15536
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15472
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11ff77e693e531050a6d1812f76b9871e17f20f68e215508928ca64785e5251e
|
3 |
size 15472
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d62b18b7eb0aded1dd39102c6afe436923d8a04ed3a046654b037fa2fdb91a70
|
3 |
size 15536
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a7769491a119bd1e33a2abce5162152cd680e687dbe03d7959eadbc1484228e
|
3 |
+
size 15536
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be5c0e12d74adafdf73a4301f0fd601009322deaaa04b1bdc85c19322d09712b
|
3 |
+
size 15536
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a6fbc22976e4200d2dfe699e869535d064502235a2c6d93f138d76d6ce8e9c2
|
3 |
+
size 15472
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bcf66ac1e7571a4edc6b80653947ebcc0595575727313ab43eebe76504b10ff
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1969,6 +1969,76 @@
|
|
1969 |
"eval_steps_per_second": 0.286,
|
1970 |
"eval_wer": 0.20101889195499895,
|
1971 |
"step": 13500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1972 |
}
|
1973 |
],
|
1974 |
"logging_steps": 50,
|
@@ -1988,7 +2058,7 @@
|
|
1988 |
"attributes": {}
|
1989 |
}
|
1990 |
},
|
1991 |
-
"total_flos": 3.
|
1992 |
"train_batch_size": 64,
|
1993 |
"trial_name": null,
|
1994 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 378.3783783783784,
|
5 |
"eval_steps": 1500,
|
6 |
+
"global_step": 14000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1969 |
"eval_steps_per_second": 0.286,
|
1970 |
"eval_wer": 0.20101889195499895,
|
1971 |
"step": 13500
|
1972 |
+
},
|
1973 |
+
{
|
1974 |
+
"epoch": 366.2162162162162,
|
1975 |
+
"grad_norm": 0.2017332762479782,
|
1976 |
+
"learning_rate": 0.0001,
|
1977 |
+
"loss": 0.0134,
|
1978 |
+
"step": 13550
|
1979 |
+
},
|
1980 |
+
{
|
1981 |
+
"epoch": 367.56756756756755,
|
1982 |
+
"grad_norm": 0.26539239287376404,
|
1983 |
+
"learning_rate": 0.0001,
|
1984 |
+
"loss": 0.0148,
|
1985 |
+
"step": 13600
|
1986 |
+
},
|
1987 |
+
{
|
1988 |
+
"epoch": 368.9189189189189,
|
1989 |
+
"grad_norm": 0.2736688256263733,
|
1990 |
+
"learning_rate": 0.0001,
|
1991 |
+
"loss": 0.0148,
|
1992 |
+
"step": 13650
|
1993 |
+
},
|
1994 |
+
{
|
1995 |
+
"epoch": 370.27027027027026,
|
1996 |
+
"grad_norm": 0.28902319073677063,
|
1997 |
+
"learning_rate": 0.0001,
|
1998 |
+
"loss": 0.0137,
|
1999 |
+
"step": 13700
|
2000 |
+
},
|
2001 |
+
{
|
2002 |
+
"epoch": 371.6216216216216,
|
2003 |
+
"grad_norm": 0.1861814558506012,
|
2004 |
+
"learning_rate": 0.0001,
|
2005 |
+
"loss": 0.0132,
|
2006 |
+
"step": 13750
|
2007 |
+
},
|
2008 |
+
{
|
2009 |
+
"epoch": 372.97297297297297,
|
2010 |
+
"grad_norm": 0.2393738031387329,
|
2011 |
+
"learning_rate": 0.0001,
|
2012 |
+
"loss": 0.0125,
|
2013 |
+
"step": 13800
|
2014 |
+
},
|
2015 |
+
{
|
2016 |
+
"epoch": 374.3243243243243,
|
2017 |
+
"grad_norm": 0.3993573486804962,
|
2018 |
+
"learning_rate": 0.0001,
|
2019 |
+
"loss": 0.0125,
|
2020 |
+
"step": 13850
|
2021 |
+
},
|
2022 |
+
{
|
2023 |
+
"epoch": 375.6756756756757,
|
2024 |
+
"grad_norm": 0.3024432361125946,
|
2025 |
+
"learning_rate": 0.0001,
|
2026 |
+
"loss": 0.0134,
|
2027 |
+
"step": 13900
|
2028 |
+
},
|
2029 |
+
{
|
2030 |
+
"epoch": 377.02702702702703,
|
2031 |
+
"grad_norm": 0.35330072045326233,
|
2032 |
+
"learning_rate": 0.0001,
|
2033 |
+
"loss": 0.0143,
|
2034 |
+
"step": 13950
|
2035 |
+
},
|
2036 |
+
{
|
2037 |
+
"epoch": 378.3783783783784,
|
2038 |
+
"grad_norm": 0.21859917044639587,
|
2039 |
+
"learning_rate": 0.0001,
|
2040 |
+
"loss": 0.014,
|
2041 |
+
"step": 14000
|
2042 |
}
|
2043 |
],
|
2044 |
"logging_steps": 50,
|
|
|
2058 |
"attributes": {}
|
2059 |
}
|
2060 |
},
|
2061 |
+
"total_flos": 3.929919945418631e+20,
|
2062 |
"train_batch_size": 64,
|
2063 |
"trial_name": null,
|
2064 |
"trial_params": null
|