Training in progress, step 16000, checkpoint
Browse files- last-checkpoint/global_step16000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step16000/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step16000/zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step16000/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step16000/zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step16000/zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step16000/zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/global_step16000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74a25234c1f780e15e83309158ecb07c49c8c3351f6b3d5ebfbdf10c3d34478a
|
3 |
+
size 197282509
|
last-checkpoint/global_step16000/zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b74f02b0b5a6b6d5843ca46bc53062ab759eed1584bc29a69cffa6a1736ec15
|
3 |
+
size 180416968
|
last-checkpoint/global_step16000/zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ff92b38172cd635a23c777b6b6eefb0da4d30754fdf34e7d0630ddca213f14e
|
3 |
+
size 180416776
|
last-checkpoint/global_step16000/zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11febf62c9f73266c25236ab50dfab41409f97bdb56adbf4d4edf021564aa534
|
3 |
+
size 180416776
|
last-checkpoint/global_step16000/zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05e31ccf4cfa0c45c8d9396cff77752649a49b0f35441d5d054bece6e2267085
|
3 |
+
size 180416904
|
last-checkpoint/global_step16000/zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b1074e1d46fa74656185ee0b7b31dcbb97450cf0590305861d259d6c3219803
|
3 |
+
size 180416712
|
last-checkpoint/global_step16000/zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cd2bae6b2bd55970d2a0ec1ce3814d6bc32e58d931c3bb34290e3f28f220dab
|
3 |
+
size 180417096
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step16000
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 188836816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86cdbcf702f36b103531085178dbb194b5ed24a617c693786569c5f06a28a997
|
3 |
size 188836816
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1762a051d2ef63782191c6ddb670046fd603a79f169ade2a20e3ec7968d73ae2
|
3 |
size 15536
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:528307c815a7fc3ae0e940360ffd3bbc2afb3e407b6c50c9bc322b05f89b5a94
|
3 |
size 15536
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94b4c77667b44a108f2c4524ecb78b00f15dea830fc559e3dd27f09695096d4e
|
3 |
size 15536
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2af1fcdccef02a571531525d33c8c2ab59e4a0d3036f87ff6b31fa9cbedb4da
|
3 |
size 15536
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89c6761be70409b46c72197709a9276709a6e1e13c93978aa3d44a91c5dd5845
|
3 |
size 15536
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecd7e3c5995ac03f69428fef9fdfff61483fb597ae67d40f95f01eb5737db5e4
|
3 |
+
size 15536
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90507ecd8d24438d3d06f3d874381f825e91ab823f24d3428100127026ea819d
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2258,6 +2258,76 @@
|
|
2258 |
"learning_rate": 0.0001,
|
2259 |
"loss": 0.0129,
|
2260 |
"step": 15500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2261 |
}
|
2262 |
],
|
2263 |
"logging_steps": 50,
|
@@ -2277,7 +2347,7 @@
|
|
2277 |
"attributes": {}
|
2278 |
}
|
2279 |
},
|
2280 |
-
"total_flos": 4.
|
2281 |
"train_batch_size": 64,
|
2282 |
"trial_name": null,
|
2283 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 432.43243243243245,
|
5 |
"eval_steps": 1500,
|
6 |
+
"global_step": 16000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2258 |
"learning_rate": 0.0001,
|
2259 |
"loss": 0.0129,
|
2260 |
"step": 15500
|
2261 |
+
},
|
2262 |
+
{
|
2263 |
+
"epoch": 420.27027027027026,
|
2264 |
+
"grad_norm": 0.35932862758636475,
|
2265 |
+
"learning_rate": 0.0001,
|
2266 |
+
"loss": 0.0133,
|
2267 |
+
"step": 15550
|
2268 |
+
},
|
2269 |
+
{
|
2270 |
+
"epoch": 421.6216216216216,
|
2271 |
+
"grad_norm": 0.20093189179897308,
|
2272 |
+
"learning_rate": 0.0001,
|
2273 |
+
"loss": 0.0126,
|
2274 |
+
"step": 15600
|
2275 |
+
},
|
2276 |
+
{
|
2277 |
+
"epoch": 422.97297297297297,
|
2278 |
+
"grad_norm": 0.32909420132637024,
|
2279 |
+
"learning_rate": 0.0001,
|
2280 |
+
"loss": 0.0128,
|
2281 |
+
"step": 15650
|
2282 |
+
},
|
2283 |
+
{
|
2284 |
+
"epoch": 424.3243243243243,
|
2285 |
+
"grad_norm": 0.28278329968452454,
|
2286 |
+
"learning_rate": 0.0001,
|
2287 |
+
"loss": 0.0117,
|
2288 |
+
"step": 15700
|
2289 |
+
},
|
2290 |
+
{
|
2291 |
+
"epoch": 425.6756756756757,
|
2292 |
+
"grad_norm": 0.1597350388765335,
|
2293 |
+
"learning_rate": 0.0001,
|
2294 |
+
"loss": 0.0119,
|
2295 |
+
"step": 15750
|
2296 |
+
},
|
2297 |
+
{
|
2298 |
+
"epoch": 427.02702702702703,
|
2299 |
+
"grad_norm": 0.20241086184978485,
|
2300 |
+
"learning_rate": 0.0001,
|
2301 |
+
"loss": 0.0114,
|
2302 |
+
"step": 15800
|
2303 |
+
},
|
2304 |
+
{
|
2305 |
+
"epoch": 428.3783783783784,
|
2306 |
+
"grad_norm": 0.24632301926612854,
|
2307 |
+
"learning_rate": 0.0001,
|
2308 |
+
"loss": 0.0115,
|
2309 |
+
"step": 15850
|
2310 |
+
},
|
2311 |
+
{
|
2312 |
+
"epoch": 429.72972972972974,
|
2313 |
+
"grad_norm": 0.36104726791381836,
|
2314 |
+
"learning_rate": 0.0001,
|
2315 |
+
"loss": 0.0114,
|
2316 |
+
"step": 15900
|
2317 |
+
},
|
2318 |
+
{
|
2319 |
+
"epoch": 431.0810810810811,
|
2320 |
+
"grad_norm": 0.23273630440235138,
|
2321 |
+
"learning_rate": 0.0001,
|
2322 |
+
"loss": 0.0115,
|
2323 |
+
"step": 15950
|
2324 |
+
},
|
2325 |
+
{
|
2326 |
+
"epoch": 432.43243243243245,
|
2327 |
+
"grad_norm": 0.2528134882450104,
|
2328 |
+
"learning_rate": 0.0001,
|
2329 |
+
"loss": 0.0113,
|
2330 |
+
"step": 16000
|
2331 |
}
|
2332 |
],
|
2333 |
"logging_steps": 50,
|
|
|
2347 |
"attributes": {}
|
2348 |
}
|
2349 |
},
|
2350 |
+
"total_flos": 4.491310429887309e+20,
|
2351 |
"train_batch_size": 64,
|
2352 |
"trial_name": null,
|
2353 |
"trial_params": null
|