Training in progress, step 380000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acfe31078427c2466c04f91ee9954112b94b597697f50ae668465f7dbaa80c33
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:671a639f98cfc5cfe3a76fa4d953a2396bd5fea5c2da42dd392d72e90b22a992
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6efc0dba6eb98b22955214199c73c4905ac919dfd211579a6b4dec17e2f9c75
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c2047b5f47fc3de929bb0738f7fbdd248300ab063f6fd4eddcabc29f5482852
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7406,11 +7406,211 @@
|
|
7406 |
"eval_samples_per_second": 1151.317,
|
7407 |
"eval_steps_per_second": 18.044,
|
7408 |
"step": 370000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7409 |
}
|
7410 |
],
|
7411 |
"max_steps": 500000,
|
7412 |
"num_train_epochs": 12,
|
7413 |
-
"total_flos": 1.
|
7414 |
"trial_name": null,
|
7415 |
"trial_params": null
|
7416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.476088508208422,
|
5 |
+
"global_step": 380000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7406 |
"eval_samples_per_second": 1151.317,
|
7407 |
"eval_steps_per_second": 18.044,
|
7408 |
"step": 370000
|
7409 |
+
},
|
7410 |
+
{
|
7411 |
+
"epoch": 8.26,
|
7412 |
+
"learning_rate": 6.0012253629189544e-05,
|
7413 |
+
"loss": 0.261,
|
7414 |
+
"step": 370500
|
7415 |
+
},
|
7416 |
+
{
|
7417 |
+
"epoch": 8.28,
|
7418 |
+
"learning_rate": 5.965048145015944e-05,
|
7419 |
+
"loss": 0.2611,
|
7420 |
+
"step": 371000
|
7421 |
+
},
|
7422 |
+
{
|
7423 |
+
"epoch": 8.28,
|
7424 |
+
"eval_loss": 0.24591800570487976,
|
7425 |
+
"eval_runtime": 1.9624,
|
7426 |
+
"eval_samples_per_second": 1170.514,
|
7427 |
+
"eval_steps_per_second": 18.345,
|
7428 |
+
"step": 371000
|
7429 |
+
},
|
7430 |
+
{
|
7431 |
+
"epoch": 8.29,
|
7432 |
+
"learning_rate": 5.928975199818785e-05,
|
7433 |
+
"loss": 0.2611,
|
7434 |
+
"step": 371500
|
7435 |
+
},
|
7436 |
+
{
|
7437 |
+
"epoch": 8.3,
|
7438 |
+
"learning_rate": 5.893006921815428e-05,
|
7439 |
+
"loss": 0.2608,
|
7440 |
+
"step": 372000
|
7441 |
+
},
|
7442 |
+
{
|
7443 |
+
"epoch": 8.3,
|
7444 |
+
"eval_loss": 0.2432386726140976,
|
7445 |
+
"eval_runtime": 2.0093,
|
7446 |
+
"eval_samples_per_second": 1143.209,
|
7447 |
+
"eval_steps_per_second": 17.917,
|
7448 |
+
"step": 372000
|
7449 |
+
},
|
7450 |
+
{
|
7451 |
+
"epoch": 8.31,
|
7452 |
+
"learning_rate": 5.857143704349198e-05,
|
7453 |
+
"loss": 0.2608,
|
7454 |
+
"step": 372500
|
7455 |
+
},
|
7456 |
+
{
|
7457 |
+
"epoch": 8.32,
|
7458 |
+
"learning_rate": 5.8213859396144986e-05,
|
7459 |
+
"loss": 0.2605,
|
7460 |
+
"step": 373000
|
7461 |
+
},
|
7462 |
+
{
|
7463 |
+
"epoch": 8.32,
|
7464 |
+
"eval_loss": 0.2445555180311203,
|
7465 |
+
"eval_runtime": 1.9525,
|
7466 |
+
"eval_samples_per_second": 1176.451,
|
7467 |
+
"eval_steps_per_second": 18.438,
|
7468 |
+
"step": 373000
|
7469 |
+
},
|
7470 |
+
{
|
7471 |
+
"epoch": 8.33,
|
7472 |
+
"learning_rate": 5.785734018652507e-05,
|
7473 |
+
"loss": 0.2609,
|
7474 |
+
"step": 373500
|
7475 |
+
},
|
7476 |
+
{
|
7477 |
+
"epoch": 8.34,
|
7478 |
+
"learning_rate": 5.750188331346927e-05,
|
7479 |
+
"loss": 0.2609,
|
7480 |
+
"step": 374000
|
7481 |
+
},
|
7482 |
+
{
|
7483 |
+
"epoch": 8.34,
|
7484 |
+
"eval_loss": 0.24143685400485992,
|
7485 |
+
"eval_runtime": 1.9945,
|
7486 |
+
"eval_samples_per_second": 1151.665,
|
7487 |
+
"eval_steps_per_second": 18.05,
|
7488 |
+
"step": 374000
|
7489 |
+
},
|
7490 |
+
{
|
7491 |
+
"epoch": 8.35,
|
7492 |
+
"learning_rate": 5.714749266419695e-05,
|
7493 |
+
"loss": 0.2605,
|
7494 |
+
"step": 374500
|
7495 |
+
},
|
7496 |
+
{
|
7497 |
+
"epoch": 8.36,
|
7498 |
+
"learning_rate": 5.6794172114267566e-05,
|
7499 |
+
"loss": 0.2614,
|
7500 |
+
"step": 375000
|
7501 |
+
},
|
7502 |
+
{
|
7503 |
+
"epoch": 8.36,
|
7504 |
+
"eval_loss": 0.2436528205871582,
|
7505 |
+
"eval_runtime": 1.9628,
|
7506 |
+
"eval_samples_per_second": 1170.242,
|
7507 |
+
"eval_steps_per_second": 18.341,
|
7508 |
+
"step": 375000
|
7509 |
+
},
|
7510 |
+
{
|
7511 |
+
"epoch": 8.38,
|
7512 |
+
"learning_rate": 5.6441925527537914e-05,
|
7513 |
+
"loss": 0.2614,
|
7514 |
+
"step": 375500
|
7515 |
+
},
|
7516 |
+
{
|
7517 |
+
"epoch": 8.39,
|
7518 |
+
"learning_rate": 5.60907567561203e-05,
|
7519 |
+
"loss": 0.2624,
|
7520 |
+
"step": 376000
|
7521 |
+
},
|
7522 |
+
{
|
7523 |
+
"epoch": 8.39,
|
7524 |
+
"eval_loss": 0.24615313112735748,
|
7525 |
+
"eval_runtime": 1.9915,
|
7526 |
+
"eval_samples_per_second": 1153.415,
|
7527 |
+
"eval_steps_per_second": 18.077,
|
7528 |
+
"step": 376000
|
7529 |
+
},
|
7530 |
+
{
|
7531 |
+
"epoch": 8.4,
|
7532 |
+
"learning_rate": 5.574066964034012e-05,
|
7533 |
+
"loss": 0.2614,
|
7534 |
+
"step": 376500
|
7535 |
+
},
|
7536 |
+
{
|
7537 |
+
"epoch": 8.41,
|
7538 |
+
"learning_rate": 5.539166800869402e-05,
|
7539 |
+
"loss": 0.2611,
|
7540 |
+
"step": 377000
|
7541 |
+
},
|
7542 |
+
{
|
7543 |
+
"epoch": 8.41,
|
7544 |
+
"eval_loss": 0.24158482253551483,
|
7545 |
+
"eval_runtime": 1.9896,
|
7546 |
+
"eval_samples_per_second": 1154.526,
|
7547 |
+
"eval_steps_per_second": 18.094,
|
7548 |
+
"step": 377000
|
7549 |
+
},
|
7550 |
+
{
|
7551 |
+
"epoch": 8.42,
|
7552 |
+
"learning_rate": 5.5043755677807955e-05,
|
7553 |
+
"loss": 0.261,
|
7554 |
+
"step": 377500
|
7555 |
+
},
|
7556 |
+
{
|
7557 |
+
"epoch": 8.43,
|
7558 |
+
"learning_rate": 5.4696936452395344e-05,
|
7559 |
+
"loss": 0.2604,
|
7560 |
+
"step": 378000
|
7561 |
+
},
|
7562 |
+
{
|
7563 |
+
"epoch": 8.43,
|
7564 |
+
"eval_loss": 0.2432401180267334,
|
7565 |
+
"eval_runtime": 1.9707,
|
7566 |
+
"eval_samples_per_second": 1165.6,
|
7567 |
+
"eval_steps_per_second": 18.268,
|
7568 |
+
"step": 378000
|
7569 |
+
},
|
7570 |
+
{
|
7571 |
+
"epoch": 8.44,
|
7572 |
+
"learning_rate": 5.435121412521576e-05,
|
7573 |
+
"loss": 0.2604,
|
7574 |
+
"step": 378500
|
7575 |
+
},
|
7576 |
+
{
|
7577 |
+
"epoch": 8.45,
|
7578 |
+
"learning_rate": 5.400659247703307e-05,
|
7579 |
+
"loss": 0.2605,
|
7580 |
+
"step": 379000
|
7581 |
+
},
|
7582 |
+
{
|
7583 |
+
"epoch": 8.45,
|
7584 |
+
"eval_loss": 0.24426017701625824,
|
7585 |
+
"eval_runtime": 2.017,
|
7586 |
+
"eval_samples_per_second": 1138.839,
|
7587 |
+
"eval_steps_per_second": 17.849,
|
7588 |
+
"step": 379000
|
7589 |
+
},
|
7590 |
+
{
|
7591 |
+
"epoch": 8.46,
|
7592 |
+
"learning_rate": 5.36630752765745e-05,
|
7593 |
+
"loss": 0.2605,
|
7594 |
+
"step": 379500
|
7595 |
+
},
|
7596 |
+
{
|
7597 |
+
"epoch": 8.48,
|
7598 |
+
"learning_rate": 5.3320666280489146e-05,
|
7599 |
+
"loss": 0.26,
|
7600 |
+
"step": 380000
|
7601 |
+
},
|
7602 |
+
{
|
7603 |
+
"epoch": 8.48,
|
7604 |
+
"eval_loss": 0.24248941242694855,
|
7605 |
+
"eval_runtime": 2.0075,
|
7606 |
+
"eval_samples_per_second": 1144.188,
|
7607 |
+
"eval_steps_per_second": 17.932,
|
7608 |
+
"step": 380000
|
7609 |
}
|
7610 |
],
|
7611 |
"max_steps": 500000,
|
7612 |
"num_train_epochs": 12,
|
7613 |
+
"total_flos": 1.2140283639339678e+22,
|
7614 |
"trial_name": null,
|
7615 |
"trial_params": null
|
7616 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:671a639f98cfc5cfe3a76fa4d953a2396bd5fea5c2da42dd392d72e90b22a992
|
3 |
size 102501541
|