Training in progress, step 520, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e50a11a2f59ac5336df1f861c9746e3ea52482c3b25e71230c372afe31efafd
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc1656d9776aa288a9d3c783e91c4974f0bbbcdbed454e7021b914cb6025fb87
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdcc456807bf80d9919c70d11abc1b5d0f53d48b9062054a56c7c3a7e1457747
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18af6c6b5b006201434fd2ba8945bc39086f249834bda28bb96a3935a9852f63
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3628,6 +3628,41 @@
|
|
3628 |
"learning_rate": 9.636948197292052e-05,
|
3629 |
"loss": 0.8313,
|
3630 |
"step": 515
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3631 |
}
|
3632 |
],
|
3633 |
"logging_steps": 1,
|
@@ -3647,7 +3682,7 @@
|
|
3647 |
"attributes": {}
|
3648 |
}
|
3649 |
},
|
3650 |
-
"total_flos": 5.
|
3651 |
"train_batch_size": 4,
|
3652 |
"trial_name": null,
|
3653 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6729213846651569,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 520,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3628 |
"learning_rate": 9.636948197292052e-05,
|
3629 |
"loss": 0.8313,
|
3630 |
"step": 515
|
3631 |
+
},
|
3632 |
+
{
|
3633 |
+
"epoch": 0.6677450663215788,
|
3634 |
+
"grad_norm": 0.7210227251052856,
|
3635 |
+
"learning_rate": 9.635404285252777e-05,
|
3636 |
+
"loss": 0.8593,
|
3637 |
+
"step": 516
|
3638 |
+
},
|
3639 |
+
{
|
3640 |
+
"epoch": 0.6690391459074733,
|
3641 |
+
"grad_norm": 0.7367059588432312,
|
3642 |
+
"learning_rate": 9.633857221567815e-05,
|
3643 |
+
"loss": 0.9743,
|
3644 |
+
"step": 517
|
3645 |
+
},
|
3646 |
+
{
|
3647 |
+
"epoch": 0.6703332254933678,
|
3648 |
+
"grad_norm": 0.8669779896736145,
|
3649 |
+
"learning_rate": 9.632307007289027e-05,
|
3650 |
+
"loss": 0.8914,
|
3651 |
+
"step": 518
|
3652 |
+
},
|
3653 |
+
{
|
3654 |
+
"epoch": 0.6716273050792624,
|
3655 |
+
"grad_norm": 0.700624406337738,
|
3656 |
+
"learning_rate": 9.630753643470416e-05,
|
3657 |
+
"loss": 0.9032,
|
3658 |
+
"step": 519
|
3659 |
+
},
|
3660 |
+
{
|
3661 |
+
"epoch": 0.6729213846651569,
|
3662 |
+
"grad_norm": 0.81085205078125,
|
3663 |
+
"learning_rate": 9.629197131168124e-05,
|
3664 |
+
"loss": 0.8612,
|
3665 |
+
"step": 520
|
3666 |
}
|
3667 |
],
|
3668 |
"logging_steps": 1,
|
|
|
3682 |
"attributes": {}
|
3683 |
}
|
3684 |
},
|
3685 |
+
"total_flos": 5.813273292806554e+17,
|
3686 |
"train_batch_size": 4,
|
3687 |
"trial_name": null,
|
3688 |
"trial_params": null
|