Training in progress, step 555, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f8e941e5c7588fbc5cab26786881bb5e357ec1562a118646bc389224d60a45a
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d65945072135b85c2df9ab1f2183dceaa4884df6149d34e86ba3b5291e5289d
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2892548ae9c9c42bac9274716154c6f511e3242c31db918587a29bba4aed7b3f
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb205f512e2dbedc6e9149b55b4f69ad4c7795b9c876b3eb5aa8d8b727bca50e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3873,6 +3873,41 @@
|
|
3873 |
"learning_rate": 9.581043043084259e-05,
|
3874 |
"loss": 0.9006,
|
3875 |
"step": 550
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3876 |
}
|
3877 |
],
|
3878 |
"logging_steps": 1,
|
@@ -3892,7 +3927,7 @@
|
|
3892 |
"attributes": {}
|
3893 |
}
|
3894 |
},
|
3895 |
-
"total_flos": 6.
|
3896 |
"train_batch_size": 4,
|
3897 |
"trial_name": null,
|
3898 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7182141701714655,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 555,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3873 |
"learning_rate": 9.581043043084259e-05,
|
3874 |
"loss": 0.9006,
|
3875 |
"step": 550
|
3876 |
+
},
|
3877 |
+
{
|
3878 |
+
"epoch": 0.7130378518278874,
|
3879 |
+
"grad_norm": 0.7422590255737305,
|
3880 |
+
"learning_rate": 9.579389463284834e-05,
|
3881 |
+
"loss": 0.9591,
|
3882 |
+
"step": 551
|
3883 |
+
},
|
3884 |
+
{
|
3885 |
+
"epoch": 0.7143319314137819,
|
3886 |
+
"grad_norm": 0.7437978982925415,
|
3887 |
+
"learning_rate": 9.577732769924614e-05,
|
3888 |
+
"loss": 0.8823,
|
3889 |
+
"step": 552
|
3890 |
+
},
|
3891 |
+
{
|
3892 |
+
"epoch": 0.7156260109996765,
|
3893 |
+
"grad_norm": 0.7947098016738892,
|
3894 |
+
"learning_rate": 9.576072964129998e-05,
|
3895 |
+
"loss": 0.8161,
|
3896 |
+
"step": 553
|
3897 |
+
},
|
3898 |
+
{
|
3899 |
+
"epoch": 0.716920090585571,
|
3900 |
+
"grad_norm": 0.8041345477104187,
|
3901 |
+
"learning_rate": 9.574410047029502e-05,
|
3902 |
+
"loss": 0.9831,
|
3903 |
+
"step": 554
|
3904 |
+
},
|
3905 |
+
{
|
3906 |
+
"epoch": 0.7182141701714655,
|
3907 |
+
"grad_norm": 0.9462196826934814,
|
3908 |
+
"learning_rate": 9.572744019753752e-05,
|
3909 |
+
"loss": 0.9516,
|
3910 |
+
"step": 555
|
3911 |
}
|
3912 |
],
|
3913 |
"logging_steps": 1,
|
|
|
3927 |
"attributes": {}
|
3928 |
}
|
3929 |
},
|
3930 |
+
"total_flos": 6.204551302899302e+17,
|
3931 |
"train_batch_size": 4,
|
3932 |
"trial_name": null,
|
3933 |
"trial_params": null
|