Training in progress, step 550, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc223b1f7a1f91499c6fce1d9c7b9777a001cfe758ed08b366efeaf34c67c8e2
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3d3f7959fdeb60dff6f4ccbb618b2866aed60776f61dc00bb8c8a81aaf08a53
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a68ff03bce9d2cb56819b178dca5e85d0900211ca081621ebca96dbb0b14518a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a885f4d2be962a847ad04a40c0900fb3136769296a180ac63536e4bcec83f515
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3838,6 +3838,41 @@
|
|
3838 |
"learning_rate": 9.589264199393776e-05,
|
3839 |
"loss": 1.0543,
|
3840 |
"step": 545
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3841 |
}
|
3842 |
],
|
3843 |
"logging_steps": 1,
|
@@ -3857,7 +3892,7 @@
|
|
3857 |
"attributes": {}
|
3858 |
}
|
3859 |
},
|
3860 |
-
"total_flos": 6.
|
3861 |
"train_batch_size": 4,
|
3862 |
"trial_name": null,
|
3863 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7117437722419929,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 550,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3838 |
"learning_rate": 9.589264199393776e-05,
|
3839 |
"loss": 1.0543,
|
3840 |
"step": 545
|
3841 |
+
},
|
3842 |
+
{
|
3843 |
+
"epoch": 0.7065674538984148,
|
3844 |
+
"grad_norm": 0.7370772361755371,
|
3845 |
+
"learning_rate": 9.587626204220171e-05,
|
3846 |
+
"loss": 0.8853,
|
3847 |
+
"step": 546
|
3848 |
+
},
|
3849 |
+
{
|
3850 |
+
"epoch": 0.7078615334843092,
|
3851 |
+
"grad_norm": 0.7535960674285889,
|
3852 |
+
"learning_rate": 9.585985089885552e-05,
|
3853 |
+
"loss": 0.8419,
|
3854 |
+
"step": 547
|
3855 |
+
},
|
3856 |
+
{
|
3857 |
+
"epoch": 0.7091556130702038,
|
3858 |
+
"grad_norm": 0.7049685716629028,
|
3859 |
+
"learning_rate": 9.584340857505722e-05,
|
3860 |
+
"loss": 0.8282,
|
3861 |
+
"step": 548
|
3862 |
+
},
|
3863 |
+
{
|
3864 |
+
"epoch": 0.7104496926560984,
|
3865 |
+
"grad_norm": 0.7157800197601318,
|
3866 |
+
"learning_rate": 9.58269350819861e-05,
|
3867 |
+
"loss": 0.8282,
|
3868 |
+
"step": 549
|
3869 |
+
},
|
3870 |
+
{
|
3871 |
+
"epoch": 0.7117437722419929,
|
3872 |
+
"grad_norm": 0.7660270929336548,
|
3873 |
+
"learning_rate": 9.581043043084259e-05,
|
3874 |
+
"loss": 0.9006,
|
3875 |
+
"step": 550
|
3876 |
}
|
3877 |
],
|
3878 |
"logging_steps": 1,
|
|
|
3892 |
"attributes": {}
|
3893 |
}
|
3894 |
},
|
3895 |
+
"total_flos": 6.148654444314624e+17,
|
3896 |
"train_batch_size": 4,
|
3897 |
"trial_name": null,
|
3898 |
"trial_params": null
|