Training in progress, step 560, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdcb8e5e7c548a56cd0a35a2c9bb10ad791d3828eed0c063e5a6d352374c517d
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61a19135443e44dedc426550bf9883c5626a874a708b9a836625a70bf1db005e
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce764d34a7f8f42ea0e1eeb92ce016727a74857ee03f898e45301c13248fe7f7
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5863424f0312bb85e67d22274e1da6de21abafad54b7c797160a5bf5abd89083
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3908,6 +3908,41 @@
|
|
3908 |
"learning_rate": 9.572744019753752e-05,
|
3909 |
"loss": 0.9516,
|
3910 |
"step": 555
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3911 |
}
|
3912 |
],
|
3913 |
"logging_steps": 1,
|
@@ -3927,7 +3962,7 @@
|
|
3927 |
"attributes": {}
|
3928 |
}
|
3929 |
},
|
3930 |
-
"total_flos": 6.
|
3931 |
"train_batch_size": 4,
|
3932 |
"trial_name": null,
|
3933 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7246845681009382,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 560,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3908 |
"learning_rate": 9.572744019753752e-05,
|
3909 |
"loss": 0.9516,
|
3910 |
"step": 555
|
3911 |
+
},
|
3912 |
+
{
|
3913 |
+
"epoch": 0.71950824975736,
|
3914 |
+
"grad_norm": 0.7633935213088989,
|
3915 |
+
"learning_rate": 9.571074883435496e-05,
|
3916 |
+
"loss": 0.8415,
|
3917 |
+
"step": 556
|
3918 |
+
},
|
3919 |
+
{
|
3920 |
+
"epoch": 0.7208023293432546,
|
3921 |
+
"grad_norm": 0.7996200919151306,
|
3922 |
+
"learning_rate": 9.569402639209589e-05,
|
3923 |
+
"loss": 0.9336,
|
3924 |
+
"step": 557
|
3925 |
+
},
|
3926 |
+
{
|
3927 |
+
"epoch": 0.7220964089291492,
|
3928 |
+
"grad_norm": 0.879639208316803,
|
3929 |
+
"learning_rate": 9.567727288213005e-05,
|
3930 |
+
"loss": 0.9479,
|
3931 |
+
"step": 558
|
3932 |
+
},
|
3933 |
+
{
|
3934 |
+
"epoch": 0.7233904885150437,
|
3935 |
+
"grad_norm": 0.7905813455581665,
|
3936 |
+
"learning_rate": 9.566048831584826e-05,
|
3937 |
+
"loss": 0.7692,
|
3938 |
+
"step": 559
|
3939 |
+
},
|
3940 |
+
{
|
3941 |
+
"epoch": 0.7246845681009382,
|
3942 |
+
"grad_norm": 0.7934820055961609,
|
3943 |
+
"learning_rate": 9.564367270466247e-05,
|
3944 |
+
"loss": 0.8735,
|
3945 |
+
"step": 560
|
3946 |
}
|
3947 |
],
|
3948 |
"logging_steps": 1,
|
|
|
3962 |
"attributes": {}
|
3963 |
}
|
3964 |
},
|
3965 |
+
"total_flos": 6.260448161483981e+17,
|
3966 |
"train_batch_size": 4,
|
3967 |
"trial_name": null,
|
3968 |
"trial_params": null
|