gabrielaltay committed
Commit c2bb426
1 Parent(s): b1cea36
Training in progress, step 8256, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8555c6c30b01b7b518e204262ad49bdfc8a647ffac30864d51ee8c8057b5b58b
 size 439648328
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c65419cfd6a880659ccadbe3db88894c9a0bc93ac16d9c1b7a0c6cf2cbc2b395
 size 879415866
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5528a8a5438254c67bb6f375f3876eeca26717fef489265e3b041c5387c9fb8f
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bc1d87868b3d95ab9fb053bc3e7b7216c1360a2d6ef559d5a4f71fdb1eb48e41
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c699c45754dba9f295f88b976126b3ed2ecc4605b1af134d5e1f2b88049fd75b
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.8004653868528214,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 8256,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12649,6 +12649,1812 @@
       "learning_rate": 1.4979639325189065e-05,
       "loss": 5.2557,
       "step": 7224
12652 |
+
},
|
12653 |
+
{
|
12654 |
+
"epoch": 0.7,
|
12655 |
+
"grad_norm": 1.0959160327911377,
|
12656 |
+
"learning_rate": 1.4960248206321506e-05,
|
12657 |
+
"loss": 5.2666,
|
12658 |
+
"step": 7228
|
12659 |
+
},
|
12660 |
+
{
|
12661 |
+
"epoch": 0.7,
|
12662 |
+
"grad_norm": 1.0157089233398438,
|
12663 |
+
"learning_rate": 1.4940857087453946e-05,
|
12664 |
+
"loss": 5.3451,
|
12665 |
+
"step": 7232
|
12666 |
+
},
|
12667 |
+
{
|
12668 |
+
"epoch": 0.7,
|
12669 |
+
"grad_norm": 1.1046866178512573,
|
12670 |
+
"learning_rate": 1.4921465968586387e-05,
|
12671 |
+
"loss": 5.2349,
|
12672 |
+
"step": 7236
|
12673 |
+
},
|
12674 |
+
{
|
12675 |
+
"epoch": 0.7,
|
12676 |
+
"grad_norm": 1.0781642198562622,
|
12677 |
+
"learning_rate": 1.4902074849718831e-05,
|
12678 |
+
"loss": 5.2609,
|
12679 |
+
"step": 7240
|
12680 |
+
},
|
12681 |
+
{
|
12682 |
+
"epoch": 0.7,
|
12683 |
+
"grad_norm": 1.0019387006759644,
|
12684 |
+
"learning_rate": 1.4882683730851271e-05,
|
12685 |
+
"loss": 5.3954,
|
12686 |
+
"step": 7244
|
12687 |
+
},
|
12688 |
+
{
|
12689 |
+
"epoch": 0.7,
|
12690 |
+
"grad_norm": 1.1083266735076904,
|
12691 |
+
"learning_rate": 1.4863292611983712e-05,
|
12692 |
+
"loss": 5.2636,
|
12693 |
+
"step": 7248
|
12694 |
+
},
|
12695 |
+
{
|
12696 |
+
"epoch": 0.7,
|
12697 |
+
"grad_norm": 1.2309002876281738,
|
12698 |
+
"learning_rate": 1.4843901493116152e-05,
|
12699 |
+
"loss": 5.2955,
|
12700 |
+
"step": 7252
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 0.7,
|
12704 |
+
"grad_norm": 1.087774634361267,
|
12705 |
+
"learning_rate": 1.4824510374248596e-05,
|
12706 |
+
"loss": 5.3,
|
12707 |
+
"step": 7256
|
12708 |
+
},
|
12709 |
+
{
|
12710 |
+
"epoch": 0.7,
|
12711 |
+
"grad_norm": 1.075287938117981,
|
12712 |
+
"learning_rate": 1.4805119255381037e-05,
|
12713 |
+
"loss": 5.3727,
|
12714 |
+
"step": 7260
|
12715 |
+
},
|
12716 |
+
{
|
12717 |
+
"epoch": 0.7,
|
12718 |
+
"grad_norm": 1.0246081352233887,
|
12719 |
+
"learning_rate": 1.4785728136513477e-05,
|
12720 |
+
"loss": 5.3558,
|
12721 |
+
"step": 7264
|
12722 |
+
},
|
12723 |
+
{
|
12724 |
+
"epoch": 0.7,
|
12725 |
+
"grad_norm": 1.124543309211731,
|
12726 |
+
"learning_rate": 1.4766337017645918e-05,
|
12727 |
+
"loss": 5.3379,
|
12728 |
+
"step": 7268
|
12729 |
+
},
|
12730 |
+
{
|
12731 |
+
"epoch": 0.71,
|
12732 |
+
"grad_norm": 1.047892689704895,
|
12733 |
+
"learning_rate": 1.474694589877836e-05,
|
12734 |
+
"loss": 5.3975,
|
12735 |
+
"step": 7272
|
12736 |
+
},
|
12737 |
+
{
|
12738 |
+
"epoch": 0.71,
|
12739 |
+
"grad_norm": 1.0381947755813599,
|
12740 |
+
"learning_rate": 1.47275547799108e-05,
|
12741 |
+
"loss": 5.3515,
|
12742 |
+
"step": 7276
|
12743 |
+
},
|
12744 |
+
{
|
12745 |
+
"epoch": 0.71,
|
12746 |
+
"grad_norm": 1.0230307579040527,
|
12747 |
+
"learning_rate": 1.4708163661043243e-05,
|
12748 |
+
"loss": 5.2925,
|
12749 |
+
"step": 7280
|
12750 |
+
},
|
12751 |
+
{
|
12752 |
+
"epoch": 0.71,
|
12753 |
+
"grad_norm": 1.0596458911895752,
|
12754 |
+
"learning_rate": 1.4688772542175685e-05,
|
12755 |
+
"loss": 5.3191,
|
12756 |
+
"step": 7284
|
12757 |
+
},
|
12758 |
+
{
|
12759 |
+
"epoch": 0.71,
|
12760 |
+
"grad_norm": 1.1031346321105957,
|
12761 |
+
"learning_rate": 1.4669381423308126e-05,
|
12762 |
+
"loss": 5.402,
|
12763 |
+
"step": 7288
|
12764 |
+
},
|
12765 |
+
{
|
12766 |
+
"epoch": 0.71,
|
12767 |
+
"grad_norm": 1.0289580821990967,
|
12768 |
+
"learning_rate": 1.4649990304440566e-05,
|
12769 |
+
"loss": 5.3957,
|
12770 |
+
"step": 7292
|
12771 |
+
},
|
12772 |
+
{
|
12773 |
+
"epoch": 0.71,
|
12774 |
+
"grad_norm": 1.1469511985778809,
|
12775 |
+
"learning_rate": 1.4630599185573007e-05,
|
12776 |
+
"loss": 5.3244,
|
12777 |
+
"step": 7296
|
12778 |
+
},
|
12779 |
+
{
|
12780 |
+
"epoch": 0.71,
|
12781 |
+
"grad_norm": 1.0669410228729248,
|
12782 |
+
"learning_rate": 1.461120806670545e-05,
|
12783 |
+
"loss": 5.431,
|
12784 |
+
"step": 7300
|
12785 |
+
},
|
12786 |
+
{
|
12787 |
+
"epoch": 0.71,
|
12788 |
+
"grad_norm": 1.05574631690979,
|
12789 |
+
"learning_rate": 1.4591816947837891e-05,
|
12790 |
+
"loss": 5.3382,
|
12791 |
+
"step": 7304
|
12792 |
+
},
|
12793 |
+
{
|
12794 |
+
"epoch": 0.71,
|
12795 |
+
"grad_norm": 1.0296452045440674,
|
12796 |
+
"learning_rate": 1.4572425828970332e-05,
|
12797 |
+
"loss": 5.337,
|
12798 |
+
"step": 7308
|
12799 |
+
},
|
12800 |
+
{
|
12801 |
+
"epoch": 0.71,
|
12802 |
+
"grad_norm": 1.0180591344833374,
|
12803 |
+
"learning_rate": 1.4553034710102772e-05,
|
12804 |
+
"loss": 5.3239,
|
12805 |
+
"step": 7312
|
12806 |
+
},
|
12807 |
+
{
|
12808 |
+
"epoch": 0.71,
|
12809 |
+
"grad_norm": 1.0508371591567993,
|
12810 |
+
"learning_rate": 1.4533643591235216e-05,
|
12811 |
+
"loss": 5.2944,
|
12812 |
+
"step": 7316
|
12813 |
+
},
|
12814 |
+
{
|
12815 |
+
"epoch": 0.71,
|
12816 |
+
"grad_norm": 1.0255225896835327,
|
12817 |
+
"learning_rate": 1.4514252472367657e-05,
|
12818 |
+
"loss": 5.3574,
|
12819 |
+
"step": 7320
|
12820 |
+
},
|
12821 |
+
{
|
12822 |
+
"epoch": 0.71,
|
12823 |
+
"grad_norm": 1.0599967241287231,
|
12824 |
+
"learning_rate": 1.4494861353500097e-05,
|
12825 |
+
"loss": 5.3568,
|
12826 |
+
"step": 7324
|
12827 |
+
},
|
12828 |
+
{
|
12829 |
+
"epoch": 0.71,
|
12830 |
+
"grad_norm": 0.9832557439804077,
|
12831 |
+
"learning_rate": 1.4475470234632538e-05,
|
12832 |
+
"loss": 5.2891,
|
12833 |
+
"step": 7328
|
12834 |
+
},
|
12835 |
+
{
|
12836 |
+
"epoch": 0.71,
|
12837 |
+
"grad_norm": 1.0541869401931763,
|
12838 |
+
"learning_rate": 1.4456079115764982e-05,
|
12839 |
+
"loss": 5.2502,
|
12840 |
+
"step": 7332
|
12841 |
+
},
|
12842 |
+
{
|
12843 |
+
"epoch": 0.71,
|
12844 |
+
"grad_norm": 1.069973111152649,
|
12845 |
+
"learning_rate": 1.4436687996897422e-05,
|
12846 |
+
"loss": 5.3376,
|
12847 |
+
"step": 7336
|
12848 |
+
},
|
12849 |
+
{
|
12850 |
+
"epoch": 0.71,
|
12851 |
+
"grad_norm": 1.0768502950668335,
|
12852 |
+
"learning_rate": 1.4417296878029863e-05,
|
12853 |
+
"loss": 5.2392,
|
12854 |
+
"step": 7340
|
12855 |
+
},
|
12856 |
+
{
|
12857 |
+
"epoch": 0.71,
|
12858 |
+
"grad_norm": 1.000628113746643,
|
12859 |
+
"learning_rate": 1.4397905759162305e-05,
|
12860 |
+
"loss": 5.3293,
|
12861 |
+
"step": 7344
|
12862 |
+
},
|
12863 |
+
{
|
12864 |
+
"epoch": 0.71,
|
12865 |
+
"grad_norm": 0.9960778951644897,
|
12866 |
+
"learning_rate": 1.4378514640294746e-05,
|
12867 |
+
"loss": 5.2655,
|
12868 |
+
"step": 7348
|
12869 |
+
},
|
12870 |
+
{
|
12871 |
+
"epoch": 0.71,
|
12872 |
+
"grad_norm": 1.0327279567718506,
|
12873 |
+
"learning_rate": 1.4359123521427186e-05,
|
12874 |
+
"loss": 5.3477,
|
12875 |
+
"step": 7352
|
12876 |
+
},
|
12877 |
+
{
|
12878 |
+
"epoch": 0.71,
|
12879 |
+
"grad_norm": 1.0809035301208496,
|
12880 |
+
"learning_rate": 1.4339732402559627e-05,
|
12881 |
+
"loss": 5.331,
|
12882 |
+
"step": 7356
|
12883 |
+
},
|
12884 |
+
{
|
12885 |
+
"epoch": 0.71,
|
12886 |
+
"grad_norm": 1.0690807104110718,
|
12887 |
+
"learning_rate": 1.432034128369207e-05,
|
12888 |
+
"loss": 5.3986,
|
12889 |
+
"step": 7360
|
12890 |
+
},
|
12891 |
+
{
|
12892 |
+
"epoch": 0.71,
|
12893 |
+
"grad_norm": 1.0245548486709595,
|
12894 |
+
"learning_rate": 1.4300950164824511e-05,
|
12895 |
+
"loss": 5.3024,
|
12896 |
+
"step": 7364
|
12897 |
+
},
|
12898 |
+
{
|
12899 |
+
"epoch": 0.71,
|
12900 |
+
"grad_norm": 0.9999493956565857,
|
12901 |
+
"learning_rate": 1.4281559045956952e-05,
|
12902 |
+
"loss": 5.2878,
|
12903 |
+
"step": 7368
|
12904 |
+
},
|
12905 |
+
{
|
12906 |
+
"epoch": 0.71,
|
12907 |
+
"grad_norm": 1.0037769079208374,
|
12908 |
+
"learning_rate": 1.4262167927089392e-05,
|
12909 |
+
"loss": 5.2216,
|
12910 |
+
"step": 7372
|
12911 |
+
},
|
12912 |
+
{
|
12913 |
+
"epoch": 0.72,
|
12914 |
+
"grad_norm": 1.002113699913025,
|
12915 |
+
"learning_rate": 1.4242776808221836e-05,
|
12916 |
+
"loss": 5.4347,
|
12917 |
+
"step": 7376
|
12918 |
+
},
|
12919 |
+
{
|
12920 |
+
"epoch": 0.72,
|
12921 |
+
"grad_norm": 1.04192054271698,
|
12922 |
+
"learning_rate": 1.4223385689354277e-05,
|
12923 |
+
"loss": 5.3812,
|
12924 |
+
"step": 7380
|
12925 |
+
},
|
12926 |
+
{
|
12927 |
+
"epoch": 0.72,
|
12928 |
+
"grad_norm": 1.0271295309066772,
|
12929 |
+
"learning_rate": 1.4203994570486717e-05,
|
12930 |
+
"loss": 5.427,
|
12931 |
+
"step": 7384
|
12932 |
+
},
|
12933 |
+
{
|
12934 |
+
"epoch": 0.72,
|
12935 |
+
"grad_norm": 1.0071477890014648,
|
12936 |
+
"learning_rate": 1.4184603451619158e-05,
|
12937 |
+
"loss": 5.2727,
|
12938 |
+
"step": 7388
|
12939 |
+
},
|
12940 |
+
{
|
12941 |
+
"epoch": 0.72,
|
12942 |
+
"grad_norm": 1.0228255987167358,
|
12943 |
+
"learning_rate": 1.4165212332751602e-05,
|
12944 |
+
"loss": 5.3244,
|
12945 |
+
"step": 7392
|
12946 |
+
},
|
12947 |
+
{
|
12948 |
+
"epoch": 0.72,
|
12949 |
+
"grad_norm": 1.0034810304641724,
|
12950 |
+
"learning_rate": 1.4145821213884042e-05,
|
12951 |
+
"loss": 5.3883,
|
12952 |
+
"step": 7396
|
12953 |
+
},
|
12954 |
+
{
|
12955 |
+
"epoch": 0.72,
|
12956 |
+
"grad_norm": 1.0305688381195068,
|
12957 |
+
"learning_rate": 1.4126430095016483e-05,
|
12958 |
+
"loss": 5.3851,
|
12959 |
+
"step": 7400
|
12960 |
+
},
|
12961 |
+
{
|
12962 |
+
"epoch": 0.72,
|
12963 |
+
"grad_norm": 1.074646234512329,
|
12964 |
+
"learning_rate": 1.4107038976148925e-05,
|
12965 |
+
"loss": 5.2406,
|
12966 |
+
"step": 7404
|
12967 |
+
},
|
12968 |
+
{
|
12969 |
+
"epoch": 0.72,
|
12970 |
+
"grad_norm": 0.9961770176887512,
|
12971 |
+
"learning_rate": 1.4087647857281366e-05,
|
12972 |
+
"loss": 5.2848,
|
12973 |
+
"step": 7408
|
12974 |
+
},
|
12975 |
+
{
|
12976 |
+
"epoch": 0.72,
|
12977 |
+
"grad_norm": 1.034627079963684,
|
12978 |
+
"learning_rate": 1.4068256738413808e-05,
|
12979 |
+
"loss": 5.2984,
|
12980 |
+
"step": 7412
|
12981 |
+
},
|
12982 |
+
{
|
12983 |
+
"epoch": 0.72,
|
12984 |
+
"grad_norm": 1.072096824645996,
|
12985 |
+
"learning_rate": 1.4048865619546248e-05,
|
12986 |
+
"loss": 5.3182,
|
12987 |
+
"step": 7416
|
12988 |
+
},
|
12989 |
+
{
|
12990 |
+
"epoch": 0.72,
|
12991 |
+
"grad_norm": 1.0899096727371216,
|
12992 |
+
"learning_rate": 1.402947450067869e-05,
|
12993 |
+
"loss": 5.3017,
|
12994 |
+
"step": 7420
|
12995 |
+
},
|
12996 |
+
{
|
12997 |
+
"epoch": 0.72,
|
12998 |
+
"grad_norm": 1.1063376665115356,
|
12999 |
+
"learning_rate": 1.4010083381811131e-05,
|
13000 |
+
"loss": 5.311,
|
13001 |
+
"step": 7424
|
13002 |
+
},
|
13003 |
+
{
|
13004 |
+
"epoch": 0.72,
|
13005 |
+
"grad_norm": 1.0748202800750732,
|
13006 |
+
"learning_rate": 1.3990692262943572e-05,
|
13007 |
+
"loss": 5.318,
|
13008 |
+
"step": 7428
|
13009 |
+
},
|
13010 |
+
{
|
13011 |
+
"epoch": 0.72,
|
13012 |
+
"grad_norm": 1.0851057767868042,
|
13013 |
+
"learning_rate": 1.3971301144076012e-05,
|
13014 |
+
"loss": 5.3425,
|
13015 |
+
"step": 7432
|
13016 |
+
},
|
13017 |
+
{
|
13018 |
+
"epoch": 0.72,
|
13019 |
+
"grad_norm": 0.9942495822906494,
|
13020 |
+
"learning_rate": 1.3951910025208456e-05,
|
13021 |
+
"loss": 5.3258,
|
13022 |
+
"step": 7436
|
13023 |
+
},
|
13024 |
+
{
|
13025 |
+
"epoch": 0.72,
|
13026 |
+
"grad_norm": 1.0894228219985962,
|
13027 |
+
"learning_rate": 1.3932518906340897e-05,
|
13028 |
+
"loss": 5.2769,
|
13029 |
+
"step": 7440
|
13030 |
+
},
|
13031 |
+
{
|
13032 |
+
"epoch": 0.72,
|
13033 |
+
"grad_norm": 0.9603523015975952,
|
13034 |
+
"learning_rate": 1.3913127787473337e-05,
|
13035 |
+
"loss": 5.4087,
|
13036 |
+
"step": 7444
|
13037 |
+
},
|
13038 |
+
{
|
13039 |
+
"epoch": 0.72,
|
13040 |
+
"grad_norm": 1.1049365997314453,
|
13041 |
+
"learning_rate": 1.3893736668605778e-05,
|
13042 |
+
"loss": 5.339,
|
13043 |
+
"step": 7448
|
13044 |
+
},
|
13045 |
+
{
|
13046 |
+
"epoch": 0.72,
|
13047 |
+
"grad_norm": 1.021468162536621,
|
13048 |
+
"learning_rate": 1.3874345549738222e-05,
|
13049 |
+
"loss": 5.3447,
|
13050 |
+
"step": 7452
|
13051 |
+
},
|
13052 |
+
{
|
13053 |
+
"epoch": 0.72,
|
13054 |
+
"grad_norm": 1.088437557220459,
|
13055 |
+
"learning_rate": 1.3854954430870662e-05,
|
13056 |
+
"loss": 5.3546,
|
13057 |
+
"step": 7456
|
13058 |
+
},
|
13059 |
+
{
|
13060 |
+
"epoch": 0.72,
|
13061 |
+
"grad_norm": 1.0266107320785522,
|
13062 |
+
"learning_rate": 1.3835563312003103e-05,
|
13063 |
+
"loss": 5.1867,
|
13064 |
+
"step": 7460
|
13065 |
+
},
|
13066 |
+
{
|
13067 |
+
"epoch": 0.72,
|
13068 |
+
"grad_norm": 1.088911533355713,
|
13069 |
+
"learning_rate": 1.3816172193135547e-05,
|
13070 |
+
"loss": 5.3247,
|
13071 |
+
"step": 7464
|
13072 |
+
},
|
13073 |
+
{
|
13074 |
+
"epoch": 0.72,
|
13075 |
+
"grad_norm": 1.0675928592681885,
|
13076 |
+
"learning_rate": 1.3796781074267987e-05,
|
13077 |
+
"loss": 5.2807,
|
13078 |
+
"step": 7468
|
13079 |
+
},
|
13080 |
+
{
|
13081 |
+
"epoch": 0.72,
|
13082 |
+
"grad_norm": 0.9511438608169556,
|
13083 |
+
"learning_rate": 1.3777389955400428e-05,
|
13084 |
+
"loss": 5.3774,
|
13085 |
+
"step": 7472
|
13086 |
+
},
|
13087 |
+
{
|
13088 |
+
"epoch": 0.72,
|
13089 |
+
"grad_norm": 1.0546114444732666,
|
13090 |
+
"learning_rate": 1.3757998836532868e-05,
|
13091 |
+
"loss": 5.2856,
|
13092 |
+
"step": 7476
|
13093 |
+
},
|
13094 |
+
{
|
13095 |
+
"epoch": 0.73,
|
13096 |
+
"grad_norm": 1.0445231199264526,
|
13097 |
+
"learning_rate": 1.373860771766531e-05,
|
13098 |
+
"loss": 5.3805,
|
13099 |
+
"step": 7480
|
13100 |
+
},
|
13101 |
+
{
|
13102 |
+
"epoch": 0.73,
|
13103 |
+
"grad_norm": 1.0493693351745605,
|
13104 |
+
"learning_rate": 1.3719216598797751e-05,
|
13105 |
+
"loss": 5.3603,
|
13106 |
+
"step": 7484
|
13107 |
+
},
|
13108 |
+
{
|
13109 |
+
"epoch": 0.73,
|
13110 |
+
"grad_norm": 1.0047773122787476,
|
13111 |
+
"learning_rate": 1.3699825479930192e-05,
|
13112 |
+
"loss": 5.3224,
|
13113 |
+
"step": 7488
|
13114 |
+
},
|
13115 |
+
{
|
13116 |
+
"epoch": 0.73,
|
13117 |
+
"grad_norm": 1.0536508560180664,
|
13118 |
+
"learning_rate": 1.3680434361062632e-05,
|
13119 |
+
"loss": 5.4235,
|
13120 |
+
"step": 7492
|
13121 |
+
},
|
13122 |
+
{
|
13123 |
+
"epoch": 0.73,
|
13124 |
+
"grad_norm": 1.0347819328308105,
|
13125 |
+
"learning_rate": 1.3661043242195076e-05,
|
13126 |
+
"loss": 5.2528,
|
13127 |
+
"step": 7496
|
13128 |
+
},
|
13129 |
+
{
|
13130 |
+
"epoch": 0.73,
|
13131 |
+
"grad_norm": 1.0392394065856934,
|
13132 |
+
"learning_rate": 1.3641652123327517e-05,
|
13133 |
+
"loss": 5.4127,
|
13134 |
+
"step": 7500
|
13135 |
+
},
|
13136 |
+
{
|
13137 |
+
"epoch": 0.73,
|
13138 |
+
"grad_norm": 1.0930792093276978,
|
13139 |
+
"learning_rate": 1.3622261004459957e-05,
|
13140 |
+
"loss": 5.3221,
|
13141 |
+
"step": 7504
|
13142 |
+
},
|
13143 |
+
{
|
13144 |
+
"epoch": 0.73,
|
13145 |
+
"grad_norm": 0.9962918758392334,
|
13146 |
+
"learning_rate": 1.3602869885592398e-05,
|
13147 |
+
"loss": 5.3108,
|
13148 |
+
"step": 7508
|
13149 |
+
},
|
13150 |
+
{
|
13151 |
+
"epoch": 0.73,
|
13152 |
+
"grad_norm": 1.030331015586853,
|
13153 |
+
"learning_rate": 1.3583478766724842e-05,
|
13154 |
+
"loss": 5.2632,
|
13155 |
+
"step": 7512
|
13156 |
+
},
|
13157 |
+
{
|
13158 |
+
"epoch": 0.73,
|
13159 |
+
"grad_norm": 1.008636236190796,
|
13160 |
+
"learning_rate": 1.3564087647857282e-05,
|
13161 |
+
"loss": 5.3089,
|
13162 |
+
"step": 7516
|
13163 |
+
},
|
13164 |
+
{
|
13165 |
+
"epoch": 0.73,
|
13166 |
+
"grad_norm": 1.006934642791748,
|
13167 |
+
"learning_rate": 1.3544696528989723e-05,
|
13168 |
+
"loss": 5.2903,
|
13169 |
+
"step": 7520
|
13170 |
+
},
|
13171 |
+
{
|
13172 |
+
"epoch": 0.73,
|
13173 |
+
"grad_norm": 1.0194462537765503,
|
13174 |
+
"learning_rate": 1.3525305410122167e-05,
|
13175 |
+
"loss": 5.3827,
|
13176 |
+
"step": 7524
|
13177 |
+
},
|
13178 |
+
{
|
13179 |
+
"epoch": 0.73,
|
13180 |
+
"grad_norm": 0.9879323840141296,
|
13181 |
+
"learning_rate": 1.3505914291254607e-05,
|
13182 |
+
"loss": 5.4307,
|
13183 |
+
"step": 7528
|
13184 |
+
},
|
13185 |
+
{
|
13186 |
+
"epoch": 0.73,
|
13187 |
+
"grad_norm": 1.0651185512542725,
|
13188 |
+
"learning_rate": 1.3486523172387048e-05,
|
13189 |
+
"loss": 5.3134,
|
13190 |
+
"step": 7532
|
13191 |
+
},
|
13192 |
+
{
|
13193 |
+
"epoch": 0.73,
|
13194 |
+
"grad_norm": 1.0858125686645508,
|
13195 |
+
"learning_rate": 1.3467132053519488e-05,
|
13196 |
+
"loss": 5.3024,
|
13197 |
+
"step": 7536
|
13198 |
+
},
|
13199 |
+
{
|
13200 |
+
"epoch": 0.73,
|
13201 |
+
"grad_norm": 1.0240780115127563,
|
13202 |
+
"learning_rate": 1.344774093465193e-05,
|
13203 |
+
"loss": 5.3555,
|
13204 |
+
"step": 7540
|
13205 |
+
},
|
13206 |
+
{
|
13207 |
+
"epoch": 0.73,
|
13208 |
+
"grad_norm": 1.0629985332489014,
|
13209 |
+
"learning_rate": 1.3428349815784373e-05,
|
13210 |
+
"loss": 5.3652,
|
13211 |
+
"step": 7544
|
13212 |
+
},
|
13213 |
+
{
|
13214 |
+
"epoch": 0.73,
|
13215 |
+
"grad_norm": 1.1222716569900513,
|
13216 |
+
"learning_rate": 1.3408958696916813e-05,
|
13217 |
+
"loss": 5.3845,
|
13218 |
+
"step": 7548
|
13219 |
+
},
|
13220 |
+
{
|
13221 |
+
"epoch": 0.73,
|
13222 |
+
"grad_norm": 1.0008291006088257,
|
13223 |
+
"learning_rate": 1.3389567578049254e-05,
|
13224 |
+
"loss": 5.3552,
|
13225 |
+
"step": 7552
|
13226 |
+
},
|
13227 |
+
{
|
13228 |
+
"epoch": 0.73,
|
13229 |
+
"grad_norm": 1.0860753059387207,
|
13230 |
+
"learning_rate": 1.3370176459181696e-05,
|
13231 |
+
"loss": 5.3044,
|
13232 |
+
"step": 7556
|
13233 |
+
},
|
13234 |
+
{
|
13235 |
+
"epoch": 0.73,
|
13236 |
+
"grad_norm": 1.0495448112487793,
|
13237 |
+
"learning_rate": 1.3350785340314136e-05,
|
13238 |
+
"loss": 5.3259,
|
13239 |
+
"step": 7560
|
13240 |
+
},
|
13241 |
+
{
|
13242 |
+
"epoch": 0.73,
|
13243 |
+
"grad_norm": 1.0432664155960083,
|
13244 |
+
"learning_rate": 1.3331394221446577e-05,
|
13245 |
+
"loss": 5.3482,
|
13246 |
+
"step": 7564
|
13247 |
+
},
|
13248 |
+
{
|
13249 |
+
"epoch": 0.73,
|
13250 |
+
"grad_norm": 1.0440526008605957,
|
13251 |
+
"learning_rate": 1.3312003102579018e-05,
|
13252 |
+
"loss": 5.2362,
|
13253 |
+
"step": 7568
|
13254 |
+
},
|
13255 |
+
{
|
13256 |
+
"epoch": 0.73,
|
13257 |
+
"grad_norm": 1.0317118167877197,
|
13258 |
+
"learning_rate": 1.3292611983711461e-05,
|
13259 |
+
"loss": 5.3579,
|
13260 |
+
"step": 7572
|
13261 |
+
},
|
13262 |
+
{
|
13263 |
+
"epoch": 0.73,
|
13264 |
+
"grad_norm": 0.9613714218139648,
|
13265 |
+
"learning_rate": 1.3273220864843902e-05,
|
13266 |
+
"loss": 5.3631,
|
13267 |
+
"step": 7576
|
13268 |
+
},
|
13269 |
+
{
|
13270 |
+
"epoch": 0.73,
|
13271 |
+
"grad_norm": 1.1608860492706299,
|
13272 |
+
"learning_rate": 1.3253829745976343e-05,
|
13273 |
+
"loss": 5.3547,
|
13274 |
+
"step": 7580
|
13275 |
+
},
|
13276 |
+
{
|
13277 |
+
"epoch": 0.74,
|
13278 |
+
"grad_norm": 1.06599760055542,
|
13279 |
+
"learning_rate": 1.3234438627108786e-05,
|
13280 |
+
"loss": 5.3728,
|
13281 |
+
"step": 7584
|
13282 |
+
},
|
13283 |
+
{
|
13284 |
+
"epoch": 0.74,
|
13285 |
+
"grad_norm": 1.0115044116973877,
|
13286 |
+
"learning_rate": 1.3215047508241227e-05,
|
13287 |
+
"loss": 5.2954,
|
13288 |
+
"step": 7588
|
13289 |
+
},
|
13290 |
+
{
|
13291 |
+
"epoch": 0.74,
|
13292 |
+
"grad_norm": 1.0504167079925537,
|
13293 |
+
"learning_rate": 1.3195656389373668e-05,
|
13294 |
+
"loss": 5.3378,
|
13295 |
+
"step": 7592
|
13296 |
+
},
|
13297 |
+
{
|
13298 |
+
"epoch": 0.74,
|
13299 |
+
"grad_norm": 1.1047917604446411,
|
13300 |
+
"learning_rate": 1.3176265270506108e-05,
|
13301 |
+
"loss": 5.3153,
|
13302 |
+
"step": 7596
|
13303 |
+
},
|
13304 |
+
{
|
13305 |
+
"epoch": 0.74,
|
13306 |
+
"grad_norm": 1.0862175226211548,
|
13307 |
+
"learning_rate": 1.3156874151638552e-05,
|
13308 |
+
"loss": 5.4315,
|
13309 |
+
"step": 7600
|
13310 |
+
},
|
13311 |
+
{
|
13312 |
+
"epoch": 0.74,
|
13313 |
+
"grad_norm": 1.05397629737854,
|
13314 |
+
"learning_rate": 1.3137483032770993e-05,
|
13315 |
+
"loss": 5.3807,
|
13316 |
+
"step": 7604
|
13317 |
+
},
|
13318 |
+
{
|
13319 |
+
"epoch": 0.74,
|
13320 |
+
"grad_norm": 1.0143108367919922,
|
13321 |
+
"learning_rate": 1.3118091913903433e-05,
|
13322 |
+
"loss": 5.346,
|
13323 |
+
"step": 7608
|
13324 |
+
},
|
13325 |
+
{
|
13326 |
+
"epoch": 0.74,
|
13327 |
+
"grad_norm": 1.0487464666366577,
|
13328 |
+
"learning_rate": 1.3098700795035874e-05,
|
13329 |
+
"loss": 5.2966,
|
13330 |
+
"step": 7612
|
13331 |
+
},
|
13332 |
+
{
|
13333 |
+
"epoch": 0.74,
|
13334 |
+
"grad_norm": 1.156467318534851,
|
13335 |
+
"learning_rate": 1.3079309676168316e-05,
|
13336 |
+
"loss": 5.24,
|
13337 |
+
"step": 7616
|
13338 |
+
},
|
13339 |
+
{
|
13340 |
+
"epoch": 0.74,
|
13341 |
+
"grad_norm": 1.0586912631988525,
|
13342 |
+
"learning_rate": 1.3059918557300756e-05,
|
13343 |
+
"loss": 5.3162,
|
13344 |
+
"step": 7620
|
13345 |
+
},
|
13346 |
+
{
|
13347 |
+
"epoch": 0.74,
|
13348 |
+
"grad_norm": 1.0472930669784546,
|
13349 |
+
"learning_rate": 1.3040527438433197e-05,
|
13350 |
+
"loss": 5.4309,
|
13351 |
+
"step": 7624
|
13352 |
+
},
|
13353 |
+
{
|
13354 |
+
"epoch": 0.74,
|
13355 |
+
"grad_norm": 1.0239797830581665,
|
13356 |
+
"learning_rate": 1.3021136319565637e-05,
|
13357 |
+
"loss": 5.3124,
|
13358 |
+
"step": 7628
|
13359 |
+
},
|
13360 |
+
{
|
13361 |
+
"epoch": 0.74,
|
13362 |
+
"grad_norm": 1.084915041923523,
|
13363 |
+
"learning_rate": 1.3001745200698081e-05,
|
13364 |
+
"loss": 5.382,
|
13365 |
+
"step": 7632
|
13366 |
+
},
|
13367 |
+
{
|
13368 |
+
"epoch": 0.74,
|
13369 |
+
"grad_norm": 1.121639370918274,
|
13370 |
+
"learning_rate": 1.2982354081830522e-05,
|
13371 |
+
"loss": 5.2888,
|
13372 |
+
"step": 7636
|
13373 |
+
},
|
13374 |
+
{
|
13375 |
+
"epoch": 0.74,
|
13376 |
+
"grad_norm": 1.06790030002594,
|
13377 |
+
"learning_rate": 1.2962962962962962e-05,
|
13378 |
+
"loss": 5.3484,
|
13379 |
+
"step": 7640
|
13380 |
+
},
|
13381 |
+
{
|
13382 |
+
"epoch": 0.74,
|
13383 |
+
"grad_norm": 1.0664012432098389,
|
13384 |
+
"learning_rate": 1.2943571844095406e-05,
|
13385 |
+
"loss": 5.3624,
|
13386 |
+
"step": 7644
|
13387 |
+
},
|
13388 |
+
{
|
13389 |
+
"epoch": 0.74,
|
13390 |
+
"grad_norm": 1.032593846321106,
|
13391 |
+
"learning_rate": 1.2924180725227847e-05,
|
13392 |
+
"loss": 5.3109,
|
13393 |
+
"step": 7648
|
13394 |
+
},
|
13395 |
+
{
|
13396 |
+
"epoch": 0.74,
|
13397 |
+
"grad_norm": 1.0182029008865356,
|
13398 |
+
"learning_rate": 1.2904789606360287e-05,
|
13399 |
+
"loss": 5.3785,
|
13400 |
+
"step": 7652
|
13401 |
+
},
|
13402 |
+
{
|
13403 |
+
"epoch": 0.74,
|
13404 |
+
"grad_norm": 0.9787065982818604,
|
13405 |
+
"learning_rate": 1.2885398487492728e-05,
|
13406 |
+
"loss": 5.2586,
|
13407 |
+
"step": 7656
|
13408 |
+
},
|
13409 |
+
{
|
13410 |
+
"epoch": 0.74,
|
13411 |
+
"grad_norm": 1.14923095703125,
|
13412 |
+
"learning_rate": 1.2866007368625172e-05,
|
13413 |
+
"loss": 5.3378,
|
13414 |
+
"step": 7660
|
13415 |
+
},
|
13416 |
+
{
|
13417 |
+
"epoch": 0.74,
|
13418 |
+
"grad_norm": 1.0064685344696045,
|
13419 |
+
"learning_rate": 1.2846616249757612e-05,
|
13420 |
+
"loss": 5.3339,
|
13421 |
+
"step": 7664
|
13422 |
+
},
|
13423 |
+
{
|
13424 |
+
"epoch": 0.74,
|
13425 |
+
"grad_norm": 1.015594720840454,
|
13426 |
+
"learning_rate": 1.2827225130890053e-05,
|
13427 |
+
"loss": 5.2412,
|
13428 |
+
"step": 7668
|
13429 |
+
},
|
13430 |
+
{
|
13431 |
+
"epoch": 0.74,
|
13432 |
+
"grad_norm": 1.1527953147888184,
|
13433 |
+
"learning_rate": 1.2807834012022494e-05,
|
13434 |
+
"loss": 5.4026,
|
13435 |
+
"step": 7672
|
13436 |
+
},
|
13437 |
+
{
|
13438 |
+
"epoch": 0.74,
|
13439 |
+
"grad_norm": 1.024170994758606,
|
13440 |
+
"learning_rate": 1.2788442893154936e-05,
|
13441 |
+
"loss": 5.2431,
|
13442 |
+
"step": 7676
|
13443 |
+
},
|
13444 |
+
{
|
13445 |
+
"epoch": 0.74,
|
13446 |
+
"grad_norm": 1.0834672451019287,
|
13447 |
+
"learning_rate": 1.2769051774287378e-05,
|
13448 |
+
"loss": 5.3018,
|
13449 |
+
"step": 7680
|
13450 |
+
},
|
13451 |
+
{
|
13452 |
+
"epoch": 0.75,
|
13453 |
+
"grad_norm": 1.00787353515625,
|
13454 |
+
"learning_rate": 1.2749660655419819e-05,
|
13455 |
+
"loss": 5.3129,
|
13456 |
+
"step": 7684
|
13457 |
+
},
|
13458 |
+
{
|
13459 |
+
"epoch": 0.75,
|
13460 |
+
"grad_norm": 1.0804412364959717,
|
13461 |
+
"learning_rate": 1.2730269536552259e-05,
|
13462 |
+
"loss": 5.3974,
|
13463 |
+
"step": 7688
|
13464 |
+
},
|
13465 |
+
{
|
13466 |
+
"epoch": 0.75,
|
13467 |
+
"grad_norm": 1.0361813306808472,
|
13468 |
+
"learning_rate": 1.2710878417684701e-05,
|
13469 |
+
"loss": 5.291,
|
13470 |
+
"step": 7692
|
13471 |
+
},
|
13472 |
+
{
|
13473 |
+
"epoch": 0.75,
|
13474 |
+
"grad_norm": 0.9639879465103149,
|
13475 |
+
"learning_rate": 1.2691487298817142e-05,
|
13476 |
+
"loss": 5.3649,
|
13477 |
+
"step": 7696
|
13478 |
+
},
|
13479 |
+
{
|
13480 |
+
"epoch": 0.75,
|
13481 |
+
"grad_norm": 1.035768985748291,
|
13482 |
+
"learning_rate": 1.2672096179949582e-05,
|
13483 |
+
"loss": 5.3159,
|
13484 |
+
"step": 7700
|
13485 |
+
},
|
13486 |
+
{
|
13487 |
+
"epoch": 0.75,
|
13488 |
+
"grad_norm": 1.002968430519104,
|
13489 |
+
"learning_rate": 1.2652705061082026e-05,
|
13490 |
+
"loss": 5.1975,
|
13491 |
+
"step": 7704
|
13492 |
+
},
|
13493 |
+
{
|
13494 |
+
"epoch": 0.75,
|
13495 |
+
"grad_norm": 1.0666626691818237,
|
13496 |
+
"learning_rate": 1.2633313942214467e-05,
|
13497 |
+
"loss": 5.2216,
|
13498 |
+
"step": 7708
|
13499 |
+
},
|
13500 |
+
{
|
13501 |
+
"epoch": 0.75,
|
13502 |
+
"grad_norm": 1.031752347946167,
|
13503 |
+
"learning_rate": 1.2613922823346907e-05,
|
13504 |
+
"loss": 5.4174,
|
13505 |
+
"step": 7712
|
13506 |
+
},
|
13507 |
+
{
|
13508 |
+
"epoch": 0.75,
|
13509 |
+
"grad_norm": 1.0436227321624756,
|
13510 |
+
"learning_rate": 1.2594531704479348e-05,
|
13511 |
+
"loss": 5.2295,
|
13512 |
+
"step": 7716
|
13513 |
+
},
|
13514 |
+
{
|
13515 |
+
"epoch": 0.75,
|
13516 |
+
"grad_norm": 1.0823688507080078,
|
13517 |
+
"learning_rate": 1.2575140585611792e-05,
|
13518 |
+
"loss": 5.3274,
|
13519 |
+
"step": 7720
|
13520 |
+
},
|
13521 |
+
{
|
13522 |
+
"epoch": 0.75,
|
13523 |
+
"grad_norm": 1.0667200088500977,
|
13524 |
+
"learning_rate": 1.2555749466744232e-05,
|
13525 |
+
"loss": 5.3149,
|
13526 |
+
"step": 7724
|
13527 |
+
},
|
13528 |
+
{
|
13529 |
+
"epoch": 0.75,
|
13530 |
+
"grad_norm": 1.0479573011398315,
|
13531 |
+
"learning_rate": 1.2536358347876673e-05,
|
13532 |
+
"loss": 5.2653,
|
13533 |
+
"step": 7728
|
13534 |
+
},
|
13535 |
+
{
|
13536 |
+
"epoch": 0.75,
|
13537 |
+
"grad_norm": 1.1973553895950317,
|
13538 |
+
"learning_rate": 1.2516967229009113e-05,
|
13539 |
+
"loss": 5.2504,
|
13540 |
+
"step": 7732
|
13541 |
+
},
|
13542 |
+
{
|
13543 |
+
"epoch": 0.75,
|
13544 |
+
"grad_norm": 1.0791184902191162,
|
13545 |
+
"learning_rate": 1.2497576110141556e-05,
|
13546 |
+
"loss": 5.2797,
|
13547 |
+
"step": 7736
|
13548 |
+
},
|
13549 |
+
{
|
13550 |
+
"epoch": 0.75,
|
13551 |
+
"grad_norm": 1.091112732887268,
|
13552 |
+
"learning_rate": 1.2478184991273998e-05,
|
13553 |
+
"loss": 5.3129,
|
13554 |
+
"step": 7740
|
13555 |
+
},
|
13556 |
+
{
|
13557 |
+
"epoch": 0.75,
|
13558 |
+
"grad_norm": 1.0607527494430542,
|
13559 |
+
"learning_rate": 1.2458793872406438e-05,
|
13560 |
+
"loss": 5.2997,
|
13561 |
+
"step": 7744
|
13562 |
+
},
|
13563 |
+
{
|
13564 |
+
"epoch": 0.75,
|
13565 |
+
"grad_norm": 1.0152305364608765,
|
13566 |
+
"learning_rate": 1.243940275353888e-05,
|
13567 |
+
"loss": 5.3655,
|
13568 |
+
"step": 7748
|
13569 |
+
},
|
13570 |
+
{
|
13571 |
+
"epoch": 0.75,
|
13572 |
+
"grad_norm": 1.009064793586731,
|
13573 |
+
"learning_rate": 1.2420011634671321e-05,
|
13574 |
+
"loss": 5.3058,
|
13575 |
+
"step": 7752
|
13576 |
+
},
|
13577 |
+
{
|
13578 |
+
"epoch": 0.75,
|
13579 |
+
"grad_norm": 1.0338549613952637,
|
13580 |
+
"learning_rate": 1.2400620515803762e-05,
|
13581 |
+
"loss": 5.2527,
|
13582 |
+
"step": 7756
|
13583 |
+
},
|
13584 |
+
{
|
13585 |
+
"epoch": 0.75,
|
13586 |
+
"grad_norm": 1.0737717151641846,
|
13587 |
+
"learning_rate": 1.2381229396936204e-05,
|
13588 |
+
"loss": 5.3859,
|
13589 |
+
"step": 7760
|
13590 |
+
},
|
13591 |
+
{
|
13592 |
+
"epoch": 0.75,
|
13593 |
+
"grad_norm": 1.0524028539657593,
|
13594 |
+
"learning_rate": 1.2361838278068645e-05,
|
13595 |
+
"loss": 5.407,
|
13596 |
+
"step": 7764
|
13597 |
+
},
|
13598 |
+
{
|
13599 |
+
"epoch": 0.75,
|
13600 |
+
"grad_norm": 1.0182230472564697,
|
13601 |
+
"learning_rate": 1.2342447159201087e-05,
|
13602 |
+
"loss": 5.3243,
|
13603 |
+
"step": 7768
|
13604 |
+
},
|
13605 |
+
{
|
13606 |
+
"epoch": 0.75,
|
13607 |
+
"grad_norm": 1.042325496673584,
|
13608 |
+
"learning_rate": 1.2323056040333527e-05,
|
13609 |
+
"loss": 5.372,
|
13610 |
+
"step": 7772
|
13611 |
+
},
|
13612 |
+
{
|
13613 |
+
"epoch": 0.75,
|
13614 |
+
"grad_norm": 1.099138855934143,
|
13615 |
+
"learning_rate": 1.230366492146597e-05,
|
13616 |
+
"loss": 5.3577,
|
13617 |
+
"step": 7776
|
13618 |
+
},
|
13619 |
+
{
|
13620 |
+
"epoch": 0.75,
|
13621 |
+
"grad_norm": 1.007309913635254,
|
13622 |
+
"learning_rate": 1.228427380259841e-05,
|
13623 |
+
"loss": 5.252,
|
13624 |
+
"step": 7780
|
13625 |
+
},
|
13626 |
+
{
|
13627 |
+
"epoch": 0.75,
|
13628 |
+
"grad_norm": 1.1510696411132812,
|
13629 |
+
"learning_rate": 1.2264882683730852e-05,
|
13630 |
+
"loss": 5.3726,
|
13631 |
+
"step": 7784
|
13632 |
+
},
|
13633 |
+
{
|
13634 |
+
"epoch": 0.76,
|
13635 |
+
"grad_norm": 1.0110242366790771,
|
13636 |
+
"learning_rate": 1.2245491564863293e-05,
|
13637 |
+
"loss": 5.3932,
|
13638 |
+
"step": 7788
|
13639 |
+
},
|
13640 |
+
{
|
13641 |
+
"epoch": 0.76,
|
13642 |
+
"grad_norm": 1.0620808601379395,
|
13643 |
+
"learning_rate": 1.2226100445995735e-05,
|
13644 |
+
"loss": 5.2691,
|
13645 |
+
"step": 7792
|
13646 |
+
},
|
13647 |
+
{
|
13648 |
+
"epoch": 0.76,
|
13649 |
+
"grad_norm": 1.052786946296692,
|
13650 |
+
"learning_rate": 1.2206709327128176e-05,
|
13651 |
+
"loss": 5.3597,
|
13652 |
+
"step": 7796
|
13653 |
+
},
|
13654 |
+
{
|
13655 |
+
"epoch": 0.76,
|
13656 |
+
"grad_norm": 1.1259844303131104,
|
13657 |
+
"learning_rate": 1.2187318208260618e-05,
|
13658 |
+
"loss": 5.3324,
|
13659 |
+
"step": 7800
|
13660 |
+
},
|
13661 |
+
{
|
13662 |
+
"epoch": 0.76,
|
13663 |
+
"grad_norm": 1.0244724750518799,
|
13664 |
+
"learning_rate": 1.2167927089393058e-05,
|
13665 |
+
"loss": 5.3643,
|
13666 |
+
"step": 7804
|
13667 |
+
},
|
13668 |
+
{
|
13669 |
+
"epoch": 0.76,
|
13670 |
+
"grad_norm": 1.1225011348724365,
|
13671 |
+
"learning_rate": 1.21485359705255e-05,
|
13672 |
+
"loss": 5.3501,
|
13673 |
+
"step": 7808
|
13674 |
+
},
|
13675 |
+
{
|
13676 |
+
"epoch": 0.76,
|
13677 |
+
"grad_norm": 0.9966182112693787,
|
13678 |
+
"learning_rate": 1.2129144851657941e-05,
|
13679 |
+
"loss": 5.3966,
|
13680 |
+
"step": 7812
|
13681 |
+
},
|
13682 |
+
{
|
13683 |
+
"epoch": 0.76,
|
13684 |
+
"grad_norm": 1.102308988571167,
|
13685 |
+
"learning_rate": 1.2109753732790383e-05,
|
13686 |
+
"loss": 5.2625,
|
13687 |
+
"step": 7816
|
13688 |
+
},
|
13689 |
+
{
|
13690 |
+
"epoch": 0.76,
|
13691 |
+
"grad_norm": 1.0900803804397583,
|
13692 |
+
"learning_rate": 1.2095210393639714e-05,
|
13693 |
+
"loss": 5.365,
|
13694 |
+
"step": 7820
|
13695 |
+
},
|
13696 |
+
{
|
13697 |
+
"epoch": 0.76,
|
13698 |
+
"grad_norm": 1.1339807510375977,
|
13699 |
+
"learning_rate": 1.2075819274772154e-05,
|
13700 |
+
"loss": 5.3816,
|
13701 |
+
"step": 7824
|
13702 |
+
},
|
13703 |
+
{
|
13704 |
+
"epoch": 0.76,
|
13705 |
+
"grad_norm": 1.077379822731018,
|
13706 |
+
"learning_rate": 1.2056428155904597e-05,
|
13707 |
+
"loss": 5.1332,
|
13708 |
+
"step": 7828
|
13709 |
+
},
|
13710 |
+
{
|
13711 |
+
"epoch": 0.76,
|
13712 |
+
"grad_norm": 1.1201417446136475,
|
13713 |
+
"learning_rate": 1.2037037037037037e-05,
|
13714 |
+
"loss": 5.3037,
|
13715 |
+
"step": 7832
|
13716 |
+
},
|
13717 |
+
{
|
13718 |
+
"epoch": 0.76,
|
13719 |
+
"grad_norm": 1.1047320365905762,
|
13720 |
+
"learning_rate": 1.201764591816948e-05,
|
13721 |
+
"loss": 5.4453,
|
13722 |
+
"step": 7836
|
13723 |
+
},
|
13724 |
+
{
|
13725 |
+
"epoch": 0.76,
|
13726 |
+
"grad_norm": 1.0625344514846802,
|
13727 |
+
"learning_rate": 1.1998254799301922e-05,
|
13728 |
+
"loss": 5.2676,
|
13729 |
+
"step": 7840
|
13730 |
+
},
|
13731 |
+
{
|
13732 |
+
"epoch": 0.76,
|
13733 |
+
"grad_norm": 1.0774505138397217,
|
13734 |
+
"learning_rate": 1.1978863680434362e-05,
|
13735 |
+
"loss": 5.3222,
|
13736 |
+
"step": 7844
|
13737 |
+
},
|
13738 |
+
{
|
13739 |
+
"epoch": 0.76,
|
13740 |
+
"grad_norm": 1.0557003021240234,
|
13741 |
+
"learning_rate": 1.1959472561566804e-05,
|
13742 |
+
"loss": 5.3235,
|
13743 |
+
"step": 7848
|
13744 |
+
},
|
13745 |
+
{
|
13746 |
+
"epoch": 0.76,
|
13747 |
+
"grad_norm": 0.9856312274932861,
|
13748 |
+
"learning_rate": 1.1940081442699245e-05,
|
13749 |
+
"loss": 5.2947,
|
13750 |
+
"step": 7852
|
13751 |
+
},
|
13752 |
+
{
|
13753 |
+
"epoch": 0.76,
|
13754 |
+
"grad_norm": 1.0185786485671997,
|
13755 |
+
"learning_rate": 1.1920690323831685e-05,
|
13756 |
+
"loss": 5.3262,
|
13757 |
+
"step": 7856
|
13758 |
+
},
|
13759 |
+
{
|
13760 |
+
"epoch": 0.76,
|
13761 |
+
"grad_norm": 1.0777360200881958,
|
13762 |
+
"learning_rate": 1.1901299204964126e-05,
|
13763 |
+
"loss": 5.254,
|
13764 |
+
"step": 7860
|
13765 |
+
},
|
13766 |
+
{
|
13767 |
+
"epoch": 0.76,
|
13768 |
+
"grad_norm": 1.0238205194473267,
|
13769 |
+
"learning_rate": 1.1881908086096568e-05,
|
13770 |
+
"loss": 5.356,
|
13771 |
+
"step": 7864
|
13772 |
+
},
|
13773 |
+
{
|
13774 |
+
"epoch": 0.76,
|
13775 |
+
"grad_norm": 1.0025471448898315,
|
13776 |
+
"learning_rate": 1.1862516967229009e-05,
|
13777 |
+
"loss": 5.2541,
|
13778 |
+
"step": 7868
|
13779 |
+
},
|
13780 |
+
{
|
13781 |
+
"epoch": 0.76,
|
13782 |
+
"grad_norm": 1.030316710472107,
|
13783 |
+
"learning_rate": 1.1843125848361451e-05,
|
13784 |
+
"loss": 5.2578,
|
13785 |
+
"step": 7872
|
13786 |
+
},
|
13787 |
+
{
|
13788 |
+
"epoch": 0.76,
|
13789 |
+
"grad_norm": 1.091535210609436,
|
13790 |
+
"learning_rate": 1.1823734729493891e-05,
|
13791 |
+
"loss": 5.3234,
|
13792 |
+
"step": 7876
|
13793 |
+
},
|
13794 |
+
{
|
13795 |
+
"epoch": 0.76,
|
13796 |
+
"grad_norm": 1.0827471017837524,
|
13797 |
+
"learning_rate": 1.1804343610626334e-05,
|
13798 |
+
"loss": 5.2876,
|
13799 |
+
"step": 7880
|
13800 |
+
},
|
13801 |
+
{
|
13802 |
+
"epoch": 0.76,
|
13803 |
+
"grad_norm": 1.0991337299346924,
|
13804 |
+
"learning_rate": 1.1784952491758774e-05,
|
13805 |
+
"loss": 5.473,
|
13806 |
+
"step": 7884
|
13807 |
+
},
|
13808 |
+
{
|
13809 |
+
"epoch": 0.76,
|
13810 |
+
"grad_norm": 1.0421675443649292,
|
13811 |
+
"learning_rate": 1.1765561372891216e-05,
|
13812 |
+
"loss": 5.3059,
|
13813 |
+
"step": 7888
|
13814 |
+
},
|
13815 |
+
{
|
13816 |
+
"epoch": 0.77,
|
13817 |
+
"grad_norm": 1.1006362438201904,
|
13818 |
+
"learning_rate": 1.1746170254023657e-05,
|
13819 |
+
"loss": 5.2836,
|
13820 |
+
"step": 7892
|
13821 |
+
},
|
13822 |
+
{
|
13823 |
+
"epoch": 0.77,
|
13824 |
+
"grad_norm": 1.0210435390472412,
|
13825 |
+
"learning_rate": 1.17267791351561e-05,
|
13826 |
+
"loss": 5.3632,
|
13827 |
+
"step": 7896
|
13828 |
+
},
|
13829 |
+
{
|
13830 |
+
"epoch": 0.77,
|
13831 |
+
"grad_norm": 0.9662442207336426,
|
13832 |
+
"learning_rate": 1.1707388016288541e-05,
|
13833 |
+
"loss": 5.3246,
|
13834 |
+
"step": 7900
|
13835 |
+
},
|
13836 |
+
{
|
13837 |
+
"epoch": 0.77,
|
13838 |
+
"grad_norm": 1.1299954652786255,
|
13839 |
+
"learning_rate": 1.1687996897420982e-05,
|
13840 |
+
"loss": 5.2499,
|
13841 |
+
"step": 7904
|
13842 |
+
},
|
13843 |
+
{
|
13844 |
+
"epoch": 0.77,
|
13845 |
+
"grad_norm": 1.0942116975784302,
|
13846 |
+
"learning_rate": 1.1668605778553424e-05,
|
13847 |
+
"loss": 5.2834,
|
13848 |
+
"step": 7908
|
13849 |
+
},
|
13850 |
+
{
|
13851 |
+
"epoch": 0.77,
|
13852 |
+
"grad_norm": 1.0262593030929565,
|
13853 |
+
"learning_rate": 1.1649214659685865e-05,
|
13854 |
+
"loss": 5.3228,
|
13855 |
+
"step": 7912
|
13856 |
+
},
|
13857 |
+
{
|
13858 |
+
"epoch": 0.77,
|
13859 |
+
"grad_norm": 1.0624995231628418,
|
13860 |
+
"learning_rate": 1.1629823540818307e-05,
|
13861 |
+
"loss": 5.2603,
|
13862 |
+
"step": 7916
|
13863 |
+
},
|
13864 |
+
{
|
13865 |
+
"epoch": 0.77,
|
13866 |
+
"grad_norm": 1.0851109027862549,
|
13867 |
+
"learning_rate": 1.1610432421950747e-05,
|
13868 |
+
"loss": 5.2343,
|
13869 |
+
"step": 7920
|
13870 |
+
},
|
13871 |
+
{
|
13872 |
+
"epoch": 0.77,
|
13873 |
+
"grad_norm": 1.0987358093261719,
|
13874 |
+
"learning_rate": 1.1591041303083188e-05,
|
13875 |
+
"loss": 5.2615,
|
13876 |
+
"step": 7924
|
13877 |
+
},
|
13878 |
+
{
|
13879 |
+
"epoch": 0.77,
|
13880 |
+
"grad_norm": 1.0852704048156738,
|
13881 |
+
"learning_rate": 1.1571650184215629e-05,
|
13882 |
+
"loss": 5.2886,
|
13883 |
+
"step": 7928
|
13884 |
+
},
|
13885 |
+
{
|
13886 |
+
"epoch": 0.77,
|
13887 |
+
"grad_norm": 1.0782544612884521,
|
13888 |
+
"learning_rate": 1.155225906534807e-05,
|
13889 |
+
"loss": 5.3585,
|
13890 |
+
"step": 7932
|
13891 |
+
},
|
13892 |
+
{
|
13893 |
+
"epoch": 0.77,
|
13894 |
+
"grad_norm": 1.0928773880004883,
|
13895 |
+
"learning_rate": 1.1532867946480511e-05,
|
13896 |
+
"loss": 5.2083,
|
13897 |
+
"step": 7936
|
13898 |
+
},
|
13899 |
+
{
|
13900 |
+
"epoch": 0.77,
|
13901 |
+
"grad_norm": 0.9779551029205322,
|
13902 |
+
"learning_rate": 1.1513476827612954e-05,
|
13903 |
+
"loss": 5.2969,
|
13904 |
+
"step": 7940
|
13905 |
+
},
|
13906 |
+
{
|
13907 |
+
"epoch": 0.77,
|
13908 |
+
"grad_norm": 1.0859659910202026,
|
13909 |
+
"learning_rate": 1.1494085708745394e-05,
|
13910 |
+
"loss": 5.3044,
|
13911 |
+
"step": 7944
|
13912 |
+
},
|
13913 |
+
{
|
13914 |
+
"epoch": 0.77,
|
13915 |
+
"grad_norm": 1.0626839399337769,
|
13916 |
+
"learning_rate": 1.1474694589877836e-05,
|
13917 |
+
"loss": 5.3455,
|
13918 |
+
"step": 7948
|
13919 |
+
},
|
13920 |
+
{
|
13921 |
+
"epoch": 0.77,
|
13922 |
+
"grad_norm": 1.1160836219787598,
|
13923 |
+
"learning_rate": 1.1455303471010277e-05,
|
13924 |
+
"loss": 5.3082,
|
13925 |
+
"step": 7952
|
13926 |
+
},
|
13927 |
+
{
|
13928 |
+
"epoch": 0.77,
|
13929 |
+
"grad_norm": 1.089357614517212,
|
13930 |
+
"learning_rate": 1.1435912352142719e-05,
|
13931 |
+
"loss": 5.3013,
|
13932 |
+
"step": 7956
|
13933 |
+
},
|
13934 |
+
{
|
13935 |
+
"epoch": 0.77,
|
13936 |
+
"grad_norm": 0.9616773128509521,
|
13937 |
+
"learning_rate": 1.1416521233275161e-05,
|
13938 |
+
"loss": 5.2995,
|
13939 |
+
"step": 7960
|
13940 |
+
},
|
13941 |
+
{
|
13942 |
+
"epoch": 0.77,
|
13943 |
+
"grad_norm": 1.0657833814620972,
|
13944 |
+
"learning_rate": 1.1397130114407602e-05,
|
13945 |
+
"loss": 5.3208,
|
13946 |
+
"step": 7964
|
13947 |
+
},
|
13948 |
+
{
|
13949 |
+
"epoch": 0.77,
|
13950 |
+
"grad_norm": 1.0845454931259155,
|
13951 |
+
"learning_rate": 1.1377738995540044e-05,
|
13952 |
+
"loss": 5.3213,
|
13953 |
+
"step": 7968
|
13954 |
+
},
|
13955 |
+
{
|
13956 |
+
"epoch": 0.77,
|
13957 |
+
"grad_norm": 1.0332484245300293,
|
13958 |
+
"learning_rate": 1.1358347876672485e-05,
|
13959 |
+
"loss": 5.2833,
|
13960 |
+
"step": 7972
|
13961 |
+
},
|
13962 |
+
{
|
13963 |
+
"epoch": 0.77,
|
13964 |
+
"grad_norm": 1.0877047777175903,
|
13965 |
+
"learning_rate": 1.1338956757804927e-05,
|
13966 |
+
"loss": 5.2889,
|
13967 |
+
"step": 7976
|
13968 |
+
},
|
13969 |
+
{
|
13970 |
+
"epoch": 0.77,
|
13971 |
+
"grad_norm": 1.0782090425491333,
|
13972 |
+
"learning_rate": 1.1319565638937367e-05,
|
13973 |
+
"loss": 5.2914,
|
13974 |
+
"step": 7980
|
13975 |
+
},
|
13976 |
+
{
|
13977 |
+
"epoch": 0.77,
|
13978 |
+
"grad_norm": 1.0773468017578125,
|
13979 |
+
"learning_rate": 1.130017452006981e-05,
|
13980 |
+
"loss": 5.2853,
|
13981 |
+
"step": 7984
|
13982 |
+
},
|
13983 |
+
{
|
13984 |
+
"epoch": 0.77,
|
13985 |
+
"grad_norm": 1.1031116247177124,
|
13986 |
+
"learning_rate": 1.128078340120225e-05,
|
13987 |
+
"loss": 5.2285,
|
13988 |
+
"step": 7988
|
13989 |
+
},
|
13990 |
+
{
|
13991 |
+
"epoch": 0.77,
|
13992 |
+
"grad_norm": 1.0579017400741577,
|
13993 |
+
"learning_rate": 1.126139228233469e-05,
|
13994 |
+
"loss": 5.2608,
|
13995 |
+
"step": 7992
|
13996 |
+
},
|
13997 |
+
{
|
13998 |
+
"epoch": 0.78,
|
13999 |
+
"grad_norm": 1.0039610862731934,
|
14000 |
+
"learning_rate": 1.1242001163467133e-05,
|
14001 |
+
"loss": 5.2801,
|
14002 |
+
"step": 7996
|
14003 |
+
},
|
14004 |
+
{
|
14005 |
+
"epoch": 0.78,
|
14006 |
+
"grad_norm": 1.1101170778274536,
|
14007 |
+
"learning_rate": 1.1222610044599573e-05,
|
14008 |
+
"loss": 5.3003,
|
14009 |
+
"step": 8000
|
14010 |
+
},
|
14011 |
+
{
|
14012 |
+
"epoch": 0.78,
|
14013 |
+
"grad_norm": 1.1231920719146729,
|
14014 |
+
"learning_rate": 1.1203218925732014e-05,
|
14015 |
+
"loss": 5.2951,
|
14016 |
+
"step": 8004
|
14017 |
+
},
|
14018 |
+
{
|
14019 |
+
"epoch": 0.78,
|
14020 |
+
"grad_norm": 1.104062557220459,
|
14021 |
+
"learning_rate": 1.1183827806864456e-05,
|
14022 |
+
"loss": 5.2419,
|
14023 |
+
"step": 8008
|
14024 |
+
},
|
14025 |
+
{
|
14026 |
+
"epoch": 0.78,
|
14027 |
+
"grad_norm": 1.0631533861160278,
|
14028 |
+
"learning_rate": 1.1164436687996897e-05,
|
14029 |
+
"loss": 5.2927,
|
14030 |
+
"step": 8012
|
14031 |
+
},
|
14032 |
+
{
|
14033 |
+
"epoch": 0.78,
|
14034 |
+
"grad_norm": 1.0923023223876953,
|
14035 |
+
"learning_rate": 1.1145045569129339e-05,
|
14036 |
+
"loss": 5.3566,
|
14037 |
+
"step": 8016
|
14038 |
+
},
|
14039 |
+
{
|
14040 |
+
"epoch": 0.78,
|
14041 |
+
"grad_norm": 1.1646323204040527,
|
14042 |
+
"learning_rate": 1.1125654450261781e-05,
|
14043 |
+
"loss": 5.229,
|
14044 |
+
"step": 8020
|
14045 |
+
},
|
14046 |
+
{
|
14047 |
+
"epoch": 0.78,
|
14048 |
+
"grad_norm": 1.0775333642959595,
|
14049 |
+
"learning_rate": 1.1106263331394222e-05,
|
14050 |
+
"loss": 5.3121,
|
14051 |
+
"step": 8024
|
14052 |
+
},
|
14053 |
+
{
|
14054 |
+
"epoch": 0.78,
|
14055 |
+
"grad_norm": 1.101682186126709,
|
14056 |
+
"learning_rate": 1.1086872212526664e-05,
|
14057 |
+
"loss": 5.3746,
|
14058 |
+
"step": 8028
|
14059 |
+
},
|
14060 |
+
{
|
14061 |
+
"epoch": 0.78,
|
14062 |
+
"grad_norm": 1.06061589717865,
|
14063 |
+
"learning_rate": 1.1067481093659105e-05,
|
14064 |
+
"loss": 5.2878,
|
14065 |
+
"step": 8032
|
14066 |
+
},
|
14067 |
+
{
|
14068 |
+
"epoch": 0.78,
|
14069 |
+
"grad_norm": 1.0500963926315308,
|
14070 |
+
"learning_rate": 1.1048089974791547e-05,
|
14071 |
+
"loss": 5.3188,
|
14072 |
+
"step": 8036
|
14073 |
+
},
|
14074 |
+
{
|
14075 |
+
"epoch": 0.78,
|
14076 |
+
"grad_norm": 1.1280819177627563,
|
14077 |
+
"learning_rate": 1.1028698855923987e-05,
|
14078 |
+
"loss": 5.2829,
|
14079 |
+
"step": 8040
|
14080 |
+
},
|
14081 |
+
{
|
14082 |
+
"epoch": 0.78,
|
14083 |
+
"grad_norm": 1.0025454759597778,
|
14084 |
+
"learning_rate": 1.100930773705643e-05,
|
14085 |
+
"loss": 5.3121,
|
14086 |
+
"step": 8044
|
14087 |
+
},
|
14088 |
+
{
|
14089 |
+
"epoch": 0.78,
|
14090 |
+
"grad_norm": 1.0445626974105835,
|
14091 |
+
"learning_rate": 1.098991661818887e-05,
|
14092 |
+
"loss": 5.3264,
|
14093 |
+
"step": 8048
|
14094 |
+
},
|
14095 |
+
{
|
14096 |
+
"epoch": 0.78,
|
14097 |
+
"grad_norm": 1.0949641466140747,
|
14098 |
+
"learning_rate": 1.0970525499321312e-05,
|
14099 |
+
"loss": 5.3306,
|
14100 |
+
"step": 8052
|
14101 |
+
},
|
14102 |
+
{
|
14103 |
+
"epoch": 0.78,
|
14104 |
+
"grad_norm": 1.044668197631836,
|
14105 |
+
"learning_rate": 1.0951134380453753e-05,
|
14106 |
+
"loss": 5.3335,
|
14107 |
+
"step": 8056
|
14108 |
+
},
|
14109 |
+
{
|
14110 |
+
"epoch": 0.78,
|
14111 |
+
"grad_norm": 1.079334020614624,
|
14112 |
+
"learning_rate": 1.0931743261586193e-05,
|
14113 |
+
"loss": 5.3383,
|
14114 |
+
"step": 8060
|
14115 |
+
},
|
14116 |
+
{
|
14117 |
+
"epoch": 0.78,
|
14118 |
+
"grad_norm": 1.0334477424621582,
|
14119 |
+
"learning_rate": 1.0912352142718636e-05,
|
14120 |
+
"loss": 5.3128,
|
14121 |
+
"step": 8064
|
14122 |
+
},
|
14123 |
+
{
|
14124 |
+
"epoch": 0.78,
|
14125 |
+
"grad_norm": 0.9899519681930542,
|
14126 |
+
"learning_rate": 1.0892961023851076e-05,
|
14127 |
+
"loss": 5.3245,
|
14128 |
+
"step": 8068
|
14129 |
+
},
|
14130 |
+
{
|
14131 |
+
"epoch": 0.78,
|
14132 |
+
"grad_norm": 1.0130146741867065,
|
14133 |
+
"learning_rate": 1.0873569904983517e-05,
|
14134 |
+
"loss": 5.2891,
|
14135 |
+
"step": 8072
|
14136 |
+
},
|
14137 |
+
{
|
14138 |
+
"epoch": 0.78,
|
14139 |
+
"grad_norm": 1.0125179290771484,
|
14140 |
+
"learning_rate": 1.0854178786115959e-05,
|
14141 |
+
"loss": 5.3007,
|
14142 |
+
"step": 8076
|
14143 |
+
},
|
14144 |
+
{
|
14145 |
+
"epoch": 0.78,
|
14146 |
+
"grad_norm": 1.0482258796691895,
|
14147 |
+
"learning_rate": 1.0834787667248401e-05,
|
14148 |
+
"loss": 5.2417,
|
14149 |
+
"step": 8080
|
14150 |
+
},
|
14151 |
+
{
|
14152 |
+
"epoch": 0.78,
|
14153 |
+
"grad_norm": 1.055640697479248,
|
14154 |
+
"learning_rate": 1.0815396548380842e-05,
|
14155 |
+
"loss": 5.3779,
|
14156 |
+
"step": 8084
|
14157 |
+
},
|
14158 |
+
{
|
14159 |
+
"epoch": 0.78,
|
14160 |
+
"grad_norm": 1.0579723119735718,
|
14161 |
+
"learning_rate": 1.0796005429513284e-05,
|
14162 |
+
"loss": 5.232,
|
14163 |
+
"step": 8088
|
14164 |
+
},
|
14165 |
+
{
|
14166 |
+
"epoch": 0.78,
|
14167 |
+
"grad_norm": 1.083598017692566,
|
14168 |
+
"learning_rate": 1.0776614310645724e-05,
|
14169 |
+
"loss": 5.4277,
|
14170 |
+
"step": 8092
|
14171 |
+
},
|
14172 |
+
{
|
14173 |
+
"epoch": 0.78,
|
14174 |
+
"grad_norm": 0.9834340214729309,
|
14175 |
+
"learning_rate": 1.0757223191778167e-05,
|
14176 |
+
"loss": 5.2498,
|
14177 |
+
"step": 8096
|
14178 |
+
},
|
14179 |
+
{
|
14180 |
+
"epoch": 0.79,
|
14181 |
+
"grad_norm": 1.0400574207305908,
|
14182 |
+
"learning_rate": 1.0737832072910607e-05,
|
14183 |
+
"loss": 5.3321,
|
14184 |
+
"step": 8100
|
14185 |
+
},
|
14186 |
+
{
|
14187 |
+
"epoch": 0.79,
|
14188 |
+
"grad_norm": 1.083775281906128,
|
14189 |
+
"learning_rate": 1.071844095404305e-05,
|
14190 |
+
"loss": 5.1589,
|
14191 |
+
"step": 8104
|
14192 |
+
},
|
14193 |
+
{
|
14194 |
+
"epoch": 0.79,
|
14195 |
+
"grad_norm": 1.035290241241455,
|
14196 |
+
"learning_rate": 1.069904983517549e-05,
|
14197 |
+
"loss": 5.4106,
|
14198 |
+
"step": 8108
|
14199 |
+
},
|
14200 |
+
{
|
14201 |
+
"epoch": 0.79,
|
14202 |
+
"grad_norm": 1.0348436832427979,
|
14203 |
+
"learning_rate": 1.0679658716307932e-05,
|
14204 |
+
"loss": 5.2728,
|
14205 |
+
"step": 8112
|
14206 |
+
},
|
14207 |
+
{
|
14208 |
+
"epoch": 0.79,
|
14209 |
+
"grad_norm": 1.058597445487976,
|
14210 |
+
"learning_rate": 1.0660267597440373e-05,
|
14211 |
+
"loss": 5.2081,
|
14212 |
+
"step": 8116
|
14213 |
+
},
|
14214 |
+
{
|
14215 |
+
"epoch": 0.79,
|
14216 |
+
"grad_norm": 1.0467309951782227,
|
14217 |
+
"learning_rate": 1.0640876478572815e-05,
|
14218 |
+
"loss": 5.232,
|
14219 |
+
"step": 8120
|
14220 |
+
},
|
14221 |
+
{
|
14222 |
+
"epoch": 0.79,
|
14223 |
+
"grad_norm": 0.9697101712226868,
|
14224 |
+
"learning_rate": 1.0621485359705256e-05,
|
14225 |
+
"loss": 5.2641,
|
14226 |
+
"step": 8124
|
14227 |
+
},
|
14228 |
+
{
|
14229 |
+
"epoch": 0.79,
|
14230 |
+
"grad_norm": 1.0596665143966675,
|
14231 |
+
"learning_rate": 1.0602094240837698e-05,
|
14232 |
+
"loss": 5.2865,
|
14233 |
+
"step": 8128
|
14234 |
+
},
|
14235 |
+
{
|
14236 |
+
"epoch": 0.79,
|
14237 |
+
"grad_norm": 1.0948309898376465,
|
14238 |
+
"learning_rate": 1.0582703121970138e-05,
|
14239 |
+
"loss": 5.2826,
|
14240 |
+
"step": 8132
|
14241 |
+
},
|
14242 |
+
{
|
14243 |
+
"epoch": 0.79,
|
14244 |
+
"grad_norm": 1.0270111560821533,
|
14245 |
+
"learning_rate": 1.0563312003102579e-05,
|
14246 |
+
"loss": 5.2726,
|
14247 |
+
"step": 8136
|
14248 |
+
},
|
14249 |
+
{
|
14250 |
+
"epoch": 0.79,
|
14251 |
+
"grad_norm": 1.0012414455413818,
|
14252 |
+
"learning_rate": 1.0543920884235021e-05,
|
14253 |
+
"loss": 5.3184,
|
14254 |
+
"step": 8140
|
14255 |
+
},
|
14256 |
+
{
|
14257 |
+
"epoch": 0.79,
|
14258 |
+
"grad_norm": 1.019332766532898,
|
14259 |
+
"learning_rate": 1.0524529765367462e-05,
|
14260 |
+
"loss": 5.2908,
|
14261 |
+
"step": 8144
|
14262 |
+
},
|
14263 |
+
{
|
14264 |
+
"epoch": 0.79,
|
14265 |
+
"grad_norm": 1.0300483703613281,
|
14266 |
+
"learning_rate": 1.0505138646499904e-05,
|
14267 |
+
"loss": 5.3333,
|
14268 |
+
"step": 8148
|
14269 |
+
},
|
14270 |
+
{
|
14271 |
+
"epoch": 0.79,
|
14272 |
+
"grad_norm": 1.0536975860595703,
|
14273 |
+
"learning_rate": 1.0485747527632344e-05,
|
14274 |
+
"loss": 5.2944,
|
14275 |
+
"step": 8152
|
14276 |
+
},
|
14277 |
+
{
|
14278 |
+
"epoch": 0.79,
|
14279 |
+
"grad_norm": 0.9881764650344849,
|
14280 |
+
"learning_rate": 1.0466356408764787e-05,
|
14281 |
+
"loss": 5.2767,
|
14282 |
+
"step": 8156
|
14283 |
+
},
|
14284 |
+
{
|
14285 |
+
"epoch": 0.79,
|
14286 |
+
"grad_norm": 1.0163639783859253,
|
14287 |
+
"learning_rate": 1.0446965289897227e-05,
|
14288 |
+
"loss": 5.2723,
|
14289 |
+
"step": 8160
|
14290 |
+
},
|
14291 |
+
{
|
14292 |
+
"epoch": 0.79,
|
14293 |
+
"grad_norm": 1.0919207334518433,
|
14294 |
+
"learning_rate": 1.042757417102967e-05,
|
14295 |
+
"loss": 5.3327,
|
14296 |
+
"step": 8164
|
14297 |
+
},
|
14298 |
+
{
|
14299 |
+
"epoch": 0.79,
|
14300 |
+
"grad_norm": 1.0306917428970337,
|
14301 |
+
"learning_rate": 1.040818305216211e-05,
|
14302 |
+
"loss": 5.2565,
|
14303 |
+
"step": 8168
|
14304 |
+
},
|
14305 |
+
{
|
14306 |
+
"epoch": 0.79,
|
14307 |
+
"grad_norm": 1.0125017166137695,
|
14308 |
+
"learning_rate": 1.0388791933294552e-05,
|
14309 |
+
"loss": 5.342,
|
14310 |
+
"step": 8172
|
14311 |
+
},
|
14312 |
+
{
|
14313 |
+
"epoch": 0.79,
|
14314 |
+
"grad_norm": 1.0879104137420654,
|
14315 |
+
"learning_rate": 1.0369400814426993e-05,
|
14316 |
+
"loss": 5.3122,
|
14317 |
+
"step": 8176
|
14318 |
+
},
|
14319 |
+
{
|
14320 |
+
"epoch": 0.79,
|
14321 |
+
"grad_norm": 0.9527262449264526,
|
14322 |
+
"learning_rate": 1.0350009695559435e-05,
|
14323 |
+
"loss": 5.284,
|
14324 |
+
"step": 8180
|
14325 |
+
},
|
14326 |
+
{
|
14327 |
+
"epoch": 0.79,
|
14328 |
+
"grad_norm": 1.0486680269241333,
|
14329 |
+
"learning_rate": 1.0330618576691875e-05,
|
14330 |
+
"loss": 5.2428,
|
14331 |
+
"step": 8184
|
14332 |
+
},
|
14333 |
+
{
|
14334 |
+
"epoch": 0.79,
|
14335 |
+
"grad_norm": 1.0278397798538208,
|
14336 |
+
"learning_rate": 1.0311227457824318e-05,
|
14337 |
+
"loss": 5.3466,
|
14338 |
+
"step": 8188
|
14339 |
+
},
|
14340 |
+
{
|
14341 |
+
"epoch": 0.79,
|
14342 |
+
"grad_norm": 1.0635344982147217,
|
14343 |
+
"learning_rate": 1.0291836338956758e-05,
|
14344 |
+
"loss": 5.2645,
|
14345 |
+
"step": 8192
|
14346 |
+
},
|
14347 |
+
{
|
14348 |
+
"epoch": 0.79,
|
14349 |
+
"grad_norm": 1.0180613994598389,
|
14350 |
+
"learning_rate": 1.02724452200892e-05,
|
14351 |
+
"loss": 5.3552,
|
14352 |
+
"step": 8196
|
14353 |
+
},
|
14354 |
+
{
|
14355 |
+
"epoch": 0.8,
|
14356 |
+
"grad_norm": 1.0265159606933594,
|
14357 |
+
"learning_rate": 1.0253054101221641e-05,
|
14358 |
+
"loss": 5.3422,
|
14359 |
+
"step": 8200
|
14360 |
+
},
|
14361 |
+
{
|
14362 |
+
"epoch": 0.8,
|
14363 |
+
"grad_norm": 1.029842495918274,
|
14364 |
+
"learning_rate": 1.0233662982354082e-05,
|
14365 |
+
"loss": 5.4009,
|
14366 |
+
"step": 8204
|
14367 |
+
},
|
14368 |
+
{
|
14369 |
+
"epoch": 0.8,
|
14370 |
+
"grad_norm": 1.0848013162612915,
|
14371 |
+
"learning_rate": 1.0214271863486524e-05,
|
14372 |
+
"loss": 5.3982,
|
14373 |
+
"step": 8208
|
14374 |
+
},
|
14375 |
+
{
|
14376 |
+
"epoch": 0.8,
|
14377 |
+
"grad_norm": 1.0277658700942993,
|
14378 |
+
"learning_rate": 1.0194880744618964e-05,
|
14379 |
+
"loss": 5.3567,
|
14380 |
+
"step": 8212
|
14381 |
+
},
|
14382 |
+
{
|
14383 |
+
"epoch": 0.8,
|
14384 |
+
"grad_norm": 1.0273760557174683,
|
14385 |
+
"learning_rate": 1.0175489625751407e-05,
|
14386 |
+
"loss": 5.2762,
|
14387 |
+
"step": 8216
|
14388 |
+
},
|
14389 |
+
{
|
14390 |
+
"epoch": 0.8,
|
14391 |
+
"grad_norm": 0.9951087832450867,
|
14392 |
+
"learning_rate": 1.0156098506883847e-05,
|
14393 |
+
"loss": 5.3267,
|
14394 |
+
"step": 8220
|
14395 |
+
},
|
14396 |
+
{
|
14397 |
+
"epoch": 0.8,
|
14398 |
+
"grad_norm": 1.0347890853881836,
|
14399 |
+
"learning_rate": 1.013670738801629e-05,
|
14400 |
+
"loss": 5.2681,
|
14401 |
+
"step": 8224
|
14402 |
+
},
|
14403 |
+
{
|
14404 |
+
"epoch": 0.8,
|
14405 |
+
"grad_norm": 1.076242446899414,
|
14406 |
+
"learning_rate": 1.011731626914873e-05,
|
14407 |
+
"loss": 5.3621,
|
14408 |
+
"step": 8228
|
14409 |
+
},
|
14410 |
+
{
|
14411 |
+
"epoch": 0.8,
|
14412 |
+
"grad_norm": 1.02762770652771,
|
14413 |
+
"learning_rate": 1.0097925150281172e-05,
|
14414 |
+
"loss": 5.3412,
|
14415 |
+
"step": 8232
|
14416 |
+
},
|
14417 |
+
{
|
14418 |
+
"epoch": 0.8,
|
14419 |
+
"grad_norm": 1.0394989252090454,
|
14420 |
+
"learning_rate": 1.0078534031413613e-05,
|
14421 |
+
"loss": 5.2555,
|
14422 |
+
"step": 8236
|
14423 |
+
},
|
14424 |
+
{
|
14425 |
+
"epoch": 0.8,
|
14426 |
+
"grad_norm": 1.0912150144577026,
|
14427 |
+
"learning_rate": 1.0059142912546055e-05,
|
14428 |
+
"loss": 5.3784,
|
14429 |
+
"step": 8240
|
14430 |
+
},
|
14431 |
+
{
|
14432 |
+
"epoch": 0.8,
|
14433 |
+
"grad_norm": 1.0681991577148438,
|
14434 |
+
"learning_rate": 1.0039751793678495e-05,
|
14435 |
+
"loss": 5.3226,
|
14436 |
+
"step": 8244
|
14437 |
+
},
|
14438 |
+
{
|
14439 |
+
"epoch": 0.8,
|
14440 |
+
"grad_norm": 1.0572381019592285,
|
14441 |
+
"learning_rate": 1.0020360674810938e-05,
|
14442 |
+
"loss": 5.2935,
|
14443 |
+
"step": 8248
|
14444 |
+
},
|
14445 |
+
{
|
14446 |
+
"epoch": 0.8,
|
14447 |
+
"grad_norm": 1.0432649850845337,
|
14448 |
+
"learning_rate": 1.0000969555943378e-05,
|
14449 |
+
"loss": 5.3087,
|
14450 |
+
"step": 8252
|
14451 |
+
},
|
14452 |
+
{
|
14453 |
+
"epoch": 0.8,
|
14454 |
+
"grad_norm": 1.0084688663482666,
|
14455 |
+
"learning_rate": 9.98157843707582e-06,
|
14456 |
+
"loss": 5.2778,
|
14457 |
+
"step": 8256
|
     }
   ],
   "logging_steps": 4,
@@ -12656,7 +14462,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 1032,
-  "total_flos": 6.
+  "total_flos": 6.953660644432282e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null