Training in progress, step 8000, checkpoint
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5f8a3cc4fd3d109560b7a96fdbe13f14a6e27436a0d8c767e80e006e65b5b757
 size 1856040378
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:86699da39a3fab83cd8c5fdd36440d7f77f6445f7569eff311e9b0123ff619ce
 size 928000378
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5dafd50c49589ddc23b254f5413b0d2faf890d41ca48d64ead8a7084105e608a
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fc42f55e2d0842057dc8ff559f0ec21b6e555cf7df0d60cb6c4bece9d16e8ae6
 size 1000
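The four binary files above are stored with Git LFS, so what the repository actually tracks is a three-line pointer (version, oid, size); each diff shows the pointer's sha256 changing while the payload size stays the same, i.e. the optimizer, model, RNG, and scheduler states were rewritten in place at step 8000. A minimal sketch of reading such a pointer, assuming the checkout contains the pointer text rather than the resolved blob (the path below is reused from above purely for illustration):

# Sketch: parse a Git LFS pointer file of the form shown in the diffs above.
# Assumes the working copy holds the small pointer text, not the real blob.

def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into a {key: value} dict."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer("last-checkpoint/optimizer.pt")
print(ptr["oid"])   # e.g. "sha256:5f8a3cc4..."
print(ptr["size"])  # payload size in bytes, e.g. "1856040378"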
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.5398365259170532,
   "best_model_checkpoint": "model/chessformer-3/checkpoint-7000",
-  "epoch": 0.
+  "epoch": 0.35569783468943134,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24619,6 +24619,3522 @@
       "eval_samples_per_second": 534.768,
       "eval_steps_per_second": 66.865,
       "step": 7000
+    },
+    {
+      "epoch": 0.31132452981192477,
+      "grad_norm": 0.08610174804925919,
+      "learning_rate": 0.0009888111241591819,
+      "loss": 1.5637,
+      "step": 7002
+    },
+    {
+      "epoch": 0.31141345427059713,
+      "grad_norm": 0.08628767728805542,
+      "learning_rate": 0.0009888036941112907,
+      "loss": 1.5625,
+      "step": 7004
+    },
+    {
+      "epoch": 0.3115023787292695,
+      "grad_norm": 0.08559640496969223,
+      "learning_rate": 0.0009887962616251654,
+      "loss": 1.5633,
+      "step": 7006
+    },
+    {
+      "epoch": 0.31159130318794187,
+      "grad_norm": 0.08582499623298645,
+      "learning_rate": 0.0009887888267008432,
+      "loss": 1.5718,
+      "step": 7008
+    },
+    {
+      "epoch": 0.3116802276466142,
+      "grad_norm": 0.08453387767076492,
+      "learning_rate": 0.0009887813893383612,
+      "loss": 1.5644,
+      "step": 7010
+    },
+    {
+      "epoch": 0.31176915210528655,
+      "grad_norm": 0.0880834087729454,
+      "learning_rate": 0.0009887739495377564,
+      "loss": 1.5675,
+      "step": 7012
+    },
+    {
+      "epoch": 0.3118580765639589,
+      "grad_norm": 0.08896942436695099,
+      "learning_rate": 0.0009887665072990661,
+      "loss": 1.5601,
+      "step": 7014
+    },
+    {
+      "epoch": 0.3119470010226313,
+      "grad_norm": 0.08615525811910629,
+      "learning_rate": 0.0009887590626223273,
+      "loss": 1.5647,
+      "step": 7016
+    },
+    {
+      "epoch": 0.31203592548130366,
+      "grad_norm": 0.08899851888418198,
+      "learning_rate": 0.0009887516155075772,
+      "loss": 1.5663,
+      "step": 7018
+    },
+    {
+      "epoch": 0.31212484993997597,
+      "grad_norm": 0.08589426428079605,
+      "learning_rate": 0.000988744165954853,
+      "loss": 1.5629,
+      "step": 7020
+    },
+    {
+      "epoch": 0.31221377439864834,
+      "grad_norm": 0.0895439013838768,
+      "learning_rate": 0.0009887367139641916,
+      "loss": 1.5655,
+      "step": 7022
+    },
+    {
+      "epoch": 0.3123026988573207,
+      "grad_norm": 0.08594179153442383,
+      "learning_rate": 0.0009887292595356304,
+      "loss": 1.5574,
+      "step": 7024
+    },
+    {
+      "epoch": 0.31239162331599307,
+      "grad_norm": 0.08816447108983994,
+      "learning_rate": 0.0009887218026692066,
+      "loss": 1.5686,
+      "step": 7026
+    },
+    {
+      "epoch": 0.31248054777466544,
+      "grad_norm": 0.08560863137245178,
+      "learning_rate": 0.0009887143433649573,
+      "loss": 1.5662,
+      "step": 7028
+    },
+    {
+      "epoch": 0.31256947223333775,
+      "grad_norm": 0.0848335549235344,
+      "learning_rate": 0.0009887068816229196,
+      "loss": 1.5647,
+      "step": 7030
+    },
+    {
+      "epoch": 0.3126583966920101,
+      "grad_norm": 0.08963989466428757,
+      "learning_rate": 0.000988699417443131,
+      "loss": 1.5681,
+      "step": 7032
+    },
+    {
+      "epoch": 0.3127473211506825,
+      "grad_norm": 0.08423878997564316,
+      "learning_rate": 0.0009886919508256286,
+      "loss": 1.5634,
+      "step": 7034
+    },
+    {
+      "epoch": 0.31283624560935486,
+      "grad_norm": 0.08075986057519913,
+      "learning_rate": 0.0009886844817704496,
+      "loss": 1.5645,
+      "step": 7036
+    },
+    {
+      "epoch": 0.3129251700680272,
+      "grad_norm": 0.07971926033496857,
+      "learning_rate": 0.0009886770102776313,
+      "loss": 1.5615,
+      "step": 7038
+    },
+    {
+      "epoch": 0.3130140945266996,
+      "grad_norm": 0.08668291568756104,
+      "learning_rate": 0.0009886695363472107,
+      "loss": 1.5657,
+      "step": 7040
+    },
+    {
+      "epoch": 0.3131030189853719,
+      "grad_norm": 0.08376525342464447,
+      "learning_rate": 0.0009886620599792258,
+      "loss": 1.5634,
+      "step": 7042
+    },
+    {
+      "epoch": 0.3131919434440443,
+      "grad_norm": 0.0836341604590416,
+      "learning_rate": 0.0009886545811737132,
+      "loss": 1.5597,
+      "step": 7044
+    },
+    {
+      "epoch": 0.31328086790271664,
+      "grad_norm": 0.08614721894264221,
+      "learning_rate": 0.0009886470999307102,
+      "loss": 1.5616,
+      "step": 7046
+    },
+    {
+      "epoch": 0.313369792361389,
+      "grad_norm": 0.08370938897132874,
+      "learning_rate": 0.0009886396162502547,
+      "loss": 1.5695,
+      "step": 7048
+    },
+    {
+      "epoch": 0.3134587168200614,
+      "grad_norm": 0.08297300338745117,
+      "learning_rate": 0.0009886321301323835,
+      "loss": 1.5669,
+      "step": 7050
+    },
+    {
+      "epoch": 0.3135476412787337,
+      "grad_norm": 0.07920370995998383,
+      "learning_rate": 0.0009886246415771343,
+      "loss": 1.5671,
+      "step": 7052
+    },
+    {
+      "epoch": 0.31363656573740606,
+      "grad_norm": 0.08777054399251938,
+      "learning_rate": 0.000988617150584544,
+      "loss": 1.5684,
+      "step": 7054
+    },
+    {
+      "epoch": 0.3137254901960784,
+      "grad_norm": 0.08385943621397018,
+      "learning_rate": 0.0009886096571546505,
+      "loss": 1.5594,
+      "step": 7056
+    },
+    {
+      "epoch": 0.3138144146547508,
+      "grad_norm": 0.08605042845010757,
+      "learning_rate": 0.0009886021612874908,
+      "loss": 1.5674,
+      "step": 7058
+    },
+    {
+      "epoch": 0.31390333911342316,
+      "grad_norm": 0.08706225454807281,
+      "learning_rate": 0.0009885946629831024,
+      "loss": 1.5579,
+      "step": 7060
+    },
+    {
+      "epoch": 0.31399226357209553,
+      "grad_norm": 0.08403518795967102,
+      "learning_rate": 0.000988587162241523,
+      "loss": 1.5624,
+      "step": 7062
+    },
+    {
+      "epoch": 0.31408118803076784,
+      "grad_norm": 0.08342042565345764,
+      "learning_rate": 0.0009885796590627894,
+      "loss": 1.5651,
+      "step": 7064
+    },
+    {
+      "epoch": 0.3141701124894402,
+      "grad_norm": 0.08213895559310913,
+      "learning_rate": 0.0009885721534469395,
+      "loss": 1.5653,
+      "step": 7066
+    },
+    {
+      "epoch": 0.3142590369481126,
+      "grad_norm": 0.08500027656555176,
+      "learning_rate": 0.0009885646453940105,
+      "loss": 1.5693,
+      "step": 7068
+    },
+    {
+      "epoch": 0.31434796140678495,
+      "grad_norm": 0.08024073392152786,
+      "learning_rate": 0.0009885571349040401,
+      "loss": 1.565,
+      "step": 7070
+    },
+    {
+      "epoch": 0.3144368858654573,
+      "grad_norm": 0.08580642938613892,
+      "learning_rate": 0.0009885496219770656,
+      "loss": 1.5634,
+      "step": 7072
+    },
+    {
+      "epoch": 0.31452581032412963,
+      "grad_norm": 0.08305441588163376,
+      "learning_rate": 0.0009885421066131244,
+      "loss": 1.5678,
+      "step": 7074
+    },
+    {
+      "epoch": 0.314614734782802,
+      "grad_norm": 0.0872965082526207,
+      "learning_rate": 0.000988534588812254,
+      "loss": 1.5619,
+      "step": 7076
+    },
+    {
+      "epoch": 0.31470365924147436,
+      "grad_norm": 0.08917077630758286,
+      "learning_rate": 0.0009885270685744921,
+      "loss": 1.5585,
+      "step": 7078
+    },
+    {
+      "epoch": 0.31479258370014673,
+      "grad_norm": 0.08678824454545975,
+      "learning_rate": 0.000988519545899876,
+      "loss": 1.5599,
+      "step": 7080
+    },
+    {
+      "epoch": 0.3148815081588191,
+      "grad_norm": 0.08195692300796509,
+      "learning_rate": 0.0009885120207884434,
+      "loss": 1.565,
+      "step": 7082
+    },
+    {
+      "epoch": 0.3149704326174914,
+      "grad_norm": 0.08080308139324188,
+      "learning_rate": 0.000988504493240232,
+      "loss": 1.5663,
+      "step": 7084
+    },
+    {
+      "epoch": 0.3150593570761638,
+      "grad_norm": 0.08524754643440247,
+      "learning_rate": 0.0009884969632552787,
+      "loss": 1.5669,
+      "step": 7086
+    },
+    {
+      "epoch": 0.31514828153483615,
+      "grad_norm": 0.08281772583723068,
+      "learning_rate": 0.0009884894308336215,
+      "loss": 1.5625,
+      "step": 7088
+    },
+    {
+      "epoch": 0.3152372059935085,
+      "grad_norm": 0.08267832547426224,
+      "learning_rate": 0.0009884818959752981,
+      "loss": 1.5649,
+      "step": 7090
+    },
+    {
+      "epoch": 0.3153261304521809,
+      "grad_norm": 0.08353657275438309,
+      "learning_rate": 0.0009884743586803457,
+      "loss": 1.5639,
+      "step": 7092
+    },
+    {
+      "epoch": 0.31541505491085325,
+      "grad_norm": 0.08342524617910385,
+      "learning_rate": 0.0009884668189488023,
+      "loss": 1.5646,
+      "step": 7094
+    },
+    {
+      "epoch": 0.31550397936952557,
+      "grad_norm": 0.08173046261072159,
+      "learning_rate": 0.0009884592767807053,
+      "loss": 1.5661,
+      "step": 7096
+    },
+    {
+      "epoch": 0.31559290382819793,
+      "grad_norm": 0.08680406212806702,
+      "learning_rate": 0.0009884517321760923,
+      "loss": 1.5619,
+      "step": 7098
+    },
+    {
+      "epoch": 0.3156818282868703,
+      "grad_norm": 0.08208227157592773,
+      "learning_rate": 0.000988444185135001,
+      "loss": 1.5639,
+      "step": 7100
+    },
+    {
+      "epoch": 0.31577075274554267,
+      "grad_norm": 0.0871683731675148,
+      "learning_rate": 0.0009884366356574692,
+      "loss": 1.5657,
+      "step": 7102
+    },
+    {
+      "epoch": 0.31585967720421504,
+      "grad_norm": 0.08410225808620453,
+      "learning_rate": 0.0009884290837435341,
+      "loss": 1.5616,
+      "step": 7104
+    },
+    {
+      "epoch": 0.31594860166288735,
+      "grad_norm": 0.08295659720897675,
+      "learning_rate": 0.0009884215293932339,
+      "loss": 1.5653,
+      "step": 7106
+    },
+    {
+      "epoch": 0.3160375261215597,
+      "grad_norm": 0.08682281523942947,
+      "learning_rate": 0.000988413972606606,
+      "loss": 1.5636,
+      "step": 7108
+    },
+    {
+      "epoch": 0.3161264505802321,
+      "grad_norm": 0.08395165205001831,
+      "learning_rate": 0.000988406413383688,
+      "loss": 1.5628,
+      "step": 7110
+    },
+    {
+      "epoch": 0.31621537503890446,
+      "grad_norm": 0.08330830186605453,
+      "learning_rate": 0.0009883988517245177,
+      "loss": 1.5585,
+      "step": 7112
+    },
+    {
+      "epoch": 0.3163042994975768,
+      "grad_norm": 0.08235388249158859,
+      "learning_rate": 0.0009883912876291329,
+      "loss": 1.5712,
+      "step": 7114
+    },
+    {
+      "epoch": 0.3163932239562492,
+      "grad_norm": 0.08317794650793076,
+      "learning_rate": 0.0009883837210975714,
+      "loss": 1.5658,
+      "step": 7116
+    },
+    {
+      "epoch": 0.3164821484149215,
+      "grad_norm": 0.08337473124265671,
+      "learning_rate": 0.0009883761521298708,
+      "loss": 1.5615,
+      "step": 7118
+    },
+    {
+      "epoch": 0.3165710728735939,
+      "grad_norm": 0.08434969931840897,
+      "learning_rate": 0.000988368580726069,
+      "loss": 1.5617,
+      "step": 7120
+    },
+    {
+      "epoch": 0.31665999733226624,
+      "grad_norm": 0.08485856652259827,
+      "learning_rate": 0.0009883610068862035,
+      "loss": 1.5657,
+      "step": 7122
+    },
+    {
+      "epoch": 0.3167489217909386,
+      "grad_norm": 0.08327708393335342,
+      "learning_rate": 0.0009883534306103121,
+      "loss": 1.5656,
+      "step": 7124
+    },
+    {
+      "epoch": 0.316837846249611,
+      "grad_norm": 0.08332667499780655,
+      "learning_rate": 0.000988345851898433,
+      "loss": 1.5645,
+      "step": 7126
+    },
+    {
+      "epoch": 0.3169267707082833,
+      "grad_norm": 0.08230622857809067,
+      "learning_rate": 0.0009883382707506036,
+      "loss": 1.5623,
+      "step": 7128
+    },
+    {
+      "epoch": 0.31701569516695566,
+      "grad_norm": 0.08090732246637344,
+      "learning_rate": 0.0009883306871668618,
+      "loss": 1.5695,
+      "step": 7130
+    },
+    {
+      "epoch": 0.317104619625628,
+      "grad_norm": 0.08214423060417175,
+      "learning_rate": 0.0009883231011472455,
+      "loss": 1.5633,
+      "step": 7132
+    },
+    {
+      "epoch": 0.3171935440843004,
+      "grad_norm": 0.08126901835203171,
+      "learning_rate": 0.0009883155126917925,
+      "loss": 1.5634,
+      "step": 7134
+    },
+    {
+      "epoch": 0.31728246854297276,
+      "grad_norm": 0.08316788822412491,
+      "learning_rate": 0.0009883079218005407,
+      "loss": 1.5626,
+      "step": 7136
+    },
+    {
+      "epoch": 0.31737139300164513,
+      "grad_norm": 0.0886952131986618,
+      "learning_rate": 0.000988300328473528,
+      "loss": 1.5624,
+      "step": 7138
+    },
+    {
+      "epoch": 0.31746031746031744,
+      "grad_norm": 0.0825994685292244,
+      "learning_rate": 0.0009882927327107922,
+      "loss": 1.5671,
+      "step": 7140
+    },
+    {
+      "epoch": 0.3175492419189898,
+      "grad_norm": 0.09127391129732132,
+      "learning_rate": 0.000988285134512371,
+      "loss": 1.5592,
+      "step": 7142
+    },
+    {
+      "epoch": 0.3176381663776622,
+      "grad_norm": 0.08798851072788239,
+      "learning_rate": 0.0009882775338783028,
+      "loss": 1.5618,
+      "step": 7144
+    },
+    {
+      "epoch": 0.31772709083633455,
+      "grad_norm": 0.09108838438987732,
+      "learning_rate": 0.000988269930808625,
+      "loss": 1.5644,
+      "step": 7146
+    },
+    {
+      "epoch": 0.3178160152950069,
+      "grad_norm": 0.08250788599252701,
+      "learning_rate": 0.0009882623253033758,
+      "loss": 1.5662,
+      "step": 7148
+    },
+    {
+      "epoch": 0.3179049397536792,
+      "grad_norm": 0.08027481287717819,
+      "learning_rate": 0.000988254717362593,
+      "loss": 1.5601,
+      "step": 7150
+    },
+    {
+      "epoch": 0.3179938642123516,
+      "grad_norm": 0.08304357528686523,
+      "learning_rate": 0.0009882471069863146,
+      "loss": 1.5578,
+      "step": 7152
+    },
+    {
+      "epoch": 0.31808278867102396,
+      "grad_norm": 0.08454793691635132,
+      "learning_rate": 0.0009882394941745788,
+      "loss": 1.5567,
+      "step": 7154
+    },
+    {
+      "epoch": 0.31817171312969633,
+      "grad_norm": 0.08314481377601624,
+      "learning_rate": 0.0009882318789274233,
+      "loss": 1.5692,
+      "step": 7156
+    },
+    {
+      "epoch": 0.3182606375883687,
+      "grad_norm": 0.08372924476861954,
+      "learning_rate": 0.000988224261244886,
+      "loss": 1.5597,
+      "step": 7158
+    },
+    {
+      "epoch": 0.318349562047041,
+      "grad_norm": 0.08202824741601944,
+      "learning_rate": 0.000988216641127005,
+      "loss": 1.5669,
+      "step": 7160
+    },
+    {
+      "epoch": 0.3184384865057134,
+      "grad_norm": 0.08950933814048767,
+      "learning_rate": 0.0009882090185738186,
+      "loss": 1.5682,
+      "step": 7162
+    },
+    {
+      "epoch": 0.31852741096438575,
+      "grad_norm": 0.08493774384260178,
+      "learning_rate": 0.0009882013935853644,
+      "loss": 1.5685,
+      "step": 7164
+    },
+    {
+      "epoch": 0.3186163354230581,
+      "grad_norm": 0.08362516015768051,
+      "learning_rate": 0.0009881937661616806,
+      "loss": 1.5685,
+      "step": 7166
+    },
+    {
+      "epoch": 0.3187052598817305,
+      "grad_norm": 0.08802633732557297,
+      "learning_rate": 0.0009881861363028053,
+      "loss": 1.5684,
+      "step": 7168
+    },
+    {
+      "epoch": 0.31879418434040285,
+      "grad_norm": 0.08594125509262085,
+      "learning_rate": 0.0009881785040087765,
+      "loss": 1.5581,
+      "step": 7170
+    },
+    {
+      "epoch": 0.31888310879907517,
+      "grad_norm": 0.08420670032501221,
+      "learning_rate": 0.0009881708692796325,
+      "loss": 1.5626,
+      "step": 7172
+    },
+    {
+      "epoch": 0.31897203325774753,
+      "grad_norm": 0.08396308124065399,
+      "learning_rate": 0.000988163232115411,
+      "loss": 1.5624,
+      "step": 7174
+    },
+    {
+      "epoch": 0.3190609577164199,
+      "grad_norm": 0.08624763786792755,
+      "learning_rate": 0.00098815559251615,
+      "loss": 1.5667,
+      "step": 7176
+    },
+    {
+      "epoch": 0.31914988217509227,
+      "grad_norm": 0.0823800191283226,
+      "learning_rate": 0.0009881479504818883,
+      "loss": 1.5654,
+      "step": 7178
+    },
+    {
+      "epoch": 0.31923880663376464,
+      "grad_norm": 0.08368480205535889,
+      "learning_rate": 0.0009881403060126635,
+      "loss": 1.5673,
+      "step": 7180
+    },
+    {
+      "epoch": 0.31932773109243695,
+      "grad_norm": 0.08373897522687912,
+      "learning_rate": 0.0009881326591085135,
+      "loss": 1.565,
+      "step": 7182
+    },
+    {
+      "epoch": 0.3194166555511093,
+      "grad_norm": 0.08619046956300735,
+      "learning_rate": 0.0009881250097694772,
+      "loss": 1.5597,
+      "step": 7184
+    },
+    {
+      "epoch": 0.3195055800097817,
+      "grad_norm": 0.08758384734392166,
+      "learning_rate": 0.000988117357995592,
+      "loss": 1.5618,
+      "step": 7186
+    },
+    {
+      "epoch": 0.31959450446845405,
+      "grad_norm": 0.08243545889854431,
+      "learning_rate": 0.0009881097037868966,
+      "loss": 1.5549,
+      "step": 7188
+    },
+    {
+      "epoch": 0.3196834289271264,
+      "grad_norm": 0.07898540049791336,
+      "learning_rate": 0.000988102047143429,
+      "loss": 1.5597,
+      "step": 7190
+    },
+    {
+      "epoch": 0.3197723533857988,
+      "grad_norm": 0.08437183499336243,
+      "learning_rate": 0.0009880943880652274,
+      "loss": 1.5637,
+      "step": 7192
+    },
+    {
+      "epoch": 0.3198612778444711,
+      "grad_norm": 0.08325666189193726,
+      "learning_rate": 0.0009880867265523296,
+      "loss": 1.5613,
+      "step": 7194
+    },
+    {
+      "epoch": 0.31995020230314347,
+      "grad_norm": 0.08307388424873352,
+      "learning_rate": 0.0009880790626047747,
+      "loss": 1.5627,
+      "step": 7196
+    },
+    {
+      "epoch": 0.32003912676181584,
+      "grad_norm": 0.08156055212020874,
+      "learning_rate": 0.0009880713962226,
+      "loss": 1.5666,
+      "step": 7198
+    },
+    {
+      "epoch": 0.3201280512204882,
+      "grad_norm": 0.08196540176868439,
+      "learning_rate": 0.0009880637274058443,
+      "loss": 1.5627,
+      "step": 7200
+    },
+    {
+      "epoch": 0.3202169756791606,
+      "grad_norm": 0.08538369089365005,
+      "learning_rate": 0.0009880560561545458,
+      "loss": 1.5644,
+      "step": 7202
+    },
+    {
+      "epoch": 0.3203059001378329,
+      "grad_norm": 0.08306334167718887,
+      "learning_rate": 0.0009880483824687427,
+      "loss": 1.5582,
+      "step": 7204
+    },
+    {
+      "epoch": 0.32039482459650526,
+      "grad_norm": 0.08345794677734375,
+      "learning_rate": 0.0009880407063484731,
+      "loss": 1.5538,
+      "step": 7206
+    },
+    {
+      "epoch": 0.3204837490551776,
+      "grad_norm": 0.08639951795339584,
+      "learning_rate": 0.0009880330277937757,
+      "loss": 1.5601,
+      "step": 7208
+    },
+    {
+      "epoch": 0.32057267351385,
+      "grad_norm": 0.08315160125494003,
+      "learning_rate": 0.0009880253468046883,
+      "loss": 1.5591,
+      "step": 7210
+    },
+    {
+      "epoch": 0.32066159797252236,
+      "grad_norm": 0.0834464579820633,
+      "learning_rate": 0.0009880176633812493,
+      "loss": 1.5663,
+      "step": 7212
+    },
+    {
+      "epoch": 0.3207505224311947,
+      "grad_norm": 0.08205483853816986,
+      "learning_rate": 0.0009880099775234977,
+      "loss": 1.5662,
+      "step": 7214
+    },
+    {
+      "epoch": 0.32083944688986704,
+      "grad_norm": 0.08744161576032639,
+      "learning_rate": 0.000988002289231471,
+      "loss": 1.5548,
+      "step": 7216
+    },
+    {
+      "epoch": 0.3209283713485394,
+      "grad_norm": 0.08676169067621231,
+      "learning_rate": 0.000987994598505208,
+      "loss": 1.5595,
+      "step": 7218
+    },
+    {
+      "epoch": 0.3210172958072118,
+      "grad_norm": 0.08752325177192688,
+      "learning_rate": 0.0009879869053447468,
+      "loss": 1.5624,
+      "step": 7220
+    },
+    {
+      "epoch": 0.32110622026588415,
+      "grad_norm": 0.08283118903636932,
+      "learning_rate": 0.0009879792097501258,
+      "loss": 1.5675,
+      "step": 7222
+    },
+    {
+      "epoch": 0.3211951447245565,
+      "grad_norm": 0.08117381483316422,
+      "learning_rate": 0.0009879715117213838,
+      "loss": 1.5604,
+      "step": 7224
+    },
+    {
+      "epoch": 0.3212840691832288,
+      "grad_norm": 0.08399328589439392,
+      "learning_rate": 0.0009879638112585587,
+      "loss": 1.5532,
+      "step": 7226
+    },
+    {
+      "epoch": 0.3213729936419012,
+      "grad_norm": 0.0844864770770073,
+      "learning_rate": 0.0009879561083616893,
+      "loss": 1.5662,
+      "step": 7228
+    },
+    {
+      "epoch": 0.32146191810057356,
+      "grad_norm": 0.08030157536268234,
+      "learning_rate": 0.0009879484030308136,
+      "loss": 1.5637,
+      "step": 7230
+    },
+    {
+      "epoch": 0.32155084255924593,
+      "grad_norm": 0.08214158564805984,
+      "learning_rate": 0.0009879406952659705,
+      "loss": 1.5642,
+      "step": 7232
+    },
+    {
+      "epoch": 0.3216397670179183,
+      "grad_norm": 0.08249089121818542,
+      "learning_rate": 0.0009879329850671981,
+      "loss": 1.5614,
+      "step": 7234
+    },
+    {
+      "epoch": 0.3217286914765906,
+      "grad_norm": 0.0858563482761383,
+      "learning_rate": 0.000987925272434535,
+      "loss": 1.5587,
+      "step": 7236
+    },
+    {
+      "epoch": 0.321817615935263,
+      "grad_norm": 0.08245126157999039,
+      "learning_rate": 0.0009879175573680196,
+      "loss": 1.5641,
+      "step": 7238
+    },
+    {
+      "epoch": 0.32190654039393535,
+      "grad_norm": 0.0817423164844513,
+      "learning_rate": 0.0009879098398676905,
+      "loss": 1.5613,
+      "step": 7240
+    },
+    {
+      "epoch": 0.3219954648526077,
+      "grad_norm": 0.08229317516088486,
+      "learning_rate": 0.0009879021199335862,
+      "loss": 1.558,
+      "step": 7242
+    },
+    {
+      "epoch": 0.3220843893112801,
+      "grad_norm": 0.0869976207613945,
+      "learning_rate": 0.000987894397565745,
+      "loss": 1.556,
+      "step": 7244
+    },
+    {
+      "epoch": 0.32217331376995245,
+      "grad_norm": 0.08708877116441727,
+      "learning_rate": 0.0009878866727642056,
+      "loss": 1.5642,
+      "step": 7246
+    },
+    {
+      "epoch": 0.32226223822862476,
+      "grad_norm": 0.08197760581970215,
+      "learning_rate": 0.0009878789455290064,
+      "loss": 1.5577,
+      "step": 7248
+    },
+    {
+      "epoch": 0.32235116268729713,
+      "grad_norm": 0.08504566550254822,
+      "learning_rate": 0.0009878712158601863,
+      "loss": 1.5633,
+      "step": 7250
+    },
+    {
+      "epoch": 0.3224400871459695,
+      "grad_norm": 0.0837409570813179,
+      "learning_rate": 0.0009878634837577832,
+      "loss": 1.5607,
+      "step": 7252
+    },
+    {
+      "epoch": 0.32252901160464187,
+      "grad_norm": 0.08278852701187134,
+      "learning_rate": 0.0009878557492218364,
+      "loss": 1.5618,
+      "step": 7254
+    },
+    {
+      "epoch": 0.32261793606331424,
+      "grad_norm": 0.07882179319858551,
+      "learning_rate": 0.0009878480122523842,
+      "loss": 1.5579,
+      "step": 7256
+    },
+    {
+      "epoch": 0.32270686052198655,
+      "grad_norm": 0.08126863837242126,
+      "learning_rate": 0.0009878402728494648,
+      "loss": 1.5578,
+      "step": 7258
+    },
+    {
+      "epoch": 0.3227957849806589,
+      "grad_norm": 0.08077924698591232,
+      "learning_rate": 0.0009878325310131173,
+      "loss": 1.5598,
+      "step": 7260
+    },
+    {
+      "epoch": 0.3228847094393313,
+      "grad_norm": 0.08375802636146545,
+      "learning_rate": 0.0009878247867433803,
+      "loss": 1.5595,
+      "step": 7262
+    },
+    {
+      "epoch": 0.32297363389800365,
+      "grad_norm": 0.08182679116725922,
+      "learning_rate": 0.000987817040040292,
+      "loss": 1.5558,
+      "step": 7264
+    },
+    {
+      "epoch": 0.323062558356676,
+      "grad_norm": 0.08209866285324097,
+      "learning_rate": 0.0009878092909038916,
+      "loss": 1.5582,
+      "step": 7266
+    },
+    {
+      "epoch": 0.32315148281534833,
+      "grad_norm": 0.08449520170688629,
+      "learning_rate": 0.0009878015393342172,
+      "loss": 1.56,
+      "step": 7268
+    },
+    {
+      "epoch": 0.3232404072740207,
+      "grad_norm": 0.07824712991714478,
+      "learning_rate": 0.000987793785331308,
+      "loss": 1.5512,
+      "step": 7270
+    },
+    {
+      "epoch": 0.32332933173269307,
+      "grad_norm": 0.0843387246131897,
+      "learning_rate": 0.0009877860288952022,
+      "loss": 1.5599,
+      "step": 7272
+    },
+    {
+      "epoch": 0.32341825619136544,
+      "grad_norm": 0.08296310156583786,
+      "learning_rate": 0.000987778270025939,
+      "loss": 1.5652,
+      "step": 7274
+    },
+    {
+      "epoch": 0.3235071806500378,
+      "grad_norm": 0.08399280905723572,
+      "learning_rate": 0.0009877705087235566,
+      "loss": 1.5624,
+      "step": 7276
+    },
+    {
+      "epoch": 0.3235961051087102,
+      "grad_norm": 0.08151458948850632,
+      "learning_rate": 0.000987762744988094,
+      "loss": 1.5661,
+      "step": 7278
+    },
+    {
+      "epoch": 0.3236850295673825,
+      "grad_norm": 0.08649469912052155,
+      "learning_rate": 0.00098775497881959,
+      "loss": 1.5541,
+      "step": 7280
+    },
+    {
+      "epoch": 0.32377395402605486,
+      "grad_norm": 0.0848914235830307,
+      "learning_rate": 0.0009877472102180831,
+      "loss": 1.5622,
+      "step": 7282
+    },
+    {
+      "epoch": 0.3238628784847272,
+      "grad_norm": 0.08599822223186493,
+      "learning_rate": 0.0009877394391836123,
+      "loss": 1.5586,
+      "step": 7284
+    },
+    {
+      "epoch": 0.3239518029433996,
+      "grad_norm": 0.09074999392032623,
+      "learning_rate": 0.000987731665716216,
+      "loss": 1.5584,
+      "step": 7286
+    },
+    {
+      "epoch": 0.32404072740207196,
+      "grad_norm": 0.08198599517345428,
+      "learning_rate": 0.0009877238898159332,
+      "loss": 1.5623,
+      "step": 7288
+    },
+    {
+      "epoch": 0.3241296518607443,
+      "grad_norm": 0.08142637461423874,
+      "learning_rate": 0.0009877161114828026,
+      "loss": 1.5591,
+      "step": 7290
+    },
+    {
+      "epoch": 0.32421857631941664,
+      "grad_norm": 0.08540283888578415,
+      "learning_rate": 0.0009877083307168633,
+      "loss": 1.562,
+      "step": 7292
+    },
+    {
+      "epoch": 0.324307500778089,
+      "grad_norm": 0.08285167068243027,
+      "learning_rate": 0.0009877005475181539,
+      "loss": 1.5581,
+      "step": 7294
+    },
+    {
+      "epoch": 0.3243964252367614,
+      "grad_norm": 0.08604173362255096,
+      "learning_rate": 0.000987692761886713,
+      "loss": 1.5646,
+      "step": 7296
+    },
+    {
+      "epoch": 0.32448534969543374,
+      "grad_norm": 0.08079639822244644,
+      "learning_rate": 0.0009876849738225798,
+      "loss": 1.5611,
+      "step": 7298
+    },
+    {
+      "epoch": 0.3245742741541061,
+      "grad_norm": 0.08307557553052902,
+      "learning_rate": 0.000987677183325793,
+      "loss": 1.5594,
+      "step": 7300
+    },
+    {
+      "epoch": 0.3246631986127784,
+      "grad_norm": 0.08180294185876846,
+      "learning_rate": 0.0009876693903963913,
+      "loss": 1.5631,
+      "step": 7302
+    },
+    {
+      "epoch": 0.3247521230714508,
+      "grad_norm": 0.08266652375459671,
+      "learning_rate": 0.000987661595034414,
+      "loss": 1.559,
+      "step": 7304
+    },
+    {
+      "epoch": 0.32484104753012316,
+      "grad_norm": 0.07898559421300888,
+      "learning_rate": 0.0009876537972398995,
+      "loss": 1.5604,
+      "step": 7306
+    },
+    {
+      "epoch": 0.32492997198879553,
+      "grad_norm": 0.0870475322008133,
+      "learning_rate": 0.000987645997012887,
+      "loss": 1.5617,
+      "step": 7308
+    },
+    {
+      "epoch": 0.3250188964474679,
+      "grad_norm": 0.08088637888431549,
+      "learning_rate": 0.0009876381943534154,
+      "loss": 1.5604,
+      "step": 7310
+    },
+    {
+      "epoch": 0.3251078209061402,
+      "grad_norm": 0.08533639460802078,
+      "learning_rate": 0.0009876303892615234,
+      "loss": 1.5555,
+      "step": 7312
+    },
+    {
+      "epoch": 0.3251967453648126,
+      "grad_norm": 0.08240604400634766,
+      "learning_rate": 0.0009876225817372502,
+      "loss": 1.5609,
+      "step": 7314
+    },
+    {
+      "epoch": 0.32528566982348495,
+      "grad_norm": 0.08196206390857697,
+      "learning_rate": 0.0009876147717806346,
+      "loss": 1.5579,
+      "step": 7316
+    },
+    {
+      "epoch": 0.3253745942821573,
+      "grad_norm": 0.08041080087423325,
+      "learning_rate": 0.0009876069593917154,
+      "loss": 1.5621,
+      "step": 7318
+    },
+    {
+      "epoch": 0.3254635187408297,
+      "grad_norm": 0.07944405823945999,
+      "learning_rate": 0.000987599144570532,
+      "loss": 1.5583,
+      "step": 7320
+    },
+    {
+      "epoch": 0.32555244319950205,
+      "grad_norm": 0.0833417996764183,
+      "learning_rate": 0.000987591327317123,
+      "loss": 1.5588,
+      "step": 7322
+    },
+    {
+      "epoch": 0.32564136765817436,
+      "grad_norm": 0.08267171680927277,
+      "learning_rate": 0.0009875835076315273,
+      "loss": 1.5576,
+      "step": 7324
+    },
+    {
+      "epoch": 0.32573029211684673,
+      "grad_norm": 0.08149762451648712,
+      "learning_rate": 0.0009875756855137845,
+      "loss": 1.5535,
+      "step": 7326
+    },
+    {
+      "epoch": 0.3258192165755191,
+      "grad_norm": 0.0837164968252182,
+      "learning_rate": 0.0009875678609639331,
+      "loss": 1.5554,
+      "step": 7328
+    },
+    {
+      "epoch": 0.32590814103419147,
+      "grad_norm": 0.08050676435232162,
+      "learning_rate": 0.0009875600339820123,
+      "loss": 1.561,
+      "step": 7330
+    },
+    {
+      "epoch": 0.32599706549286384,
+      "grad_norm": 0.08078650385141373,
+      "learning_rate": 0.000987552204568061,
+      "loss": 1.5543,
+      "step": 7332
+    },
+    {
+      "epoch": 0.32608598995153615,
+      "grad_norm": 0.08506888151168823,
+      "learning_rate": 0.0009875443727221185,
+      "loss": 1.5606,
+      "step": 7334
+    },
+    {
+      "epoch": 0.3261749144102085,
+      "grad_norm": 0.08433322608470917,
+      "learning_rate": 0.0009875365384442237,
+      "loss": 1.5563,
+      "step": 7336
+    },
+    {
+      "epoch": 0.3262638388688809,
+      "grad_norm": 0.08262262493371964,
+      "learning_rate": 0.000987528701734416,
+      "loss": 1.565,
+      "step": 7338
+    },
+    {
+      "epoch": 0.32635276332755325,
+      "grad_norm": 0.08206149190664291,
+      "learning_rate": 0.0009875208625927338,
+      "loss": 1.5562,
+      "step": 7340
+    },
+    {
+      "epoch": 0.3264416877862256,
+      "grad_norm": 0.08632387220859528,
+      "learning_rate": 0.0009875130210192167,
+      "loss": 1.5566,
+      "step": 7342
+    },
+    {
+      "epoch": 0.32653061224489793,
+      "grad_norm": 0.08040108531713486,
+      "learning_rate": 0.0009875051770139038,
+      "loss": 1.5591,
+      "step": 7344
+    },
+    {
+      "epoch": 0.3266195367035703,
+      "grad_norm": 0.08796191215515137,
+      "learning_rate": 0.0009874973305768343,
+      "loss": 1.5635,
+      "step": 7346
+    },
+    {
+      "epoch": 0.32670846116224267,
+      "grad_norm": 0.08850188553333282,
+      "learning_rate": 0.000987489481708047,
+      "loss": 1.5547,
+      "step": 7348
+    },
+    {
+      "epoch": 0.32679738562091504,
+      "grad_norm": 0.08623901754617691,
+      "learning_rate": 0.000987481630407581,
+      "loss": 1.5583,
+      "step": 7350
+    },
+    {
+      "epoch": 0.3268863100795874,
+      "grad_norm": 0.08186235278844833,
+      "learning_rate": 0.0009874737766754763,
+      "loss": 1.5568,
+      "step": 7352
+    },
+    {
+      "epoch": 0.3269752345382598,
+      "grad_norm": 0.08264327794313431,
+      "learning_rate": 0.000987465920511771,
+      "loss": 1.5589,
+      "step": 7354
+    },
+    {
+      "epoch": 0.3270641589969321,
+      "grad_norm": 0.08318621665239334,
+      "learning_rate": 0.000987458061916505,
+      "loss": 1.5578,
+      "step": 7356
+    },
+    {
+      "epoch": 0.32715308345560445,
+      "grad_norm": 0.08507546037435532,
+      "learning_rate": 0.0009874502008897174,
+      "loss": 1.5628,
+      "step": 7358
+    },
+    {
+      "epoch": 0.3272420079142768,
+      "grad_norm": 0.08034215867519379,
+      "learning_rate": 0.000987442337431447,
+      "loss": 1.5597,
+      "step": 7360
+    },
+    {
+      "epoch": 0.3273309323729492,
+      "grad_norm": 0.08105006068944931,
+      "learning_rate": 0.0009874344715417333,
+      "loss": 1.5562,
+      "step": 7362
+    },
+    {
+      "epoch": 0.32741985683162156,
+      "grad_norm": 0.07907504588365555,
+      "learning_rate": 0.0009874266032206156,
+      "loss": 1.5649,
+      "step": 7364
+    },
+    {
+      "epoch": 0.32750878129029387,
+      "grad_norm": 0.08351552486419678,
+      "learning_rate": 0.0009874187324681331,
+      "loss": 1.563,
+      "step": 7366
+    },
+    {
+      "epoch": 0.32759770574896624,
+      "grad_norm": 0.07940589636564255,
+      "learning_rate": 0.0009874108592843253,
+      "loss": 1.5597,
+      "step": 7368
+    },
+    {
+      "epoch": 0.3276866302076386,
+      "grad_norm": 0.0827498808503151,
+      "learning_rate": 0.000987402983669231,
+      "loss": 1.5544,
+      "step": 7370
+    },
+    {
+      "epoch": 0.327775554666311,
+      "grad_norm": 0.08331042528152466,
+      "learning_rate": 0.0009873951056228898,
+      "loss": 1.56,
+      "step": 7372
+    },
+    {
+      "epoch": 0.32786447912498334,
+      "grad_norm": 0.08180607855319977,
+      "learning_rate": 0.000987387225145341,
+      "loss": 1.5565,
+      "step": 7374
+    },
+    {
+      "epoch": 0.3279534035836557,
+      "grad_norm": 0.08340569585561752,
+      "learning_rate": 0.0009873793422366234,
+      "loss": 1.5608,
+      "step": 7376
+    },
+    {
+      "epoch": 0.328042328042328,
+      "grad_norm": 0.08175882697105408,
+      "learning_rate": 0.000987371456896777,
+      "loss": 1.5607,
+      "step": 7378
+    },
+    {
+      "epoch": 0.3281312525010004,
+      "grad_norm": 0.0852338895201683,
+      "learning_rate": 0.000987363569125841,
+      "loss": 1.5584,
+      "step": 7380
+    },
+    {
+      "epoch": 0.32822017695967276,
+      "grad_norm": 0.0846090242266655,
+      "learning_rate": 0.0009873556789238545,
+      "loss": 1.5586,
+      "step": 7382
+    },
+    {
+      "epoch": 0.32830910141834513,
+      "grad_norm": 0.07967184484004974,
+      "learning_rate": 0.000987347786290857,
+      "loss": 1.5576,
+      "step": 7384
+    },
+    {
+      "epoch": 0.3283980258770175,
+      "grad_norm": 0.08349616080522537,
+      "learning_rate": 0.000987339891226888,
+      "loss": 1.5628,
+      "step": 7386
+    },
+    {
+      "epoch": 0.3284869503356898,
+      "grad_norm": 0.08276060223579407,
+      "learning_rate": 0.0009873319937319864,
+      "loss": 1.5597,
+      "step": 7388
+    },
+    {
+      "epoch": 0.3285758747943622,
+      "grad_norm": 0.08043144643306732,
+      "learning_rate": 0.0009873240938061921,
+      "loss": 1.5537,
+      "step": 7390
+    },
+    {
+      "epoch": 0.32866479925303455,
+      "grad_norm": 0.08394316583871841,
+      "learning_rate": 0.0009873161914495444,
+      "loss": 1.5573,
+      "step": 7392
+    },
+    {
+      "epoch": 0.3287537237117069,
+      "grad_norm": 0.08426385372877121,
+      "learning_rate": 0.0009873082866620824,
+      "loss": 1.558,
+      "step": 7394
+    },
+    {
+      "epoch": 0.3288426481703793,
+      "grad_norm": 0.08154802769422531,
+      "learning_rate": 0.000987300379443846,
+      "loss": 1.5572,
+      "step": 7396
+    },
+    {
+      "epoch": 0.3289315726290516,
+      "grad_norm": 0.08731735497713089,
+      "learning_rate": 0.0009872924697948745,
+      "loss": 1.5567,
+      "step": 7398
     }
   ],
   "logging_steps": 2,
@@ -24638,7 +28154,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
   "train_batch_size": 768,
   "trial_name": null,
   "trial_params": null
26014 |
+
"step": 7398
|
26015 |
+
},
|
26016 |
+
{
|
26017 |
+
"epoch": 0.32902049708772396,
|
26018 |
+
"grad_norm": 0.08999709039926529,
|
26019 |
+
"learning_rate": 0.000987284557715207,
|
26020 |
+
"loss": 1.556,
|
26021 |
+
"step": 7400
|
26022 |
+
},
|
26023 |
+
{
|
26024 |
+
"epoch": 0.32910942154639633,
|
26025 |
+
"grad_norm": 0.08462511003017426,
|
26026 |
+
"learning_rate": 0.0009872766432048835,
|
26027 |
+
"loss": 1.5585,
|
26028 |
+
"step": 7402
|
26029 |
+
},
|
26030 |
+
{
|
26031 |
+
"epoch": 0.3291983460050687,
|
26032 |
+
"grad_norm": 0.08666058629751205,
|
26033 |
+
"learning_rate": 0.000987268726263943,
|
26034 |
+
"loss": 1.5567,
|
26035 |
+
"step": 7404
|
26036 |
+
},
|
26037 |
+
{
|
26038 |
+
"epoch": 0.32928727046374107,
|
26039 |
+
"grad_norm": 0.08502695709466934,
|
26040 |
+
"learning_rate": 0.0009872608068924253,
|
26041 |
+
"loss": 1.5566,
|
26042 |
+
"step": 7406
|
26043 |
+
},
|
26044 |
+
{
|
26045 |
+
"epoch": 0.32937619492241343,
|
26046 |
+
"grad_norm": 0.08488810807466507,
|
26047 |
+
"learning_rate": 0.0009872528850903698,
|
26048 |
+
"loss": 1.5529,
|
26049 |
+
"step": 7408
|
26050 |
+
},
|
26051 |
+
{
|
26052 |
+
"epoch": 0.32946511938108575,
|
26053 |
+
"grad_norm": 0.08286993950605392,
|
26054 |
+
"learning_rate": 0.000987244960857816,
|
26055 |
+
"loss": 1.5639,
|
26056 |
+
"step": 7410
|
26057 |
+
},
|
26058 |
+
{
|
26059 |
+
"epoch": 0.3295540438397581,
|
26060 |
+
"grad_norm": 0.0867101177573204,
|
26061 |
+
"learning_rate": 0.0009872370341948036,
|
26062 |
+
"loss": 1.559,
|
26063 |
+
"step": 7412
|
26064 |
+
},
|
26065 |
+
{
|
26066 |
+
"epoch": 0.3296429682984305,
|
26067 |
+
"grad_norm": 0.0828615054488182,
|
26068 |
+
"learning_rate": 0.0009872291051013719,
|
26069 |
+
"loss": 1.5587,
|
26070 |
+
"step": 7414
|
26071 |
+
},
|
26072 |
+
{
|
26073 |
+
"epoch": 0.32973189275710285,
|
26074 |
+
"grad_norm": 0.08587847650051117,
|
26075 |
+
"learning_rate": 0.0009872211735775605,
|
26076 |
+
"loss": 1.5599,
|
26077 |
+
"step": 7416
|
26078 |
+
},
|
26079 |
+
{
|
26080 |
+
"epoch": 0.3298208172157752,
|
26081 |
+
"grad_norm": 0.08348828554153442,
|
26082 |
+
"learning_rate": 0.000987213239623409,
|
26083 |
+
"loss": 1.5556,
|
26084 |
+
"step": 7418
|
26085 |
+
},
|
26086 |
+
{
|
26087 |
+
"epoch": 0.32990974167444753,
|
26088 |
+
"grad_norm": 0.08008415997028351,
|
26089 |
+
"learning_rate": 0.0009872053032389573,
|
26090 |
+
"loss": 1.5642,
|
26091 |
+
"step": 7420
|
26092 |
+
},
|
26093 |
+
{
|
26094 |
+
"epoch": 0.3299986661331199,
|
26095 |
+
"grad_norm": 0.0780973806977272,
|
26096 |
+
"learning_rate": 0.0009871973644242442,
|
26097 |
+
"loss": 1.5563,
|
26098 |
+
"step": 7422
|
26099 |
+
},
|
26100 |
+
{
|
26101 |
+
"epoch": 0.33008759059179227,
|
26102 |
+
"grad_norm": 0.08216983824968338,
|
26103 |
+
"learning_rate": 0.00098718942317931,
|
26104 |
+
"loss": 1.5608,
|
26105 |
+
"step": 7424
|
26106 |
+
},
|
26107 |
+
{
|
26108 |
+
"epoch": 0.33017651505046464,
|
26109 |
+
"grad_norm": 0.08794374763965607,
|
26110 |
+
"learning_rate": 0.0009871814795041941,
|
26111 |
+
"loss": 1.5622,
|
26112 |
+
"step": 7426
|
26113 |
+
},
|
26114 |
+
{
|
26115 |
+
"epoch": 0.330265439509137,
|
26116 |
+
"grad_norm": 0.09090631455183029,
|
26117 |
+
"learning_rate": 0.0009871735333989362,
|
26118 |
+
"loss": 1.5562,
|
26119 |
+
"step": 7428
|
26120 |
+
},
|
26121 |
+
{
|
26122 |
+
"epoch": 0.3303543639678094,
|
26123 |
+
"grad_norm": 0.08172343671321869,
|
26124 |
+
"learning_rate": 0.0009871655848635757,
|
26125 |
+
"loss": 1.5569,
|
26126 |
+
"step": 7430
|
26127 |
+
},
|
26128 |
+
{
|
26129 |
+
"epoch": 0.3304432884264817,
|
26130 |
+
"grad_norm": 0.08407451957464218,
|
26131 |
+
"learning_rate": 0.0009871576338981525,
|
26132 |
+
"loss": 1.5586,
|
26133 |
+
"step": 7432
|
26134 |
+
},
|
26135 |
+
{
|
26136 |
+
"epoch": 0.33053221288515405,
|
26137 |
+
"grad_norm": 0.08738183975219727,
|
26138 |
+
"learning_rate": 0.000987149680502706,
|
26139 |
+
"loss": 1.5631,
|
26140 |
+
"step": 7434
|
26141 |
+
},
|
26142 |
+
{
|
26143 |
+
"epoch": 0.3306211373438264,
|
26144 |
+
"grad_norm": 0.09222719818353653,
|
26145 |
+
"learning_rate": 0.000987141724677276,
|
26146 |
+
"loss": 1.5581,
|
26147 |
+
"step": 7436
|
26148 |
+
},
|
26149 |
+
{
|
26150 |
+
"epoch": 0.3307100618024988,
|
26151 |
+
"grad_norm": 0.08091361075639725,
|
26152 |
+
"learning_rate": 0.0009871337664219024,
|
26153 |
+
"loss": 1.5561,
|
26154 |
+
"step": 7438
|
26155 |
+
},
|
26156 |
+
{
|
26157 |
+
"epoch": 0.33079898626117116,
|
26158 |
+
"grad_norm": 0.09107191115617752,
|
26159 |
+
"learning_rate": 0.0009871258057366247,
|
26160 |
+
"loss": 1.5559,
|
26161 |
+
"step": 7440
|
26162 |
+
},
|
26163 |
+
{
|
26164 |
+
"epoch": 0.33088791071984347,
|
26165 |
+
"grad_norm": 0.08714121580123901,
|
26166 |
+
"learning_rate": 0.0009871178426214826,
|
26167 |
+
"loss": 1.5662,
|
26168 |
+
"step": 7442
|
26169 |
+
},
|
26170 |
+
{
|
26171 |
+
"epoch": 0.33097683517851584,
|
26172 |
+
"grad_norm": 0.0870809406042099,
|
26173 |
+
"learning_rate": 0.000987109877076516,
|
26174 |
+
"loss": 1.5584,
|
26175 |
+
"step": 7444
|
26176 |
+
},
|
26177 |
+
{
|
26178 |
+
"epoch": 0.3310657596371882,
|
26179 |
+
"grad_norm": 0.0869300365447998,
|
26180 |
+
"learning_rate": 0.0009871019091017641,
|
26181 |
+
"loss": 1.5596,
|
26182 |
+
"step": 7446
|
26183 |
+
},
|
26184 |
+
{
|
26185 |
+
"epoch": 0.3311546840958606,
|
26186 |
+
"grad_norm": 0.08757723122835159,
|
26187 |
+
"learning_rate": 0.0009870939386972674,
|
26188 |
+
"loss": 1.558,
|
26189 |
+
"step": 7448
|
26190 |
+
},
|
26191 |
+
{
|
26192 |
+
"epoch": 0.33124360855453294,
|
26193 |
+
"grad_norm": 0.08233436197042465,
|
26194 |
+
"learning_rate": 0.000987085965863065,
|
26195 |
+
"loss": 1.5573,
|
26196 |
+
"step": 7450
|
26197 |
+
},
|
26198 |
+
{
|
26199 |
+
"epoch": 0.33133253301320525,
|
26200 |
+
"grad_norm": 0.08261076360940933,
|
26201 |
+
"learning_rate": 0.0009870779905991971,
|
26202 |
+
"loss": 1.5639,
|
26203 |
+
"step": 7452
|
26204 |
+
},
|
26205 |
+
{
|
26206 |
+
"epoch": 0.3314214574718776,
|
26207 |
+
"grad_norm": 0.08498179167509079,
|
26208 |
+
"learning_rate": 0.0009870700129057032,
|
26209 |
+
"loss": 1.5584,
|
26210 |
+
"step": 7454
|
26211 |
+
},
|
26212 |
+
{
|
26213 |
+
"epoch": 0.33151038193055,
|
26214 |
+
"grad_norm": 0.08367268741130829,
|
26215 |
+
"learning_rate": 0.0009870620327826233,
|
26216 |
+
"loss": 1.5633,
|
26217 |
+
"step": 7456
|
26218 |
+
},
|
26219 |
+
{
|
26220 |
+
"epoch": 0.33159930638922236,
|
26221 |
+
"grad_norm": 0.08087131381034851,
|
26222 |
+
"learning_rate": 0.0009870540502299973,
|
26223 |
+
"loss": 1.5592,
|
26224 |
+
"step": 7458
|
26225 |
+
},
|
26226 |
+
{
|
26227 |
+
"epoch": 0.3316882308478947,
|
26228 |
+
"grad_norm": 0.08614420145750046,
|
26229 |
+
"learning_rate": 0.0009870460652478645,
|
26230 |
+
"loss": 1.5625,
|
26231 |
+
"step": 7460
|
26232 |
+
},
|
26233 |
+
{
|
26234 |
+
"epoch": 0.3317771553065671,
|
26235 |
+
"grad_norm": 0.08858582377433777,
|
26236 |
+
"learning_rate": 0.0009870380778362654,
|
26237 |
+
"loss": 1.5612,
|
26238 |
+
"step": 7462
|
26239 |
+
},
|
26240 |
+
{
|
26241 |
+
"epoch": 0.3318660797652394,
|
26242 |
+
"grad_norm": 0.08249551802873611,
|
26243 |
+
"learning_rate": 0.0009870300879952394,
|
26244 |
+
"loss": 1.5645,
|
26245 |
+
"step": 7464
|
26246 |
+
},
|
26247 |
+
{
|
26248 |
+
"epoch": 0.3319550042239118,
|
26249 |
+
"grad_norm": 0.08597932755947113,
|
26250 |
+
"learning_rate": 0.0009870220957248264,
|
26251 |
+
"loss": 1.5571,
|
26252 |
+
"step": 7466
|
26253 |
+
},
|
26254 |
+
{
|
26255 |
+
"epoch": 0.33204392868258414,
|
26256 |
+
"grad_norm": 0.08219864219427109,
|
26257 |
+
"learning_rate": 0.0009870141010250666,
|
26258 |
+
"loss": 1.5608,
|
26259 |
+
"step": 7468
|
26260 |
+
},
|
26261 |
+
{
|
26262 |
+
"epoch": 0.3321328531412565,
|
26263 |
+
"grad_norm": 0.08492123335599899,
|
26264 |
+
"learning_rate": 0.0009870061038959994,
|
26265 |
+
"loss": 1.5632,
|
26266 |
+
"step": 7470
|
26267 |
+
},
|
26268 |
+
{
|
26269 |
+
"epoch": 0.3322217775999289,
|
26270 |
+
"grad_norm": 0.08149886131286621,
|
26271 |
+
"learning_rate": 0.0009869981043376648,
|
26272 |
+
"loss": 1.5541,
|
26273 |
+
"step": 7472
|
26274 |
+
},
|
26275 |
+
{
|
26276 |
+
"epoch": 0.3323107020586012,
|
26277 |
+
"grad_norm": 0.08603468537330627,
|
26278 |
+
"learning_rate": 0.000986990102350103,
|
26279 |
+
"loss": 1.5528,
|
26280 |
+
"step": 7474
|
26281 |
+
},
|
26282 |
+
{
|
26283 |
+
"epoch": 0.33239962651727356,
|
26284 |
+
"grad_norm": 0.0839407667517662,
|
26285 |
+
"learning_rate": 0.0009869820979333539,
|
26286 |
+
"loss": 1.5573,
|
26287 |
+
"step": 7476
|
26288 |
+
},
|
26289 |
+
{
|
26290 |
+
"epoch": 0.33248855097594593,
|
26291 |
+
"grad_norm": 0.07948873937129974,
|
26292 |
+
"learning_rate": 0.0009869740910874569,
|
26293 |
+
"loss": 1.5646,
|
26294 |
+
"step": 7478
|
26295 |
+
},
|
26296 |
+
{
|
26297 |
+
"epoch": 0.3325774754346183,
|
26298 |
+
"grad_norm": 0.08331003040075302,
|
26299 |
+
"learning_rate": 0.0009869660818124526,
|
26300 |
+
"loss": 1.5538,
|
26301 |
+
"step": 7480
|
26302 |
+
},
|
26303 |
+
{
|
26304 |
+
"epoch": 0.33266639989329067,
|
26305 |
+
"grad_norm": 0.08100567013025284,
|
26306 |
+
"learning_rate": 0.0009869580701083805,
|
26307 |
+
"loss": 1.5628,
|
26308 |
+
"step": 7482
|
26309 |
+
},
|
26310 |
+
{
|
26311 |
+
"epoch": 0.33275532435196303,
|
26312 |
+
"grad_norm": 0.08268828690052032,
|
26313 |
+
"learning_rate": 0.0009869500559752807,
|
26314 |
+
"loss": 1.5591,
|
26315 |
+
"step": 7484
|
26316 |
+
},
|
26317 |
+
{
|
26318 |
+
"epoch": 0.33284424881063535,
|
26319 |
+
"grad_norm": 0.08128658682107925,
|
26320 |
+
"learning_rate": 0.0009869420394131932,
|
26321 |
+
"loss": 1.559,
|
26322 |
+
"step": 7486
|
26323 |
+
},
|
26324 |
+
{
|
26325 |
+
"epoch": 0.3329331732693077,
|
26326 |
+
"grad_norm": 0.07909125089645386,
|
26327 |
+
"learning_rate": 0.0009869340204221582,
|
26328 |
+
"loss": 1.5581,
|
26329 |
+
"step": 7488
|
26330 |
+
},
|
26331 |
+
{
|
26332 |
+
"epoch": 0.3330220977279801,
|
26333 |
+
"grad_norm": 0.08491683751344681,
|
26334 |
+
"learning_rate": 0.0009869259990022152,
|
26335 |
+
"loss": 1.5523,
|
26336 |
+
"step": 7490
|
26337 |
+
},
|
26338 |
+
{
|
26339 |
+
"epoch": 0.33311102218665245,
|
26340 |
+
"grad_norm": 0.08070467412471771,
|
26341 |
+
"learning_rate": 0.0009869179751534046,
|
26342 |
+
"loss": 1.5558,
|
26343 |
+
"step": 7492
|
26344 |
+
},
|
26345 |
+
{
|
26346 |
+
"epoch": 0.3331999466453248,
|
26347 |
+
"grad_norm": 0.08046550303697586,
|
26348 |
+
"learning_rate": 0.0009869099488757662,
|
26349 |
+
"loss": 1.5511,
|
26350 |
+
"step": 7494
|
26351 |
+
},
|
26352 |
+
{
|
26353 |
+
"epoch": 0.33328887110399713,
|
26354 |
+
"grad_norm": 0.08419077098369598,
|
26355 |
+
"learning_rate": 0.0009869019201693403,
|
26356 |
+
"loss": 1.5632,
|
26357 |
+
"step": 7496
|
26358 |
+
},
|
26359 |
+
{
|
26360 |
+
"epoch": 0.3333777955626695,
|
26361 |
+
"grad_norm": 0.07918644696474075,
|
26362 |
+
"learning_rate": 0.0009868938890341668,
|
26363 |
+
"loss": 1.5552,
|
26364 |
+
"step": 7498
|
26365 |
+
},
|
26366 |
+
{
|
26367 |
+
"epoch": 0.33346672002134187,
|
26368 |
+
"grad_norm": 0.08392981439828873,
|
26369 |
+
"learning_rate": 0.0009868858554702856,
|
26370 |
+
"loss": 1.5588,
|
26371 |
+
"step": 7500
|
26372 |
+
},
|
26373 |
+
{
|
26374 |
+
"epoch": 0.33346672002134187,
|
26375 |
+
"eval_loss": 1.5330219268798828,
|
26376 |
+
"eval_runtime": 12.4092,
|
26377 |
+
"eval_samples_per_second": 556.847,
|
26378 |
+
"eval_steps_per_second": 69.626,
|
26379 |
+
"step": 7500
|
26380 |
+
},
|
26381 |
+
{
|
26382 |
+
"epoch": 0.33355564448001424,
|
26383 |
+
"grad_norm": 0.09008117020130157,
|
26384 |
+
"learning_rate": 0.0009868778194777371,
|
26385 |
+
"loss": 1.5609,
|
26386 |
+
"step": 7502
|
26387 |
+
},
|
26388 |
+
{
|
26389 |
+
"epoch": 0.3336445689386866,
|
26390 |
+
"grad_norm": 0.08291777223348618,
|
26391 |
+
"learning_rate": 0.0009868697810565613,
|
26392 |
+
"loss": 1.5649,
|
26393 |
+
"step": 7504
|
26394 |
+
},
|
26395 |
+
{
|
26396 |
+
"epoch": 0.33373349339735897,
|
26397 |
+
"grad_norm": 0.08472921699285507,
|
26398 |
+
"learning_rate": 0.0009868617402067979,
|
26399 |
+
"loss": 1.5604,
|
26400 |
+
"step": 7506
|
26401 |
+
},
|
26402 |
+
{
|
26403 |
+
"epoch": 0.3338224178560313,
|
26404 |
+
"grad_norm": 0.08425983041524887,
|
26405 |
+
"learning_rate": 0.0009868536969284876,
|
26406 |
+
"loss": 1.5625,
|
26407 |
+
"step": 7508
|
26408 |
+
},
|
26409 |
+
{
|
26410 |
+
"epoch": 0.33391134231470365,
|
26411 |
+
"grad_norm": 0.08368197828531265,
|
26412 |
+
"learning_rate": 0.0009868456512216702,
|
26413 |
+
"loss": 1.5517,
|
26414 |
+
"step": 7510
|
26415 |
+
},
|
26416 |
+
{
|
26417 |
+
"epoch": 0.334000266773376,
|
26418 |
+
"grad_norm": 0.07804959267377853,
|
26419 |
+
"learning_rate": 0.0009868376030863857,
|
26420 |
+
"loss": 1.5606,
|
26421 |
+
"step": 7512
|
26422 |
+
},
|
26423 |
+
{
|
26424 |
+
"epoch": 0.3340891912320484,
|
26425 |
+
"grad_norm": 0.08549796044826508,
|
26426 |
+
"learning_rate": 0.0009868295525226746,
|
26427 |
+
"loss": 1.5547,
|
26428 |
+
"step": 7514
|
26429 |
+
},
|
26430 |
+
{
|
26431 |
+
"epoch": 0.33417811569072076,
|
26432 |
+
"grad_norm": 0.08111416548490524,
|
26433 |
+
"learning_rate": 0.0009868214995305768,
|
26434 |
+
"loss": 1.5567,
|
26435 |
+
"step": 7516
|
26436 |
+
},
|
26437 |
+
{
|
26438 |
+
"epoch": 0.33426704014939307,
|
26439 |
+
"grad_norm": 0.07972585409879684,
|
26440 |
+
"learning_rate": 0.0009868134441101326,
|
26441 |
+
"loss": 1.5515,
|
26442 |
+
"step": 7518
|
26443 |
+
},
|
26444 |
+
{
|
26445 |
+
"epoch": 0.33435596460806544,
|
26446 |
+
"grad_norm": 0.0834115669131279,
|
26447 |
+
"learning_rate": 0.0009868053862613821,
|
26448 |
+
"loss": 1.5563,
|
26449 |
+
"step": 7520
|
26450 |
+
},
|
26451 |
+
{
|
26452 |
+
"epoch": 0.3344448890667378,
|
26453 |
+
"grad_norm": 0.08134639263153076,
|
26454 |
+
"learning_rate": 0.0009867973259843656,
|
26455 |
+
"loss": 1.5539,
|
26456 |
+
"step": 7522
|
26457 |
+
},
|
26458 |
+
{
|
26459 |
+
"epoch": 0.3345338135254102,
|
26460 |
+
"grad_norm": 0.07953674346208572,
|
26461 |
+
"learning_rate": 0.0009867892632791232,
|
26462 |
+
"loss": 1.5566,
|
26463 |
+
"step": 7524
|
26464 |
+
},
|
26465 |
+
{
|
26466 |
+
"epoch": 0.33462273798408254,
|
26467 |
+
"grad_norm": 0.07886262983083725,
|
26468 |
+
"learning_rate": 0.000986781198145695,
|
26469 |
+
"loss": 1.5549,
|
26470 |
+
"step": 7526
|
26471 |
+
},
|
26472 |
+
{
|
26473 |
+
"epoch": 0.33471166244275485,
|
26474 |
+
"grad_norm": 0.08340603113174438,
|
26475 |
+
"learning_rate": 0.0009867731305841217,
|
26476 |
+
"loss": 1.5609,
|
26477 |
+
"step": 7528
|
26478 |
+
},
|
26479 |
+
{
|
26480 |
+
"epoch": 0.3348005869014272,
|
26481 |
+
"grad_norm": 0.08290226757526398,
|
26482 |
+
"learning_rate": 0.000986765060594443,
|
26483 |
+
"loss": 1.5561,
|
26484 |
+
"step": 7530
|
26485 |
+
},
|
26486 |
+
{
|
26487 |
+
"epoch": 0.3348895113600996,
|
26488 |
+
"grad_norm": 0.08421459048986435,
|
26489 |
+
"learning_rate": 0.0009867569881766994,
|
26490 |
+
"loss": 1.5549,
|
26491 |
+
"step": 7532
|
26492 |
+
},
|
26493 |
+
{
|
26494 |
+
"epoch": 0.33497843581877196,
|
26495 |
+
"grad_norm": 0.08067211508750916,
|
26496 |
+
"learning_rate": 0.0009867489133309312,
|
26497 |
+
"loss": 1.5613,
|
26498 |
+
"step": 7534
|
26499 |
+
},
|
26500 |
+
{
|
26501 |
+
"epoch": 0.3350673602774443,
|
26502 |
+
"grad_norm": 0.08332887291908264,
|
26503 |
+
"learning_rate": 0.0009867408360571786,
|
26504 |
+
"loss": 1.5533,
|
26505 |
+
"step": 7536
|
26506 |
+
},
|
26507 |
+
{
|
26508 |
+
"epoch": 0.3351562847361167,
|
26509 |
+
"grad_norm": 0.08278394490480423,
|
26510 |
+
"learning_rate": 0.0009867327563554819,
|
26511 |
+
"loss": 1.5563,
|
26512 |
+
"step": 7538
|
26513 |
+
},
|
26514 |
+
{
|
26515 |
+
"epoch": 0.335245209194789,
|
26516 |
+
"grad_norm": 0.08533370494842529,
|
26517 |
+
"learning_rate": 0.0009867246742258813,
|
26518 |
+
"loss": 1.558,
|
26519 |
+
"step": 7540
|
26520 |
+
},
|
26521 |
+
{
|
26522 |
+
"epoch": 0.3353341336534614,
|
26523 |
+
"grad_norm": 0.08526033163070679,
|
26524 |
+
"learning_rate": 0.0009867165896684173,
|
26525 |
+
"loss": 1.5586,
|
26526 |
+
"step": 7542
|
26527 |
+
},
|
26528 |
+
{
|
26529 |
+
"epoch": 0.33542305811213374,
|
26530 |
+
"grad_norm": 0.0807008370757103,
|
26531 |
+
"learning_rate": 0.0009867085026831304,
|
26532 |
+
"loss": 1.563,
|
26533 |
+
"step": 7544
|
26534 |
+
},
|
26535 |
+
{
|
26536 |
+
"epoch": 0.3355119825708061,
|
26537 |
+
"grad_norm": 0.08265620470046997,
|
26538 |
+
"learning_rate": 0.0009867004132700606,
|
26539 |
+
"loss": 1.5492,
|
26540 |
+
"step": 7546
|
26541 |
+
},
|
26542 |
+
{
|
26543 |
+
"epoch": 0.3356009070294785,
|
26544 |
+
"grad_norm": 0.08214646577835083,
|
26545 |
+
"learning_rate": 0.0009866923214292482,
|
26546 |
+
"loss": 1.5571,
|
26547 |
+
"step": 7548
|
26548 |
+
},
|
26549 |
+
{
|
26550 |
+
"epoch": 0.3356898314881508,
|
26551 |
+
"grad_norm": 0.0798763632774353,
|
26552 |
+
"learning_rate": 0.0009866842271607336,
|
26553 |
+
"loss": 1.5533,
|
26554 |
+
"step": 7550
|
26555 |
+
},
|
26556 |
+
{
|
26557 |
+
"epoch": 0.33577875594682316,
|
26558 |
+
"grad_norm": 0.07971591502428055,
|
26559 |
+
"learning_rate": 0.0009866761304645575,
|
26560 |
+
"loss": 1.5582,
|
26561 |
+
"step": 7552
|
26562 |
+
},
|
26563 |
+
{
|
26564 |
+
"epoch": 0.33586768040549553,
|
26565 |
+
"grad_norm": 0.08346015959978104,
|
26566 |
+
"learning_rate": 0.00098666803134076,
|
26567 |
+
"loss": 1.5546,
|
26568 |
+
"step": 7554
|
26569 |
+
},
|
26570 |
+
{
|
26571 |
+
"epoch": 0.3359566048641679,
|
26572 |
+
"grad_norm": 0.08312725275754929,
|
26573 |
+
"learning_rate": 0.0009866599297893817,
|
26574 |
+
"loss": 1.5637,
|
26575 |
+
"step": 7556
|
26576 |
+
},
|
26577 |
+
{
|
26578 |
+
"epoch": 0.33604552932284026,
|
26579 |
+
"grad_norm": 0.07956881821155548,
|
26580 |
+
"learning_rate": 0.0009866518258104628,
|
26581 |
+
"loss": 1.5593,
|
26582 |
+
"step": 7558
|
26583 |
+
},
|
26584 |
+
{
|
26585 |
+
"epoch": 0.33613445378151263,
|
26586 |
+
"grad_norm": 0.08073802292346954,
|
26587 |
+
"learning_rate": 0.0009866437194040439,
|
26588 |
+
"loss": 1.5635,
|
26589 |
+
"step": 7560
|
26590 |
+
},
|
26591 |
+
{
|
26592 |
+
"epoch": 0.33622337824018494,
|
26593 |
+
"grad_norm": 0.08115088939666748,
|
26594 |
+
"learning_rate": 0.0009866356105701652,
|
26595 |
+
"loss": 1.5571,
|
26596 |
+
"step": 7562
|
26597 |
+
},
|
26598 |
+
{
|
26599 |
+
"epoch": 0.3363123026988573,
|
26600 |
+
"grad_norm": 0.08693643659353256,
|
26601 |
+
"learning_rate": 0.0009866274993088674,
|
26602 |
+
"loss": 1.5552,
|
26603 |
+
"step": 7564
|
26604 |
+
},
|
26605 |
+
{
|
26606 |
+
"epoch": 0.3364012271575297,
|
26607 |
+
"grad_norm": 0.0820782408118248,
|
26608 |
+
"learning_rate": 0.0009866193856201907,
|
26609 |
+
"loss": 1.5554,
|
26610 |
+
"step": 7566
|
26611 |
+
},
|
26612 |
+
{
|
26613 |
+
"epoch": 0.33649015161620205,
|
26614 |
+
"grad_norm": 0.08132241666316986,
|
26615 |
+
"learning_rate": 0.000986611269504176,
|
26616 |
+
"loss": 1.5558,
|
26617 |
+
"step": 7568
|
26618 |
+
},
|
26619 |
+
{
|
26620 |
+
"epoch": 0.3365790760748744,
|
26621 |
+
"grad_norm": 0.08526284992694855,
|
26622 |
+
"learning_rate": 0.0009866031509608633,
|
26623 |
+
"loss": 1.5552,
|
26624 |
+
"step": 7570
|
26625 |
+
},
|
26626 |
+
{
|
26627 |
+
"epoch": 0.33666800053354673,
|
26628 |
+
"grad_norm": 0.08132081478834152,
|
26629 |
+
"learning_rate": 0.0009865950299902935,
|
26630 |
+
"loss": 1.5619,
|
26631 |
+
"step": 7572
|
26632 |
+
},
|
26633 |
+
{
|
26634 |
+
"epoch": 0.3367569249922191,
|
26635 |
+
"grad_norm": 0.08150200545787811,
|
26636 |
+
"learning_rate": 0.0009865869065925068,
|
26637 |
+
"loss": 1.5563,
|
26638 |
+
"step": 7574
|
26639 |
+
},
|
26640 |
+
{
|
26641 |
+
"epoch": 0.33684584945089147,
|
26642 |
+
"grad_norm": 0.0802740678191185,
|
26643 |
+
"learning_rate": 0.0009865787807675438,
|
26644 |
+
"loss": 1.5508,
|
26645 |
+
"step": 7576
|
26646 |
+
},
|
26647 |
+
{
|
26648 |
+
"epoch": 0.33693477390956383,
|
26649 |
+
"grad_norm": 0.08373408019542694,
|
26650 |
+
"learning_rate": 0.0009865706525154453,
|
26651 |
+
"loss": 1.5592,
|
26652 |
+
"step": 7578
|
26653 |
+
},
|
26654 |
+
{
|
26655 |
+
"epoch": 0.3370236983682362,
|
26656 |
+
"grad_norm": 0.08197604864835739,
|
26657 |
+
"learning_rate": 0.0009865625218362514,
|
26658 |
+
"loss": 1.5523,
|
26659 |
+
"step": 7580
|
26660 |
+
},
|
26661 |
+
{
|
26662 |
+
"epoch": 0.3371126228269085,
|
26663 |
+
"grad_norm": 0.08425270020961761,
|
26664 |
+
"learning_rate": 0.0009865543887300028,
|
26665 |
+
"loss": 1.557,
|
26666 |
+
"step": 7582
|
26667 |
+
},
|
26668 |
+
{
|
26669 |
+
"epoch": 0.3372015472855809,
|
26670 |
+
"grad_norm": 0.08232254534959793,
|
26671 |
+
"learning_rate": 0.00098654625319674,
|
26672 |
+
"loss": 1.5567,
|
26673 |
+
"step": 7584
|
26674 |
+
},
|
26675 |
+
{
|
26676 |
+
"epoch": 0.33729047174425325,
|
26677 |
+
"grad_norm": 0.08390845358371735,
|
26678 |
+
"learning_rate": 0.0009865381152365043,
|
26679 |
+
"loss": 1.558,
|
26680 |
+
"step": 7586
|
26681 |
+
},
|
26682 |
+
{
|
26683 |
+
"epoch": 0.3373793962029256,
|
26684 |
+
"grad_norm": 0.08453506231307983,
|
26685 |
+
"learning_rate": 0.0009865299748493352,
|
26686 |
+
"loss": 1.5561,
|
26687 |
+
"step": 7588
|
26688 |
+
},
|
26689 |
+
{
|
26690 |
+
"epoch": 0.337468320661598,
|
26691 |
+
"grad_norm": 0.08450014144182205,
|
26692 |
+
"learning_rate": 0.0009865218320352742,
|
26693 |
+
"loss": 1.5515,
|
26694 |
+
"step": 7590
|
26695 |
+
},
|
26696 |
+
{
|
26697 |
+
"epoch": 0.33755724512027035,
|
26698 |
+
"grad_norm": 0.083348348736763,
|
26699 |
+
"learning_rate": 0.0009865136867943615,
|
26700 |
+
"loss": 1.5583,
|
26701 |
+
"step": 7592
|
26702 |
+
},
|
26703 |
+
{
|
26704 |
+
"epoch": 0.33764616957894267,
|
26705 |
+
"grad_norm": 0.0832839161157608,
|
26706 |
+
"learning_rate": 0.0009865055391266374,
|
26707 |
+
"loss": 1.5618,
|
26708 |
+
"step": 7594
|
26709 |
+
},
|
26710 |
+
{
|
26711 |
+
"epoch": 0.33773509403761504,
|
26712 |
+
"grad_norm": 0.07885057479143143,
|
26713 |
+
"learning_rate": 0.0009864973890321431,
|
26714 |
+
"loss": 1.5528,
|
26715 |
+
"step": 7596
|
26716 |
+
},
|
26717 |
+
{
|
26718 |
+
"epoch": 0.3378240184962874,
|
26719 |
+
"grad_norm": 0.0822066068649292,
|
26720 |
+
"learning_rate": 0.0009864892365109193,
|
26721 |
+
"loss": 1.5585,
|
26722 |
+
"step": 7598
|
26723 |
+
},
|
26724 |
+
{
|
26725 |
+
"epoch": 0.33791294295495977,
|
26726 |
+
"grad_norm": 0.08152958005666733,
|
26727 |
+
"learning_rate": 0.0009864810815630061,
|
26728 |
+
"loss": 1.559,
|
26729 |
+
"step": 7600
|
26730 |
+
},
|
26731 |
+
{
|
26732 |
+
"epoch": 0.33800186741363214,
|
26733 |
+
"grad_norm": 0.08215297013521194,
|
26734 |
+
"learning_rate": 0.0009864729241884447,
|
26735 |
+
"loss": 1.5569,
|
26736 |
+
"step": 7602
|
26737 |
+
},
|
26738 |
+
{
|
26739 |
+
"epoch": 0.33809079187230445,
|
26740 |
+
"grad_norm": 0.08144047111272812,
|
26741 |
+
"learning_rate": 0.0009864647643872757,
|
26742 |
+
"loss": 1.558,
|
26743 |
+
"step": 7604
|
26744 |
+
},
|
26745 |
+
{
|
26746 |
+
"epoch": 0.3381797163309768,
|
26747 |
+
"grad_norm": 0.08268721401691437,
|
26748 |
+
"learning_rate": 0.0009864566021595396,
|
26749 |
+
"loss": 1.5643,
|
26750 |
+
"step": 7606
|
26751 |
+
},
|
26752 |
+
{
|
26753 |
+
"epoch": 0.3382686407896492,
|
26754 |
+
"grad_norm": 0.08536458760499954,
|
26755 |
+
"learning_rate": 0.000986448437505277,
|
26756 |
+
"loss": 1.5528,
|
26757 |
+
"step": 7608
|
26758 |
+
},
|
26759 |
+
{
|
26760 |
+
"epoch": 0.33835756524832156,
|
26761 |
+
"grad_norm": 0.07940022647380829,
|
26762 |
+
"learning_rate": 0.0009864402704245292,
|
26763 |
+
"loss": 1.5514,
|
26764 |
+
"step": 7610
|
26765 |
+
},
|
26766 |
+
{
|
26767 |
+
"epoch": 0.3384464897069939,
|
26768 |
+
"grad_norm": 0.0803755670785904,
|
26769 |
+
"learning_rate": 0.0009864321009173365,
|
26770 |
+
"loss": 1.5582,
|
26771 |
+
"step": 7612
|
26772 |
+
},
|
26773 |
+
{
|
26774 |
+
"epoch": 0.3385354141656663,
|
26775 |
+
"grad_norm": 0.08060748130083084,
|
26776 |
+
"learning_rate": 0.0009864239289837395,
|
26777 |
+
"loss": 1.559,
|
26778 |
+
"step": 7614
|
26779 |
+
},
|
26780 |
+
{
|
26781 |
+
"epoch": 0.3386243386243386,
|
26782 |
+
"grad_norm": 0.07905225455760956,
|
26783 |
+
"learning_rate": 0.0009864157546237795,
|
26784 |
+
"loss": 1.5599,
|
26785 |
+
"step": 7616
|
26786 |
+
},
|
26787 |
+
{
|
26788 |
+
"epoch": 0.338713263083011,
|
26789 |
+
"grad_norm": 0.08046825975179672,
|
26790 |
+
"learning_rate": 0.0009864075778374967,
|
26791 |
+
"loss": 1.5617,
|
26792 |
+
"step": 7618
|
26793 |
+
},
|
26794 |
+
{
|
26795 |
+
"epoch": 0.33880218754168334,
|
26796 |
+
"grad_norm": 0.08096635341644287,
|
26797 |
+
"learning_rate": 0.0009863993986249325,
|
26798 |
+
"loss": 1.5552,
|
26799 |
+
"step": 7620
|
26800 |
+
},
|
26801 |
+
{
|
26802 |
+
"epoch": 0.3388911120003557,
|
26803 |
+
"grad_norm": 0.08172665536403656,
|
26804 |
+
"learning_rate": 0.000986391216986127,
|
26805 |
+
"loss": 1.5593,
|
26806 |
+
"step": 7622
|
26807 |
+
},
|
26808 |
+
{
|
26809 |
+
"epoch": 0.3389800364590281,
|
26810 |
+
"grad_norm": 0.0854748860001564,
|
26811 |
+
"learning_rate": 0.0009863830329211217,
|
26812 |
+
"loss": 1.5591,
|
26813 |
+
"step": 7624
|
26814 |
+
},
|
26815 |
+
{
|
26816 |
+
"epoch": 0.3390689609177004,
|
26817 |
+
"grad_norm": 0.0805579125881195,
|
26818 |
+
"learning_rate": 0.0009863748464299569,
|
26819 |
+
"loss": 1.5578,
|
26820 |
+
"step": 7626
|
26821 |
+
},
|
26822 |
+
{
|
26823 |
+
"epoch": 0.33915788537637276,
|
26824 |
+
"grad_norm": 0.07932641357183456,
|
26825 |
+
"learning_rate": 0.0009863666575126737,
|
26826 |
+
"loss": 1.5491,
|
26827 |
+
"step": 7628
|
26828 |
+
},
|
26829 |
+
{
|
26830 |
+
"epoch": 0.3392468098350451,
|
26831 |
+
"grad_norm": 0.0846443846821785,
|
26832 |
+
"learning_rate": 0.000986358466169313,
|
26833 |
+
"loss": 1.5591,
|
26834 |
+
"step": 7630
|
26835 |
+
},
|
26836 |
+
{
|
26837 |
+
"epoch": 0.3393357342937175,
|
26838 |
+
"grad_norm": 0.08422157168388367,
|
26839 |
+
"learning_rate": 0.0009863502723999153,
|
26840 |
+
"loss": 1.5564,
|
26841 |
+
"step": 7632
|
26842 |
+
},
|
26843 |
+
{
|
26844 |
+
"epoch": 0.33942465875238986,
|
26845 |
+
"grad_norm": 0.08966944366693497,
|
26846 |
+
"learning_rate": 0.0009863420762045217,
|
26847 |
+
"loss": 1.5575,
|
26848 |
+
"step": 7634
|
26849 |
+
},
|
26850 |
+
{
|
26851 |
+
"epoch": 0.3395135832110622,
|
26852 |
+
"grad_norm": 0.08580625057220459,
|
26853 |
+
"learning_rate": 0.000986333877583173,
|
26854 |
+
"loss": 1.5568,
|
26855 |
+
"step": 7636
|
26856 |
+
},
|
26857 |
+
{
|
26858 |
+
"epoch": 0.33960250766973454,
|
26859 |
+
"grad_norm": 0.08479603379964828,
|
26860 |
+
"learning_rate": 0.0009863256765359108,
|
26861 |
+
"loss": 1.5607,
|
26862 |
+
"step": 7638
|
26863 |
+
},
|
26864 |
+
{
|
26865 |
+
"epoch": 0.3396914321284069,
|
26866 |
+
"grad_norm": 0.0841260626912117,
|
26867 |
+
"learning_rate": 0.000986317473062775,
|
26868 |
+
"loss": 1.5547,
|
26869 |
+
"step": 7640
|
26870 |
+
},
|
26871 |
+
{
|
26872 |
+
"epoch": 0.3397803565870793,
|
26873 |
+
"grad_norm": 0.08255398273468018,
|
26874 |
+
"learning_rate": 0.0009863092671638068,
|
26875 |
+
"loss": 1.5613,
|
26876 |
+
"step": 7642
|
26877 |
+
},
|
26878 |
+
{
|
26879 |
+
"epoch": 0.33986928104575165,
|
26880 |
+
"grad_norm": 0.0812351405620575,
|
26881 |
+
"learning_rate": 0.0009863010588390473,
|
26882 |
+
"loss": 1.5575,
|
26883 |
+
"step": 7644
|
26884 |
+
},
|
26885 |
+
{
|
26886 |
+
"epoch": 0.339958205504424,
|
26887 |
+
"grad_norm": 0.0809001475572586,
|
26888 |
+
"learning_rate": 0.0009862928480885375,
|
26889 |
+
"loss": 1.5592,
|
26890 |
+
"step": 7646
|
26891 |
+
},
|
26892 |
+
{
|
26893 |
+
"epoch": 0.34004712996309633,
|
26894 |
+
"grad_norm": 0.0820452943444252,
|
26895 |
+
"learning_rate": 0.0009862846349123183,
|
26896 |
+
"loss": 1.5551,
|
26897 |
+
"step": 7648
|
26898 |
+
},
|
26899 |
+
{
|
26900 |
+
"epoch": 0.3401360544217687,
|
26901 |
+
"grad_norm": 0.08290841430425644,
|
26902 |
+
"learning_rate": 0.0009862764193104303,
|
26903 |
+
"loss": 1.5607,
|
26904 |
+
"step": 7650
|
26905 |
+
},
|
26906 |
+
{
|
26907 |
+
"epoch": 0.34022497888044106,
|
26908 |
+
"grad_norm": 0.08015654981136322,
|
26909 |
+
"learning_rate": 0.0009862682012829152,
|
26910 |
+
"loss": 1.5555,
|
26911 |
+
"step": 7652
|
26912 |
+
},
|
26913 |
+
{
|
26914 |
+
"epoch": 0.34031390333911343,
|
26915 |
+
"grad_norm": 0.08518238365650177,
|
26916 |
+
"learning_rate": 0.0009862599808298134,
|
26917 |
+
"loss": 1.5524,
|
26918 |
+
"step": 7654
|
26919 |
+
},
|
26920 |
+
{
|
26921 |
+
"epoch": 0.3404028277977858,
|
26922 |
+
"grad_norm": 0.08613689988851547,
|
26923 |
+
"learning_rate": 0.000986251757951166,
|
26924 |
+
"loss": 1.556,
|
26925 |
+
"step": 7656
|
26926 |
+
},
|
26927 |
+
{
|
26928 |
+
"epoch": 0.3404917522564581,
|
26929 |
+
"grad_norm": 0.08661675453186035,
|
26930 |
+
"learning_rate": 0.0009862435326470143,
|
26931 |
+
"loss": 1.5564,
|
26932 |
+
"step": 7658
|
26933 |
+
},
|
26934 |
+
{
|
26935 |
+
"epoch": 0.3405806767151305,
|
26936 |
+
"grad_norm": 0.07869388163089752,
|
26937 |
+
"learning_rate": 0.000986235304917399,
|
26938 |
+
"loss": 1.5577,
|
26939 |
+
"step": 7660
|
26940 |
+
},
|
26941 |
+
{
|
26942 |
+
"epoch": 0.34066960117380285,
|
26943 |
+
"grad_norm": 0.08099169284105301,
|
26944 |
+
"learning_rate": 0.0009862270747623616,
|
26945 |
+
"loss": 1.5534,
|
26946 |
+
"step": 7662
|
26947 |
+
},
|
26948 |
+
{
|
26949 |
+
"epoch": 0.3407585256324752,
|
26950 |
+
"grad_norm": 0.08418874442577362,
|
26951 |
+
"learning_rate": 0.0009862188421819425,
|
26952 |
+
"loss": 1.5575,
|
26953 |
+
"step": 7664
|
26954 |
+
},
|
26955 |
+
{
|
26956 |
+
"epoch": 0.3408474500911476,
|
26957 |
+
"grad_norm": 0.08251459896564484,
|
26958 |
+
"learning_rate": 0.0009862106071761834,
|
26959 |
+
"loss": 1.5578,
|
26960 |
+
"step": 7666
|
26961 |
+
},
|
26962 |
+
{
|
26963 |
+
"epoch": 0.34093637454981995,
|
26964 |
+
"grad_norm": 0.08254079520702362,
|
26965 |
+
"learning_rate": 0.0009862023697451248,
|
26966 |
+
"loss": 1.5507,
|
26967 |
+
"step": 7668
|
26968 |
+
},
|
26969 |
+
{
|
26970 |
+
"epoch": 0.34102529900849227,
|
26971 |
+
"grad_norm": 0.08215059340000153,
|
26972 |
+
"learning_rate": 0.0009861941298888082,
|
26973 |
+
"loss": 1.5562,
|
26974 |
+
"step": 7670
|
26975 |
+
},
|
26976 |
+
{
|
26977 |
+
"epoch": 0.34111422346716463,
|
26978 |
+
"grad_norm": 0.08373280614614487,
|
26979 |
+
"learning_rate": 0.0009861858876072747,
|
26980 |
+
"loss": 1.5594,
|
26981 |
+
"step": 7672
|
26982 |
+
},
|
26983 |
+
{
|
26984 |
+
"epoch": 0.341203147925837,
|
26985 |
+
"grad_norm": 0.07917515188455582,
|
26986 |
+
"learning_rate": 0.000986177642900565,
|
26987 |
+
"loss": 1.5542,
|
26988 |
+
"step": 7674
|
26989 |
+
},
|
26990 |
+
{
|
26991 |
+
"epoch": 0.34129207238450937,
|
26992 |
+
"grad_norm": 0.08267196267843246,
|
26993 |
+
"learning_rate": 0.0009861693957687208,
|
26994 |
+
"loss": 1.5612,
|
26995 |
+
"step": 7676
|
26996 |
+
},
|
26997 |
+
{
|
26998 |
+
"epoch": 0.34138099684318174,
|
26999 |
+
"grad_norm": 0.08618869632482529,
|
27000 |
+
"learning_rate": 0.0009861611462117829,
|
27001 |
+
"loss": 1.5604,
|
27002 |
+
"step": 7678
|
27003 |
+
},
|
27004 |
+
{
|
27005 |
+
"epoch": 0.34146992130185405,
|
27006 |
+
"grad_norm": 0.08248917013406754,
|
27007 |
+
"learning_rate": 0.0009861528942297923,
|
27008 |
+
"loss": 1.5624,
|
27009 |
+
"step": 7680
|
27010 |
+
},
|
27011 |
+
{
|
27012 |
+
"epoch": 0.3415588457605264,
|
27013 |
+
"grad_norm": 0.08377496898174286,
|
27014 |
+
"learning_rate": 0.0009861446398227904,
|
27015 |
+
"loss": 1.5614,
|
27016 |
+
"step": 7682
|
27017 |
+
},
|
27018 |
+
{
|
27019 |
+
"epoch": 0.3416477702191988,
|
27020 |
+
"grad_norm": 0.08078130334615707,
|
27021 |
+
"learning_rate": 0.0009861363829908186,
|
27022 |
+
"loss": 1.5528,
|
27023 |
+
"step": 7684
|
27024 |
+
},
|
27025 |
+
{
|
27026 |
+
"epoch": 0.34173669467787116,
|
27027 |
+
"grad_norm": 0.07990232855081558,
|
27028 |
+
"learning_rate": 0.0009861281237339176,
|
27029 |
+
"loss": 1.5554,
|
27030 |
+
"step": 7686
|
27031 |
+
},
|
27032 |
+
{
|
27033 |
+
"epoch": 0.3418256191365435,
|
27034 |
+
"grad_norm": 0.07614683359861374,
|
27035 |
+
"learning_rate": 0.0009861198620521288,
|
27036 |
+
"loss": 1.5562,
|
27037 |
+
"step": 7688
|
27038 |
+
},
|
27039 |
+
{
|
27040 |
+
"epoch": 0.3419145435952159,
|
27041 |
+
"grad_norm": 0.08139005303382874,
|
27042 |
+
"learning_rate": 0.0009861115979454935,
|
27043 |
+
"loss": 1.5544,
|
27044 |
+
"step": 7690
|
27045 |
+
},
|
27046 |
+
{
|
27047 |
+
"epoch": 0.3420034680538882,
|
27048 |
+
"grad_norm": 0.08052778244018555,
|
27049 |
+
"learning_rate": 0.000986103331414053,
|
27050 |
+
"loss": 1.5592,
|
27051 |
+
"step": 7692
|
27052 |
+
},
|
27053 |
+
{
|
27054 |
+
"epoch": 0.3420923925125606,
|
27055 |
+
"grad_norm": 0.08632178604602814,
|
27056 |
+
"learning_rate": 0.0009860950624578481,
|
27057 |
+
"loss": 1.5574,
|
27058 |
+
"step": 7694
|
27059 |
+
},
|
27060 |
+
{
|
27061 |
+
"epoch": 0.34218131697123294,
|
27062 |
+
"grad_norm": 0.0841991975903511,
|
27063 |
+
"learning_rate": 0.0009860867910769207,
|
27064 |
+
"loss": 1.5576,
|
27065 |
+
"step": 7696
|
27066 |
+
},
|
27067 |
+
{
|
27068 |
+
"epoch": 0.3422702414299053,
|
27069 |
+
"grad_norm": 0.08402567356824875,
|
27070 |
+
"learning_rate": 0.0009860785172713115,
|
27071 |
+
"loss": 1.5525,
|
27072 |
+
"step": 7698
|
27073 |
+
},
|
27074 |
+
{
|
27075 |
+
"epoch": 0.3423591658885777,
|
27076 |
+
"grad_norm": 0.08191527426242828,
|
27077 |
+
"learning_rate": 0.000986070241041062,
|
27078 |
+
"loss": 1.5613,
|
27079 |
+
"step": 7700
|
27080 |
+
},
|
27081 |
+
{
|
27082 |
+
"epoch": 0.34244809034725,
|
27083 |
+
"grad_norm": 0.08203962445259094,
|
27084 |
+
"learning_rate": 0.0009860619623862136,
|
27085 |
+
"loss": 1.5565,
|
27086 |
+
"step": 7702
|
27087 |
+
},
|
27088 |
+
{
|
27089 |
+
"epoch": 0.34253701480592236,
|
27090 |
+
"grad_norm": 0.07740230858325958,
|
27091 |
+
"learning_rate": 0.0009860536813068072,
|
27092 |
+
"loss": 1.5574,
|
27093 |
+
"step": 7704
|
27094 |
+
},
|
27095 |
+
{
|
27096 |
+
"epoch": 0.3426259392645947,
|
27097 |
+
"grad_norm": 0.07994545996189117,
|
27098 |
+
"learning_rate": 0.0009860453978028844,
|
27099 |
+
"loss": 1.5573,
|
27100 |
+
"step": 7706
|
27101 |
+
},
|
27102 |
+
{
|
27103 |
+
"epoch": 0.3427148637232671,
|
27104 |
+
"grad_norm": 0.08103641122579575,
|
27105 |
+
"learning_rate": 0.0009860371118744866,
|
27106 |
+
"loss": 1.5566,
|
27107 |
+
"step": 7708
|
27108 |
+
},
|
27109 |
+
{
|
27110 |
+
"epoch": 0.34280378818193946,
|
27111 |
+
"grad_norm": 0.08051252365112305,
|
27112 |
+
"learning_rate": 0.0009860288235216549,
|
27113 |
+
"loss": 1.5603,
|
27114 |
+
"step": 7710
|
27115 |
+
},
|
27116 |
+
{
|
27117 |
+
"epoch": 0.3428927126406118,
|
27118 |
+
"grad_norm": 0.08033636957406998,
|
27119 |
+
"learning_rate": 0.0009860205327444308,
|
27120 |
+
"loss": 1.5545,
|
27121 |
+
"step": 7712
|
27122 |
+
},
|
27123 |
+
{
|
27124 |
+
"epoch": 0.34298163709928414,
|
27125 |
+
"grad_norm": 0.08459188044071198,
|
27126 |
+
"learning_rate": 0.0009860122395428555,
|
27127 |
+
"loss": 1.5595,
|
27128 |
+
"step": 7714
|
27129 |
+
},
|
27130 |
+
{
|
27131 |
+
"epoch": 0.3430705615579565,
|
27132 |
+
"grad_norm": 0.08079506456851959,
|
27133 |
+
"learning_rate": 0.0009860039439169705,
|
27134 |
+
"loss": 1.5565,
|
27135 |
+
"step": 7716
|
27136 |
+
},
|
27137 |
+
{
|
27138 |
+
"epoch": 0.3431594860166289,
|
27139 |
+
"grad_norm": 0.08086486160755157,
|
27140 |
+
"learning_rate": 0.0009859956458668174,
|
27141 |
+
"loss": 1.5585,
|
27142 |
+
"step": 7718
|
27143 |
+
},
|
27144 |
+
{
|
27145 |
+
"epoch": 0.34324841047530125,
|
27146 |
+
"grad_norm": 0.0799533799290657,
|
27147 |
+
"learning_rate": 0.000985987345392437,
|
27148 |
+
"loss": 1.5492,
|
27149 |
+
"step": 7720
|
27150 |
+
},
|
27151 |
+
{
|
27152 |
+
"epoch": 0.3433373349339736,
|
27153 |
+
"grad_norm": 0.07945334911346436,
|
27154 |
+
"learning_rate": 0.0009859790424938714,
|
27155 |
+
"loss": 1.5486,
|
27156 |
+
"step": 7722
|
27157 |
+
},
|
27158 |
+
{
|
27159 |
+
"epoch": 0.3434262593926459,
|
27160 |
+
"grad_norm": 0.07963025569915771,
|
27161 |
+
"learning_rate": 0.0009859707371711614,
|
27162 |
+
"loss": 1.5606,
|
27163 |
+
"step": 7724
|
27164 |
+
},
|
27165 |
+
{
|
27166 |
+
"epoch": 0.3435151838513183,
|
27167 |
+
"grad_norm": 0.08368024230003357,
|
27168 |
+
"learning_rate": 0.0009859624294243487,
|
27169 |
+
"loss": 1.5599,
|
27170 |
+
"step": 7726
|
27171 |
+
},
|
27172 |
+
{
|
27173 |
+
"epoch": 0.34360410830999066,
|
27174 |
+
"grad_norm": 0.07972510904073715,
|
27175 |
+
"learning_rate": 0.000985954119253475,
|
27176 |
+
"loss": 1.5461,
|
27177 |
+
"step": 7728
|
27178 |
+
},
|
27179 |
+
{
|
27180 |
+
"epoch": 0.34369303276866303,
|
27181 |
+
"grad_norm": 0.08038385957479477,
|
27182 |
+
"learning_rate": 0.000985945806658581,
|
27183 |
+
"loss": 1.5568,
|
27184 |
+
"step": 7730
|
27185 |
+
},
|
27186 |
+
{
|
27187 |
+
"epoch": 0.3437819572273354,
|
27188 |
+
"grad_norm": 0.08230438828468323,
|
27189 |
+
"learning_rate": 0.0009859374916397093,
|
27190 |
+
"loss": 1.5553,
|
27191 |
+
"step": 7732
|
27192 |
+
},
|
27193 |
+
{
|
27194 |
+
"epoch": 0.3438708816860077,
|
27195 |
+
"grad_norm": 0.08172792196273804,
|
27196 |
+
"learning_rate": 0.0009859291741969004,
|
27197 |
+
"loss": 1.5544,
|
27198 |
+
"step": 7734
|
27199 |
+
},
|
27200 |
+
{
|
27201 |
+
"epoch": 0.3439598061446801,
|
27202 |
+
"grad_norm": 0.08643286675214767,
|
27203 |
+
"learning_rate": 0.0009859208543301962,
|
27204 |
+
"loss": 1.5551,
|
27205 |
+
"step": 7736
|
27206 |
+
},
|
27207 |
+
{
|
27208 |
+
"epoch": 0.34404873060335245,
|
27209 |
+
"grad_norm": 0.08269612491130829,
|
27210 |
+
"learning_rate": 0.0009859125320396379,
|
27211 |
+
"loss": 1.5538,
|
27212 |
+
"step": 7738
|
27213 |
+
},
|
27214 |
+
{
|
27215 |
+
"epoch": 0.3441376550620248,
|
27216 |
+
"grad_norm": 0.08148770779371262,
|
27217 |
+
"learning_rate": 0.0009859042073252673,
|
27218 |
+
"loss": 1.5511,
|
27219 |
+
"step": 7740
|
27220 |
+
},
|
27221 |
+
{
|
27222 |
+
"epoch": 0.3442265795206972,
|
27223 |
+
"grad_norm": 0.0830964595079422,
|
27224 |
+
"learning_rate": 0.0009858958801871257,
|
27225 |
+
"loss": 1.559,
|
27226 |
+
"step": 7742
|
27227 |
+
},
|
27228 |
+
{
|
27229 |
+
"epoch": 0.34431550397936955,
|
27230 |
+
"grad_norm": 0.08528967946767807,
|
27231 |
+
"learning_rate": 0.0009858875506252552,
|
27232 |
+
"loss": 1.5578,
|
27233 |
+
"step": 7744
|
27234 |
+
},
|
27235 |
+
{
|
27236 |
+
"epoch": 0.34440442843804187,
|
27237 |
+
"grad_norm": 0.08497735857963562,
|
27238 |
+
"learning_rate": 0.0009858792186396967,
|
27239 |
+
"loss": 1.5565,
|
27240 |
+
"step": 7746
|
27241 |
+
},
|
27242 |
+
{
|
27243 |
+
"epoch": 0.34449335289671423,
|
27244 |
+
"grad_norm": 0.07968219369649887,
|
27245 |
+
"learning_rate": 0.0009858708842304922,
|
27246 |
+
"loss": 1.5606,
|
27247 |
+
"step": 7748
|
27248 |
+
},
|
27249 |
+
{
|
27250 |
+
"epoch": 0.3445822773553866,
|
27251 |
+
"grad_norm": 0.08317109197378159,
|
27252 |
+
"learning_rate": 0.0009858625473976828,
|
27253 |
+
"loss": 1.5534,
|
27254 |
+
"step": 7750
|
27255 |
+
},
|
27256 |
+
{
|
27257 |
+
"epoch": 0.34467120181405897,
|
27258 |
+
"grad_norm": 0.07973136752843857,
|
27259 |
+
"learning_rate": 0.0009858542081413106,
|
27260 |
+
"loss": 1.5536,
|
27261 |
+
"step": 7752
|
27262 |
+
},
|
27263 |
+
{
|
27264 |
+
"epoch": 0.34476012627273134,
|
27265 |
+
"grad_norm": 0.07634979486465454,
|
27266 |
+
"learning_rate": 0.0009858458664614169,
|
27267 |
+
"loss": 1.5492,
|
27268 |
+
"step": 7754
|
27269 |
+
},
|
27270 |
+
{
|
27271 |
+
"epoch": 0.34484905073140365,
|
27272 |
+
"grad_norm": 0.07837677747011185,
|
27273 |
+
"learning_rate": 0.0009858375223580434,
|
27274 |
+
"loss": 1.5548,
|
27275 |
+
"step": 7756
|
27276 |
+
},
|
27277 |
+
{
|
27278 |
+
"epoch": 0.344937975190076,
|
27279 |
+
"grad_norm": 0.07845926284790039,
|
27280 |
+
"learning_rate": 0.0009858291758312317,
|
27281 |
+
"loss": 1.5537,
|
27282 |
+
"step": 7758
|
27283 |
+
},
|
27284 |
+
{
|
27285 |
+
"epoch": 0.3450268996487484,
|
27286 |
+
"grad_norm": 0.08067924529314041,
|
27287 |
+
"learning_rate": 0.0009858208268810232,
|
27288 |
+
"loss": 1.5546,
|
27289 |
+
"step": 7760
|
27290 |
+
},
|
27291 |
+
{
|
27292 |
+
"epoch": 0.34511582410742075,
|
27293 |
+
"grad_norm": 0.07944106310606003,
|
27294 |
+
"learning_rate": 0.00098581247550746,
|
27295 |
+
"loss": 1.5523,
|
27296 |
+
"step": 7762
|
27297 |
+
},
|
27298 |
+
{
|
27299 |
+
"epoch": 0.3452047485660931,
|
27300 |
+
"grad_norm": 0.08254885673522949,
|
27301 |
+
"learning_rate": 0.0009858041217105835,
|
27302 |
+
"loss": 1.5549,
|
27303 |
+
"step": 7764
|
27304 |
+
},
|
27305 |
+
{
|
27306 |
+
"epoch": 0.34529367302476544,
|
27307 |
+
"grad_norm": 0.08257626742124557,
|
27308 |
+
"learning_rate": 0.0009857957654904352,
|
27309 |
+
"loss": 1.5568,
|
27310 |
+
"step": 7766
|
27311 |
+
},
|
27312 |
+
{
|
27313 |
+
"epoch": 0.3453825974834378,
|
27314 |
+
"grad_norm": 0.07757585495710373,
|
27315 |
+
"learning_rate": 0.0009857874068470575,
|
27316 |
+
"loss": 1.5542,
|
27317 |
+
"step": 7768
|
27318 |
+
},
|
27319 |
+
{
|
27320 |
+
"epoch": 0.34547152194211017,
|
27321 |
+
"grad_norm": 0.07840435206890106,
|
27322 |
+
"learning_rate": 0.000985779045780491,
|
27323 |
+
"loss": 1.5541,
|
27324 |
+
"step": 7770
|
27325 |
+
},
|
27326 |
+
{
|
27327 |
+
"epoch": 0.34556044640078254,
|
27328 |
+
"grad_norm": 0.08048916608095169,
|
27329 |
+
"learning_rate": 0.0009857706822907783,
|
27330 |
+
"loss": 1.5509,
|
27331 |
+
"step": 7772
|
27332 |
+
},
|
27333 |
+
{
|
27334 |
+
"epoch": 0.3456493708594549,
|
27335 |
+
"grad_norm": 0.08014550805091858,
|
27336 |
+
"learning_rate": 0.0009857623163779606,
|
27337 |
+
"loss": 1.5515,
|
27338 |
+
"step": 7774
|
27339 |
+
},
|
27340 |
+
{
|
27341 |
+
"epoch": 0.3457382953181273,
|
27342 |
+
"grad_norm": 0.08095970749855042,
|
27343 |
+
"learning_rate": 0.00098575394804208,
|
27344 |
+
"loss": 1.5545,
|
27345 |
+
"step": 7776
|
27346 |
+
},
|
27347 |
+
{
|
27348 |
+
"epoch": 0.3458272197767996,
|
27349 |
+
"grad_norm": 0.0833306685090065,
|
27350 |
+
"learning_rate": 0.000985745577283178,
|
27351 |
+
"loss": 1.5512,
|
27352 |
+
"step": 7778
|
27353 |
+
},
|
27354 |
+
{
|
27355 |
+
"epoch": 0.34591614423547196,
|
27356 |
+
"grad_norm": 0.08204789459705353,
|
27357 |
+
"learning_rate": 0.0009857372041012966,
|
27358 |
+
"loss": 1.5512,
|
27359 |
+
"step": 7780
|
27360 |
+
},
|
27361 |
+
{
|
27362 |
+
"epoch": 0.3460050686941443,
|
27363 |
+
"grad_norm": 0.08149214088916779,
|
27364 |
+
"learning_rate": 0.000985728828496477,
|
27365 |
+
"loss": 1.5549,
|
27366 |
+
"step": 7782
|
27367 |
+
},
|
27368 |
+
{
|
27369 |
+
"epoch": 0.3460939931528167,
|
27370 |
+
"grad_norm": 0.08480332046747208,
|
27371 |
+
"learning_rate": 0.0009857204504687617,
|
27372 |
+
"loss": 1.5529,
|
27373 |
+
"step": 7784
|
27374 |
+
},
|
27375 |
+
{
|
27376 |
+
"epoch": 0.34618291761148906,
|
27377 |
+
"grad_norm": 0.0808916836977005,
|
27378 |
+
"learning_rate": 0.0009857120700181921,
|
27379 |
+
"loss": 1.5539,
|
27380 |
+
"step": 7786
|
27381 |
+
},
|
27382 |
+
{
|
27383 |
+
"epoch": 0.3462718420701614,
|
27384 |
+
"grad_norm": 0.08434953540563583,
|
27385 |
+
"learning_rate": 0.0009857036871448101,
|
27386 |
+
"loss": 1.5492,
|
27387 |
+
"step": 7788
|
27388 |
+
},
|
27389 |
+
{
|
27390 |
+
"epoch": 0.34636076652883374,
|
27391 |
+
"grad_norm": 0.077853262424469,
|
27392 |
+
"learning_rate": 0.0009856953018486576,
|
27393 |
+
"loss": 1.5522,
|
27394 |
+
"step": 7790
|
27395 |
+
},
|
27396 |
+
{
|
27397 |
+
"epoch": 0.3464496909875061,
|
27398 |
+
"grad_norm": 0.08755781501531601,
|
27399 |
+
"learning_rate": 0.0009856869141297762,
|
27400 |
+
"loss": 1.5561,
|
27401 |
+
"step": 7792
|
27402 |
+
},
|
27403 |
+
{
|
27404 |
+
"epoch": 0.3465386154461785,
|
27405 |
+
"grad_norm": 0.0840546116232872,
|
27406 |
+
"learning_rate": 0.0009856785239882079,
|
27407 |
+
"loss": 1.5522,
|
27408 |
+
"step": 7794
|
27409 |
+
},
|
27410 |
+
{
|
27411 |
+
"epoch": 0.34662753990485085,
|
27412 |
+
"grad_norm": 0.08388349413871765,
|
27413 |
+
"learning_rate": 0.0009856701314239942,
|
27414 |
+
"loss": 1.5519,
|
27415 |
+
"step": 7796
|
27416 |
+
},
|
27417 |
+
{
|
27418 |
+
"epoch": 0.3467164643635232,
|
27419 |
+
"grad_norm": 0.07955602556467056,
|
27420 |
+
"learning_rate": 0.0009856617364371776,
|
27421 |
+
"loss": 1.5584,
|
27422 |
+
"step": 7798
|
27423 |
+
},
|
27424 |
+
{
|
27425 |
+
"epoch": 0.3468053888221955,
|
27426 |
+
"grad_norm": 0.08307410031557083,
|
27427 |
+
"learning_rate": 0.0009856533390277995,
|
27428 |
+
"loss": 1.5527,
|
27429 |
+
"step": 7800
|
27430 |
+
},
|
27431 |
+
{
|
27432 |
+
"epoch": 0.3468943132808679,
|
27433 |
+
"grad_norm": 0.08166033774614334,
|
27434 |
+
"learning_rate": 0.000985644939195902,
|
27435 |
+
"loss": 1.5575,
|
27436 |
+
"step": 7802
|
27437 |
+
},
|
27438 |
+
{
|
27439 |
+
"epoch": 0.34698323773954026,
|
27440 |
+
"grad_norm": 0.08017229288816452,
|
27441 |
+
"learning_rate": 0.0009856365369415269,
|
27442 |
+
"loss": 1.5536,
|
27443 |
+
"step": 7804
|
27444 |
+
},
|
27445 |
+
{
|
27446 |
+
"epoch": 0.34707216219821263,
|
27447 |
+
"grad_norm": 0.08459261059761047,
|
27448 |
+
"learning_rate": 0.000985628132264716,
|
27449 |
+
"loss": 1.5624,
|
27450 |
+
"step": 7806
|
27451 |
+
},
|
27452 |
+
{
|
27453 |
+
"epoch": 0.347161086656885,
|
27454 |
+
"grad_norm": 0.07887223362922668,
|
27455 |
+
"learning_rate": 0.0009856197251655117,
|
27456 |
+
"loss": 1.5504,
|
27457 |
+
"step": 7808
|
27458 |
+
},
|
27459 |
+
{
|
27460 |
+
"epoch": 0.3472500111155573,
|
27461 |
+
"grad_norm": 0.08019403368234634,
|
27462 |
+
"learning_rate": 0.0009856113156439554,
|
27463 |
+
"loss": 1.5524,
|
27464 |
+
"step": 7810
|
27465 |
+
},
|
27466 |
+
{
|
27467 |
+
"epoch": 0.3473389355742297,
|
27468 |
+
"grad_norm": 0.0818062275648117,
|
27469 |
+
"learning_rate": 0.0009856029037000893,
|
27470 |
+
"loss": 1.5556,
|
27471 |
+
"step": 7812
|
27472 |
+
},
|
27473 |
+
{
|
27474 |
+
"epoch": 0.34742786003290205,
|
27475 |
+
"grad_norm": 0.07944361120462418,
|
27476 |
+
"learning_rate": 0.0009855944893339553,
|
27477 |
+
"loss": 1.5543,
|
27478 |
+
"step": 7814
|
27479 |
+
},
|
27480 |
+
{
|
27481 |
+
"epoch": 0.3475167844915744,
|
27482 |
+
"grad_norm": 0.07994072139263153,
|
27483 |
+
"learning_rate": 0.0009855860725455953,
|
27484 |
+
"loss": 1.5454,
|
27485 |
+
"step": 7816
|
27486 |
+
},
|
27487 |
+
{
|
27488 |
+
"epoch": 0.3476057089502468,
|
27489 |
+
"grad_norm": 0.07912899553775787,
|
27490 |
+
"learning_rate": 0.0009855776533350513,
|
27491 |
+
"loss": 1.5526,
|
27492 |
+
"step": 7818
|
27493 |
+
},
|
27494 |
+
{
|
27495 |
+
"epoch": 0.3476946334089191,
|
27496 |
+
"grad_norm": 0.08150925487279892,
|
27497 |
+
"learning_rate": 0.0009855692317023654,
|
27498 |
+
"loss": 1.5507,
|
27499 |
+
"step": 7820
|
27500 |
+
},
|
27501 |
+
{
|
27502 |
+
"epoch": 0.34778355786759146,
|
27503 |
+
"grad_norm": 0.0804237350821495,
|
27504 |
+
"learning_rate": 0.0009855608076475798,
|
27505 |
+
"loss": 1.5562,
|
27506 |
+
"step": 7822
|
27507 |
+
},
|
27508 |
+
{
|
27509 |
+
"epoch": 0.34787248232626383,
|
27510 |
+
"grad_norm": 0.07889696210622787,
|
27511 |
+
"learning_rate": 0.000985552381170736,
|
27512 |
+
"loss": 1.5572,
|
27513 |
+
"step": 7824
|
27514 |
+
},
|
27515 |
+
{
|
27516 |
+
"epoch": 0.3479614067849362,
|
27517 |
+
"grad_norm": 0.07692207396030426,
|
27518 |
+
"learning_rate": 0.0009855439522718766,
|
27519 |
+
"loss": 1.5534,
|
27520 |
+
"step": 7826
|
27521 |
+
},
|
27522 |
+
{
|
27523 |
+
"epoch": 0.34805033124360857,
|
27524 |
+
"grad_norm": 0.07672964036464691,
|
27525 |
+
"learning_rate": 0.0009855355209510432,
|
27526 |
+
"loss": 1.553,
|
27527 |
+
"step": 7828
|
27528 |
+
},
|
27529 |
+
{
|
27530 |
+
"epoch": 0.34813925570228094,
|
27531 |
+
"grad_norm": 0.07981939613819122,
|
27532 |
+
"learning_rate": 0.000985527087208278,
|
27533 |
+
"loss": 1.5549,
|
27534 |
+
"step": 7830
|
27535 |
+
},
|
27536 |
+
{
|
27537 |
+
"epoch": 0.34822818016095325,
|
27538 |
+
"grad_norm": 0.07698217779397964,
|
27539 |
+
"learning_rate": 0.0009855186510436232,
|
27540 |
+
"loss": 1.5567,
|
27541 |
+
"step": 7832
|
27542 |
+
},
|
27543 |
+
{
|
27544 |
+
"epoch": 0.3483171046196256,
|
27545 |
+
"grad_norm": 0.07928860932588577,
|
27546 |
+
"learning_rate": 0.000985510212457121,
|
27547 |
+
"loss": 1.5513,
|
27548 |
+
"step": 7834
|
27549 |
+
},
|
27550 |
+
{
|
27551 |
+
"epoch": 0.348406029078298,
|
27552 |
+
"grad_norm": 0.08210493624210358,
|
27553 |
+
"learning_rate": 0.0009855017714488128,
|
27554 |
+
"loss": 1.5503,
|
27555 |
+
"step": 7836
|
27556 |
+
},
|
27557 |
+
{
|
27558 |
+
"epoch": 0.34849495353697035,
|
27559 |
+
"grad_norm": 0.08063147217035294,
|
27560 |
+
"learning_rate": 0.0009854933280187416,
|
27561 |
+
"loss": 1.5549,
|
27562 |
+
"step": 7838
|
27563 |
+
},
|
27564 |
+
{
|
27565 |
+
"epoch": 0.3485838779956427,
|
27566 |
+
"grad_norm": 0.07979092001914978,
|
27567 |
+
"learning_rate": 0.0009854848821669487,
|
27568 |
+
"loss": 1.5482,
|
27569 |
+
"step": 7840
|
27570 |
+
},
|
27571 |
+
{
|
27572 |
+
"epoch": 0.34867280245431503,
|
27573 |
+
"grad_norm": 0.0807296484708786,
|
27574 |
+
"learning_rate": 0.0009854764338934768,
|
27575 |
+
"loss": 1.5546,
|
27576 |
+
"step": 7842
|
27577 |
+
},
|
27578 |
+
{
|
27579 |
+
"epoch": 0.3487617269129874,
|
27580 |
+
"grad_norm": 0.082137331366539,
|
27581 |
+
"learning_rate": 0.0009854679831983678,
|
27582 |
+
"loss": 1.5572,
|
27583 |
+
"step": 7844
|
27584 |
+
},
|
27585 |
+
{
|
27586 |
+
"epoch": 0.34885065137165977,
|
27587 |
+
"grad_norm": 0.0817376971244812,
|
27588 |
+
"learning_rate": 0.000985459530081664,
|
27589 |
+
"loss": 1.5512,
|
27590 |
+
"step": 7846
|
27591 |
+
},
|
27592 |
+
{
|
27593 |
+
"epoch": 0.34893957583033214,
|
27594 |
+
"grad_norm": 0.08156748861074448,
|
27595 |
+
"learning_rate": 0.0009854510745434075,
|
27596 |
+
"loss": 1.5563,
|
27597 |
+
"step": 7848
|
27598 |
+
},
|
27599 |
+
{
|
27600 |
+
"epoch": 0.3490285002890045,
|
27601 |
+
"grad_norm": 0.08468926697969437,
|
27602 |
+
"learning_rate": 0.0009854426165836403,
|
27603 |
+
"loss": 1.5537,
|
27604 |
+
"step": 7850
|
27605 |
+
},
|
27606 |
+
{
|
27607 |
+
"epoch": 0.3491174247476769,
|
27608 |
+
"grad_norm": 0.0817328691482544,
|
27609 |
+
"learning_rate": 0.000985434156202405,
|
27610 |
+
"loss": 1.559,
|
27611 |
+
"step": 7852
|
27612 |
+
},
|
27613 |
+
{
|
27614 |
+
"epoch": 0.3492063492063492,
|
27615 |
+
"grad_norm": 0.07793274521827698,
|
27616 |
+
"learning_rate": 0.0009854256933997435,
|
27617 |
+
"loss": 1.5527,
|
27618 |
+
"step": 7854
|
27619 |
+
},
|
27620 |
+
{
|
27621 |
+
"epoch": 0.34929527366502156,
|
27622 |
+
"grad_norm": 0.07918530702590942,
|
27623 |
+
"learning_rate": 0.0009854172281756979,
|
27624 |
+
"loss": 1.554,
|
27625 |
+
"step": 7856
|
27626 |
+
},
|
27627 |
+
{
|
27628 |
+
"epoch": 0.3493841981236939,
|
27629 |
+
"grad_norm": 0.08301304280757904,
|
27630 |
+
"learning_rate": 0.0009854087605303105,
|
27631 |
+
"loss": 1.554,
|
27632 |
+
"step": 7858
|
27633 |
+
},
|
27634 |
+
{
|
27635 |
+
"epoch": 0.3494731225823663,
|
27636 |
+
"grad_norm": 0.07774464040994644,
|
27637 |
+
"learning_rate": 0.0009854002904636238,
|
27638 |
+
"loss": 1.5517,
|
27639 |
+
"step": 7860
|
27640 |
+
},
|
27641 |
+
{
|
27642 |
+
"epoch": 0.34956204704103866,
|
27643 |
+
"grad_norm": 0.08034802228212357,
|
27644 |
+
"learning_rate": 0.00098539181797568,
|
27645 |
+
"loss": 1.5544,
|
27646 |
+
"step": 7862
|
27647 |
+
},
|
27648 |
+
{
|
27649 |
+
"epoch": 0.34965097149971097,
|
27650 |
+
"grad_norm": 0.07757385820150375,
|
27651 |
+
"learning_rate": 0.0009853833430665212,
|
27652 |
+
"loss": 1.5548,
|
27653 |
+
"step": 7864
|
27654 |
+
},
|
27655 |
+
{
|
27656 |
+
"epoch": 0.34973989595838334,
|
27657 |
+
"grad_norm": 0.08288813382387161,
|
27658 |
+
"learning_rate": 0.0009853748657361896,
|
27659 |
+
"loss": 1.5527,
|
27660 |
+
"step": 7866
|
27661 |
+
},
|
27662 |
+
{
|
27663 |
+
"epoch": 0.3498288204170557,
|
27664 |
+
"grad_norm": 0.08583445101976395,
|
27665 |
+
"learning_rate": 0.0009853663859847276,
|
27666 |
+
"loss": 1.5544,
|
27667 |
+
"step": 7868
|
27668 |
+
},
|
27669 |
+
{
|
27670 |
+
"epoch": 0.3499177448757281,
|
27671 |
+
"grad_norm": 0.07990463078022003,
|
27672 |
+
"learning_rate": 0.0009853579038121775,
|
27673 |
+
"loss": 1.5537,
|
27674 |
+
"step": 7870
|
27675 |
+
},
|
27676 |
+
{
|
27677 |
+
"epoch": 0.35000666933440044,
|
27678 |
+
"grad_norm": 0.08092455565929413,
|
27679 |
+
"learning_rate": 0.0009853494192185817,
|
27680 |
+
"loss": 1.5494,
|
27681 |
+
"step": 7872
|
27682 |
+
},
|
27683 |
+
{
|
27684 |
+
"epoch": 0.3500955937930728,
|
27685 |
+
"grad_norm": 0.07774668186903,
|
27686 |
+
"learning_rate": 0.0009853409322039823,
|
27687 |
+
"loss": 1.5512,
|
27688 |
+
"step": 7874
|
27689 |
+
},
|
27690 |
+
{
|
27691 |
+
"epoch": 0.3501845182517451,
|
27692 |
+
"grad_norm": 0.08283083885908127,
|
27693 |
+
"learning_rate": 0.000985332442768422,
|
27694 |
+
"loss": 1.5538,
|
27695 |
+
"step": 7876
|
27696 |
+
},
|
27697 |
+
{
|
27698 |
+
"epoch": 0.3502734427104175,
|
27699 |
+
"grad_norm": 0.08365800231695175,
|
27700 |
+
"learning_rate": 0.0009853239509119427,
|
27701 |
+
"loss": 1.5525,
|
27702 |
+
"step": 7878
|
27703 |
+
},
|
27704 |
+
{
|
27705 |
+
"epoch": 0.35036236716908986,
|
27706 |
+
"grad_norm": 0.077533058822155,
|
27707 |
+
"learning_rate": 0.000985315456634587,
|
27708 |
+
"loss": 1.5596,
|
27709 |
+
"step": 7880
|
27710 |
+
},
|
27711 |
+
{
|
27712 |
+
"epoch": 0.35045129162776223,
|
27713 |
+
"grad_norm": 0.08021321892738342,
|
27714 |
+
"learning_rate": 0.0009853069599363973,
|
27715 |
+
"loss": 1.5468,
|
27716 |
+
"step": 7882
|
27717 |
+
},
|
27718 |
+
{
|
27719 |
+
"epoch": 0.3505402160864346,
|
27720 |
+
"grad_norm": 0.11996711045503616,
|
27721 |
+
"learning_rate": 0.0009852984608174158,
|
27722 |
+
"loss": 1.5533,
|
27723 |
+
"step": 7884
|
27724 |
+
},
|
27725 |
+
{
|
27726 |
+
"epoch": 0.3506291405451069,
|
27727 |
+
"grad_norm": 0.3232218623161316,
|
27728 |
+
"learning_rate": 0.0009852899592776852,
|
27729 |
+
"loss": 1.6783,
|
27730 |
+
"step": 7886
|
27731 |
+
},
|
27732 |
+
{
|
27733 |
+
"epoch": 0.3507180650037793,
|
27734 |
+
"grad_norm": 4.166322231292725,
|
27735 |
+
"learning_rate": 0.0009852814553172476,
|
27736 |
+
"loss": 2.4033,
|
27737 |
+
"step": 7888
|
27738 |
+
},
|
27739 |
+
{
|
27740 |
+
"epoch": 0.35080698946245165,
|
27741 |
+
"grad_norm": 105.5245132446289,
|
27742 |
+
"learning_rate": 0.0009852729489361457,
|
27743 |
+
"loss": 3.5532,
|
27744 |
+
"step": 7890
|
27745 |
+
},
|
27746 |
+
{
|
27747 |
+
"epoch": 0.350895913921124,
|
27748 |
+
"grad_norm": 5.243870258331299,
|
27749 |
+
"learning_rate": 0.0009852644401344218,
|
27750 |
+
"loss": 7.1315,
|
27751 |
+
"step": 7892
|
27752 |
+
},
|
27753 |
+
{
|
27754 |
+
"epoch": 0.3509848383797964,
|
27755 |
+
"grad_norm": 6.443427085876465,
|
27756 |
+
"learning_rate": 0.0009852559289121182,
|
27757 |
+
"loss": 7.392,
|
27758 |
+
"step": 7894
|
27759 |
+
},
|
27760 |
+
{
|
27761 |
+
"epoch": 0.3510737628384687,
|
27762 |
+
"grad_norm": 11.5606050491333,
|
27763 |
+
"learning_rate": 0.0009852474152692774,
|
27764 |
+
"loss": 7.5255,
|
27765 |
+
"step": 7896
|
27766 |
+
},
|
27767 |
+
{
|
27768 |
+
"epoch": 0.35116268729714106,
|
27769 |
+
"grad_norm": 2.048408031463623,
|
27770 |
+
"learning_rate": 0.000985238899205942,
|
27771 |
+
"loss": 7.2354,
|
27772 |
+
"step": 7898
|
27773 |
+
},
|
27774 |
+
{
|
27775 |
+
"epoch": 0.35125161175581343,
|
27776 |
+
"grad_norm": 0.9983798265457153,
|
27777 |
+
"learning_rate": 0.0009852303807221545,
|
27778 |
+
"loss": 6.5717,
|
27779 |
+
"step": 7900
|
27780 |
+
},
|
27781 |
+
{
|
27782 |
+
"epoch": 0.3513405362144858,
|
27783 |
+
"grad_norm": 6.880504608154297,
|
27784 |
+
"learning_rate": 0.0009852218598179572,
|
27785 |
+
"loss": 6.7333,
|
27786 |
+
"step": 7902
|
27787 |
+
},
|
27788 |
+
{
|
27789 |
+
"epoch": 0.35142946067315817,
|
27790 |
+
"grad_norm": 0.685417890548706,
|
27791 |
+
"learning_rate": 0.0009852133364933928,
|
27792 |
+
"loss": 6.3766,
|
27793 |
+
"step": 7904
|
27794 |
+
},
|
27795 |
+
{
|
27796 |
+
"epoch": 0.35151838513183054,
|
27797 |
+
"grad_norm": 1.0315533876419067,
|
27798 |
+
"learning_rate": 0.0009852048107485039,
|
27799 |
+
"loss": 6.1477,
|
27800 |
+
"step": 7906
|
27801 |
+
},
|
27802 |
+
{
|
27803 |
+
"epoch": 0.35160730959050285,
|
27804 |
+
"grad_norm": 0.4206119179725647,
|
27805 |
+
"learning_rate": 0.0009851962825833324,
|
27806 |
+
"loss": 6.0279,
|
27807 |
+
"step": 7908
|
27808 |
+
},
|
27809 |
+
{
|
27810 |
+
"epoch": 0.3516962340491752,
|
27811 |
+
"grad_norm": 0.34929385781288147,
|
27812 |
+
"learning_rate": 0.0009851877519979218,
|
27813 |
+
"loss": 5.9475,
|
27814 |
+
"step": 7910
|
27815 |
+
},
|
27816 |
+
{
|
27817 |
+
"epoch": 0.3517851585078476,
|
27818 |
+
"grad_norm": 0.33057817816734314,
|
27819 |
+
"learning_rate": 0.000985179218992314,
|
27820 |
+
"loss": 5.8579,
|
27821 |
+
"step": 7912
|
27822 |
+
},
|
27823 |
+
{
|
27824 |
+
"epoch": 0.35187408296651995,
|
27825 |
+
"grad_norm": 0.2894933819770813,
|
27826 |
+
"learning_rate": 0.0009851706835665515,
|
27827 |
+
"loss": 5.8023,
|
27828 |
+
"step": 7914
|
27829 |
+
},
|
27830 |
+
{
|
27831 |
+
"epoch": 0.3519630074251923,
|
27832 |
+
"grad_norm": 0.21461133658885956,
|
27833 |
+
"learning_rate": 0.0009851621457206774,
|
27834 |
+
"loss": 5.7603,
|
27835 |
+
"step": 7916
|
27836 |
+
},
|
27837 |
+
{
|
27838 |
+
"epoch": 0.35205193188386463,
|
27839 |
+
"grad_norm": 0.21909675002098083,
|
27840 |
+
"learning_rate": 0.0009851536054547338,
|
27841 |
+
"loss": 5.7207,
|
27842 |
+
"step": 7918
|
27843 |
+
},
|
27844 |
+
{
|
27845 |
+
"epoch": 0.352140856342537,
|
27846 |
+
"grad_norm": 0.3142562508583069,
|
27847 |
+
"learning_rate": 0.0009851450627687635,
|
27848 |
+
"loss": 5.6821,
|
27849 |
+
"step": 7920
|
27850 |
+
},
|
27851 |
+
{
|
27852 |
+
"epoch": 0.35222978080120937,
|
27853 |
+
"grad_norm": 1.3621059656143188,
|
27854 |
+
"learning_rate": 0.000985136517662809,
|
27855 |
+
"loss": 5.6821,
|
27856 |
+
"step": 7922
|
27857 |
+
},
|
27858 |
+
{
|
27859 |
+
"epoch": 0.35231870525988174,
|
27860 |
+
"grad_norm": 0.7668919563293457,
|
27861 |
+
"learning_rate": 0.0009851279701369134,
|
27862 |
+
"loss": 5.6241,
|
27863 |
+
"step": 7924
|
27864 |
+
},
|
27865 |
+
{
|
27866 |
+
"epoch": 0.3524076297185541,
|
27867 |
+
"grad_norm": 0.3618534207344055,
|
27868 |
+
"learning_rate": 0.0009851194201911187,
|
27869 |
+
"loss": 5.5482,
|
27870 |
+
"step": 7926
|
27871 |
+
},
|
27872 |
+
{
|
27873 |
+
"epoch": 0.3524965541772265,
|
27874 |
+
"grad_norm": 0.2870240807533264,
|
27875 |
+
"learning_rate": 0.0009851108678254677,
|
27876 |
+
"loss": 5.5032,
|
27877 |
+
"step": 7928
|
27878 |
+
},
|
27879 |
+
{
|
27880 |
+
"epoch": 0.3525854786358988,
|
27881 |
+
"grad_norm": 0.485879510641098,
|
27882 |
+
"learning_rate": 0.0009851023130400035,
|
27883 |
+
"loss": 5.4567,
|
27884 |
+
"step": 7930
|
27885 |
+
},
|
27886 |
+
{
|
27887 |
+
"epoch": 0.35267440309457115,
|
27888 |
+
"grad_norm": 0.2687022387981415,
|
27889 |
+
"learning_rate": 0.000985093755834768,
|
27890 |
+
"loss": 5.3988,
|
27891 |
+
"step": 7932
|
27892 |
+
},
|
27893 |
+
{
|
27894 |
+
"epoch": 0.3527633275532435,
|
27895 |
+
"grad_norm": 0.3306155204772949,
|
27896 |
+
"learning_rate": 0.0009850851962098046,
|
27897 |
+
"loss": 5.3285,
|
27898 |
+
"step": 7934
|
27899 |
+
},
|
27900 |
+
{
|
27901 |
+
"epoch": 0.3528522520119159,
|
27902 |
+
"grad_norm": 0.7935633063316345,
|
27903 |
+
"learning_rate": 0.0009850766341651556,
|
27904 |
+
"loss": 5.2442,
|
27905 |
+
"step": 7936
|
27906 |
+
},
|
27907 |
+
{
|
27908 |
+
"epoch": 0.35294117647058826,
|
27909 |
+
"grad_norm": 1.1152472496032715,
|
27910 |
+
"learning_rate": 0.000985068069700864,
|
27911 |
+
"loss": 5.2263,
|
27912 |
+
"step": 7938
|
27913 |
+
},
|
27914 |
+
{
|
27915 |
+
"epoch": 0.35303010092926057,
|
27916 |
+
"grad_norm": 1.460869550704956,
|
27917 |
+
"learning_rate": 0.0009850595028169722,
|
27918 |
+
"loss": 5.2365,
|
27919 |
+
"step": 7940
|
27920 |
+
},
|
27921 |
+
{
|
27922 |
+
"epoch": 0.35311902538793294,
|
27923 |
+
"grad_norm": 1.1066856384277344,
|
27924 |
+
"learning_rate": 0.0009850509335135231,
|
27925 |
+
"loss": 5.1598,
|
27926 |
+
"step": 7942
|
27927 |
+
},
|
27928 |
+
{
|
27929 |
+
"epoch": 0.3532079498466053,
|
27930 |
+
"grad_norm": 0.8395753502845764,
|
27931 |
+
"learning_rate": 0.0009850423617905595,
|
27932 |
+
"loss": 5.0372,
|
27933 |
+
"step": 7944
|
27934 |
+
},
|
27935 |
+
{
|
27936 |
+
"epoch": 0.3532968743052777,
|
27937 |
+
"grad_norm": 0.550400972366333,
|
27938 |
+
"learning_rate": 0.0009850337876481242,
|
27939 |
+
"loss": 4.9278,
|
27940 |
+
"step": 7946
|
27941 |
+
},
|
27942 |
+
{
|
27943 |
+
"epoch": 0.35338579876395004,
|
27944 |
+
"grad_norm": 0.7241742014884949,
|
27945 |
+
"learning_rate": 0.0009850252110862596,
|
27946 |
+
"loss": 4.8471,
|
27947 |
+
"step": 7948
|
27948 |
+
},
|
27949 |
+
{
|
27950 |
+
"epoch": 0.35347472322262236,
|
27951 |
+
"grad_norm": 1.1229575872421265,
|
27952 |
+
"learning_rate": 0.000985016632105009,
|
27953 |
+
"loss": 4.7441,
|
27954 |
+
"step": 7950
|
27955 |
+
},
|
27956 |
+
{
|
27957 |
+
"epoch": 0.3535636476812947,
|
27958 |
+
"grad_norm": 0.5880857110023499,
|
27959 |
+
"learning_rate": 0.0009850080507044147,
|
27960 |
+
"loss": 4.6106,
|
27961 |
+
"step": 7952
|
27962 |
+
},
|
27963 |
+
{
|
27964 |
+
"epoch": 0.3536525721399671,
|
27965 |
+
"grad_norm": 0.6695611476898193,
|
27966 |
+
"learning_rate": 0.0009849994668845196,
|
27967 |
+
"loss": 4.4359,
|
27968 |
+
"step": 7954
|
27969 |
+
},
|
27970 |
+
{
|
27971 |
+
"epoch": 0.35374149659863946,
|
27972 |
+
"grad_norm": 0.4951477646827698,
|
27973 |
+
"learning_rate": 0.000984990880645367,
|
27974 |
+
"loss": 4.3121,
|
27975 |
+
"step": 7956
|
27976 |
+
},
|
27977 |
+
{
|
27978 |
+
"epoch": 0.35383042105731183,
|
27979 |
+
"grad_norm": 1.49424147605896,
|
27980 |
+
"learning_rate": 0.0009849822919869994,
|
27981 |
+
"loss": 4.3655,
|
27982 |
+
"step": 7958
|
27983 |
+
},
|
27984 |
+
{
|
27985 |
+
"epoch": 0.3539193455159842,
|
27986 |
+
"grad_norm": 0.6731409430503845,
|
27987 |
+
"learning_rate": 0.0009849737009094595,
|
27988 |
+
"loss": 4.1954,
|
27989 |
+
"step": 7960
|
27990 |
+
},
|
27991 |
+
{
|
27992 |
+
"epoch": 0.3540082699746565,
|
27993 |
+
"grad_norm": 0.657111644744873,
|
27994 |
+
"learning_rate": 0.00098496510741279,
|
27995 |
+
"loss": 3.9788,
|
27996 |
+
"step": 7962
|
27997 |
+
},
|
27998 |
+
{
|
27999 |
+
"epoch": 0.3540971944333289,
|
28000 |
+
"grad_norm": 1.2278788089752197,
|
28001 |
+
"learning_rate": 0.0009849565114970346,
|
28002 |
+
"loss": 3.8825,
|
28003 |
+
"step": 7964
|
28004 |
+
},
|
28005 |
+
{
|
28006 |
+
"epoch": 0.35418611889200124,
|
28007 |
+
"grad_norm": 1.1738439798355103,
|
28008 |
+
"learning_rate": 0.0009849479131622352,
|
28009 |
+
"loss": 3.7363,
|
28010 |
+
"step": 7966
|
28011 |
+
},
|
28012 |
+
{
|
28013 |
+
"epoch": 0.3542750433506736,
|
28014 |
+
"grad_norm": 0.9582828283309937,
|
28015 |
+
"learning_rate": 0.000984939312408435,
|
28016 |
+
"loss": 3.5467,
|
28017 |
+
"step": 7968
|
28018 |
+
},
|
28019 |
+
{
|
28020 |
+
"epoch": 0.354363967809346,
|
28021 |
+
"grad_norm": 0.9599164724349976,
|
28022 |
+
"learning_rate": 0.0009849307092356773,
|
28023 |
+
"loss": 3.3789,
|
28024 |
+
"step": 7970
|
28025 |
+
},
|
28026 |
+
{
|
28027 |
+
"epoch": 0.3544528922680183,
|
28028 |
+
"grad_norm": 0.8798990249633789,
|
28029 |
+
"learning_rate": 0.0009849221036440049,
|
28030 |
+
"loss": 3.201,
|
28031 |
+
"step": 7972
|
28032 |
+
},
|
28033 |
+
{
|
28034 |
+
"epoch": 0.35454181672669066,
|
28035 |
+
"grad_norm": 0.837184488773346,
|
28036 |
+
"learning_rate": 0.0009849134956334603,
|
28037 |
+
"loss": 3.0279,
|
28038 |
+
"step": 7974
|
28039 |
+
},
|
28040 |
+
{
|
28041 |
+
"epoch": 0.35463074118536303,
|
28042 |
+
"grad_norm": 0.869229793548584,
|
28043 |
+
"learning_rate": 0.0009849048852040867,
|
28044 |
+
"loss": 2.9193,
|
28045 |
+
"step": 7976
|
28046 |
+
},
|
28047 |
+
{
|
28048 |
+
"epoch": 0.3547196656440354,
|
28049 |
+
"grad_norm": 1.0861220359802246,
|
28050 |
+
"learning_rate": 0.000984896272355927,
|
28051 |
+
"loss": 2.7289,
|
28052 |
+
"step": 7978
|
28053 |
+
},
|
28054 |
+
{
|
28055 |
+
"epoch": 0.35480859010270777,
|
28056 |
+
"grad_norm": 0.8610190749168396,
|
28057 |
+
"learning_rate": 0.000984887657089024,
|
28058 |
+
"loss": 2.5584,
|
28059 |
+
"step": 7980
|
28060 |
+
},
|
28061 |
+
{
|
28062 |
+
"epoch": 0.35489751456138013,
|
28063 |
+
"grad_norm": 0.6836897134780884,
|
28064 |
+
"learning_rate": 0.0009848790394034214,
|
28065 |
+
"loss": 2.4055,
|
28066 |
+
"step": 7982
|
28067 |
+
},
|
28068 |
+
{
|
28069 |
+
"epoch": 0.35498643902005245,
|
28070 |
+
"grad_norm": 0.7401456832885742,
|
28071 |
+
"learning_rate": 0.0009848704192991613,
|
28072 |
+
"loss": 2.2528,
|
28073 |
+
"step": 7984
|
28074 |
+
},
|
28075 |
+
{
|
28076 |
+
"epoch": 0.3550753634787248,
|
28077 |
+
"grad_norm": 0.9028481245040894,
|
28078 |
+
"learning_rate": 0.0009848617967762872,
|
28079 |
+
"loss": 2.3112,
|
28080 |
+
"step": 7986
|
28081 |
+
},
|
28082 |
+
{
|
28083 |
+
"epoch": 0.3551642879373972,
|
28084 |
+
"grad_norm": 0.3118695318698883,
|
28085 |
+
"learning_rate": 0.000984853171834842,
|
28086 |
+
"loss": 2.1296,
|
28087 |
+
"step": 7988
|
28088 |
+
},
|
28089 |
+
{
|
28090 |
+
"epoch": 0.35525321239606955,
|
28091 |
+
"grad_norm": 0.30334171652793884,
|
28092 |
+
"learning_rate": 0.0009848445444748685,
|
28093 |
+
"loss": 2.0048,
|
28094 |
+
"step": 7990
|
28095 |
+
},
|
28096 |
+
{
|
28097 |
+
"epoch": 0.3553421368547419,
|
28098 |
+
"grad_norm": 0.19824688136577606,
|
28099 |
+
"learning_rate": 0.0009848359146964103,
|
28100 |
+
"loss": 1.9316,
|
28101 |
+
"step": 7992
|
28102 |
+
},
|
28103 |
+
{
|
28104 |
+
"epoch": 0.35543106131341423,
|
28105 |
+
"grad_norm": 0.2128942757844925,
|
28106 |
+
"learning_rate": 0.0009848272824995096,
|
28107 |
+
"loss": 1.8792,
|
28108 |
+
"step": 7994
|
28109 |
+
},
|
28110 |
+
{
|
28111 |
+
"epoch": 0.3555199857720866,
|
28112 |
+
"grad_norm": 0.15974895656108856,
|
28113 |
+
"learning_rate": 0.0009848186478842104,
|
28114 |
+
"loss": 1.8306,
|
28115 |
+
"step": 7996
|
28116 |
+
},
|
28117 |
+
{
|
28118 |
+
"epoch": 0.35560891023075897,
|
28119 |
+
"grad_norm": 0.13069583475589752,
|
28120 |
+
"learning_rate": 0.000984810010850555,
|
28121 |
+
"loss": 1.7945,
|
28122 |
+
"step": 7998
|
28123 |
+
},
|
28124 |
+
{
|
28125 |
+
"epoch": 0.35569783468943134,
|
28126 |
+
"grad_norm": 0.13678595423698425,
|
28127 |
+
"learning_rate": 0.000984801371398587,
|
28128 |
+
"loss": 1.7696,
|
28129 |
+
"step": 8000
|
28130 |
+
},
|
28131 |
+
{
|
28132 |
+
"epoch": 0.35569783468943134,
|
28133 |
+
"eval_loss": 1.6807708740234375,
|
28134 |
+
"eval_runtime": 12.3592,
|
28135 |
+
"eval_samples_per_second": 559.097,
|
28136 |
+
"eval_steps_per_second": 69.907,
|
28137 |
+
"step": 8000
|
28138 |
}
|
28139 |
],
|
28140 |
"logging_steps": 2,
|
|
|
28154 |
"attributes": {}
|
28155 |
}
|
28156 |
},
|
28157 |
+
"total_flos": 1.711784985624576e+19,
|
28158 |
"train_batch_size": 768,
|
28159 |
"trial_name": null,
|
28160 |
"trial_params": null
|