Training in progress, step 9000, checkpoint

last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f3168371b4d34bcda066e6f74a82bfec8b457ef9d00edf98c5480d2293d8f7e3
 size 1856040378

last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0fd0ab7b3c558e973628879af87172e59cc108ce97049f2ac5663634d4e2a338
 size 928000378

last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a3816ae240b6071ab7e92dda2e725d12e18bf87285ee93c87739a1a68be57ea7
 size 14244

last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bff2269a00d40d32d61117bbc42ac1ebc05e60fa114002712151b20a202ac481
 size 1000
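
The four binary files above are tracked with Git LFS, so the diff touches only their pointer files: a spec version line, the sha256 oid of the stored blob, and its size in bytes. As a minimal sketch, assuming the checkpoint files have been downloaded locally (the path below just reuses the optimizer file name from this commit), the new oid can be checked by recomputing the hash:

# Sketch: recompute the sha256 of a downloaded LFS object and compare it
# against the oid recorded in the pointer diff above.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Read in 1 MiB chunks so large checkpoint files are not loaded at once.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the last-checkpoint/optimizer.pt hunk above
EXPECTED = "f3168371b4d34bcda066e6f74a82bfec8b457ef9d00edf98c5480d2293d8f7e3"
print(sha256_of("last-checkpoint/optimizer.pt") == EXPECTED)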

last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "model/chessformer-3/checkpoint-
-  "epoch": 0.
+  "best_metric": 1.51895010471344,
+  "best_model_checkpoint": "model/chessformer-3/checkpoint-9000",
+  "epoch": 0.4001600640256102,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -28135,6 +28135,3522 @@
       "eval_samples_per_second": 559.097,
       "eval_steps_per_second": 69.907,
       "step": 8000
+    },
+    {
+      "epoch": 0.3557867591481037,
+      "grad_norm": 0.12552303075790405,
+      "learning_rate": 0.0009847927295283491,
+      "loss": 1.7432,
+      "step": 8002
+    },
+    {
+      "epoch": 0.355875683606776,
+      "grad_norm": 0.12398079037666321,
+      "learning_rate": 0.0009847840852398846,
+      "loss": 1.7169,
+      "step": 8004
+    },
+    {
+      "epoch": 0.3559646080654484,
+      "grad_norm": 0.11141873896121979,
+      "learning_rate": 0.0009847754385332369,
+      "loss": 1.6993,
+      "step": 8006
+    },
+    {
+      "epoch": 0.35605353252412075,
+      "grad_norm": 0.1098860651254654,
+      "learning_rate": 0.0009847667894084485,
+      "loss": 1.6952,
+      "step": 8008
+    },
+    {
+      "epoch": 0.3561424569827931,
+      "grad_norm": 0.10826461762189865,
+      "learning_rate": 0.000984758137865563,
+      "loss": 1.6817,
+      "step": 8010
+    },
+    {
+      "epoch": 0.3562313814414655,
+      "grad_norm": 0.10351142287254333,
+      "learning_rate": 0.0009847494839046234,
+      "loss": 1.6737,
+      "step": 8012
+    },
+    {
+      "epoch": 0.35632030590013786,
+      "grad_norm": 0.09795521944761276,
+      "learning_rate": 0.000984740827525673,
+      "loss": 1.6679,
+      "step": 8014
+    },
+    {
+      "epoch": 0.35640923035881017,
+      "grad_norm": 0.0955873504281044,
+      "learning_rate": 0.000984732168728755,
+      "loss": 1.6526,
+      "step": 8016
+    },
+    {
+      "epoch": 0.35649815481748254,
+      "grad_norm": 0.09694499522447586,
+      "learning_rate": 0.0009847235075139123,
+      "loss": 1.6522,
+      "step": 8018
+    },
+    {
+      "epoch": 0.3565870792761549,
+      "grad_norm": 0.09016109257936478,
+      "learning_rate": 0.0009847148438811883,
+      "loss": 1.6439,
+      "step": 8020
+    },
+    {
+      "epoch": 0.3566760037348273,
+      "grad_norm": 0.08890122175216675,
+      "learning_rate": 0.0009847061778306262,
+      "loss": 1.6345,
+      "step": 8022
+    },
+    {
+      "epoch": 0.35676492819349964,
+      "grad_norm": 0.09018497169017792,
+      "learning_rate": 0.0009846975093622692,
+      "loss": 1.6403,
+      "step": 8024
+    },
+    {
+      "epoch": 0.35685385265217195,
+      "grad_norm": 0.0861065536737442,
+      "learning_rate": 0.0009846888384761607,
+      "loss": 1.6304,
+      "step": 8026
+    },
+    {
+      "epoch": 0.3569427771108443,
+      "grad_norm": 0.08618265390396118,
+      "learning_rate": 0.0009846801651723436,
+      "loss": 1.6258,
+      "step": 8028
+    },
+    {
+      "epoch": 0.3570317015695167,
+      "grad_norm": 0.0936831384897232,
+      "learning_rate": 0.0009846714894508617,
+      "loss": 1.6244,
+      "step": 8030
+    },
+    {
+      "epoch": 0.35712062602818906,
+      "grad_norm": 0.08654095232486725,
+      "learning_rate": 0.0009846628113117577,
+      "loss": 1.6206,
+      "step": 8032
+    },
+    {
+      "epoch": 0.3572095504868614,
+      "grad_norm": 0.09224793314933777,
+      "learning_rate": 0.0009846541307550753,
+      "loss": 1.6189,
+      "step": 8034
+    },
+    {
+      "epoch": 0.3572984749455338,
+      "grad_norm": 0.08937020599842072,
+      "learning_rate": 0.0009846454477808575,
+      "loss": 1.6198,
+      "step": 8036
+    },
+    {
+      "epoch": 0.3573873994042061,
+      "grad_norm": 0.09124995023012161,
+      "learning_rate": 0.0009846367623891478,
+      "loss": 1.6108,
+      "step": 8038
+    },
+    {
+      "epoch": 0.3574763238628785,
+      "grad_norm": 0.08880987763404846,
+      "learning_rate": 0.0009846280745799895,
+      "loss": 1.6126,
+      "step": 8040
+    },
+    {
+      "epoch": 0.35756524832155084,
+      "grad_norm": 0.0875345841050148,
+      "learning_rate": 0.000984619384353426,
+      "loss": 1.6134,
+      "step": 8042
+    },
+    {
+      "epoch": 0.3576541727802232,
+      "grad_norm": 0.08219029754400253,
+      "learning_rate": 0.0009846106917095006,
+      "loss": 1.6066,
+      "step": 8044
+    },
+    {
+      "epoch": 0.3577430972388956,
+      "grad_norm": 0.08467015624046326,
+      "learning_rate": 0.0009846019966482564,
+      "loss": 1.6128,
+      "step": 8046
+    },
+    {
+      "epoch": 0.3578320216975679,
+      "grad_norm": 0.08793745934963226,
+      "learning_rate": 0.0009845932991697373,
+      "loss": 1.6093,
+      "step": 8048
+    },
+    {
+      "epoch": 0.35792094615624026,
+      "grad_norm": 0.08105975389480591,
+      "learning_rate": 0.000984584599273986,
+      "loss": 1.602,
+      "step": 8050
+    },
+    {
+      "epoch": 0.35800987061491263,
+      "grad_norm": 0.08754699677228928,
+      "learning_rate": 0.0009845758969610466,
+      "loss": 1.596,
+      "step": 8052
+    },
+    {
+      "epoch": 0.358098795073585,
+      "grad_norm": 0.08423270285129547,
+      "learning_rate": 0.000984567192230962,
+      "loss": 1.5994,
+      "step": 8054
+    },
+    {
+      "epoch": 0.35818771953225736,
+      "grad_norm": 0.08522899448871613,
+      "learning_rate": 0.000984558485083776,
+      "loss": 1.6003,
+      "step": 8056
+    },
+    {
+      "epoch": 0.35827664399092973,
+      "grad_norm": 0.08278319239616394,
+      "learning_rate": 0.0009845497755195318,
+      "loss": 1.6026,
+      "step": 8058
+    },
+    {
+      "epoch": 0.35836556844960205,
+      "grad_norm": 0.08185625076293945,
+      "learning_rate": 0.0009845410635382728,
+      "loss": 1.5918,
+      "step": 8060
+    },
+    {
+      "epoch": 0.3584544929082744,
+      "grad_norm": 0.08351411670446396,
+      "learning_rate": 0.0009845323491400426,
+      "loss": 1.6026,
+      "step": 8062
+    },
+    {
+      "epoch": 0.3585434173669468,
+      "grad_norm": 0.08093686401844025,
+      "learning_rate": 0.0009845236323248845,
+      "loss": 1.6011,
+      "step": 8064
+    },
+    {
+      "epoch": 0.35863234182561915,
+      "grad_norm": 0.08070489019155502,
+      "learning_rate": 0.0009845149130928421,
+      "loss": 1.5963,
+      "step": 8066
+    },
+    {
+      "epoch": 0.3587212662842915,
+      "grad_norm": 0.08184243738651276,
+      "learning_rate": 0.0009845061914439592,
+      "loss": 1.5887,
+      "step": 8068
+    },
+    {
+      "epoch": 0.35881019074296383,
+      "grad_norm": 0.08118681609630585,
+      "learning_rate": 0.0009844974673782786,
+      "loss": 1.5864,
+      "step": 8070
+    },
+    {
+      "epoch": 0.3588991152016362,
+      "grad_norm": 0.08214083313941956,
+      "learning_rate": 0.0009844887408958444,
+      "loss": 1.5933,
+      "step": 8072
+    },
+    {
+      "epoch": 0.35898803966030857,
+      "grad_norm": 0.07901991903781891,
+      "learning_rate": 0.0009844800119967,
+      "loss": 1.5918,
+      "step": 8074
+    },
+    {
+      "epoch": 0.35907696411898093,
+      "grad_norm": 0.0798892080783844,
+      "learning_rate": 0.000984471280680889,
+      "loss": 1.5866,
+      "step": 8076
+    },
+    {
+      "epoch": 0.3591658885776533,
+      "grad_norm": 0.08176968991756439,
+      "learning_rate": 0.0009844625469484546,
+      "loss": 1.5841,
+      "step": 8078
+    },
+    {
+      "epoch": 0.3592548130363256,
+      "grad_norm": 0.0794360563158989,
+      "learning_rate": 0.0009844538107994405,
+      "loss": 1.589,
+      "step": 8080
+    },
+    {
+      "epoch": 0.359343737494998,
+      "grad_norm": 0.08003082126379013,
+      "learning_rate": 0.0009844450722338906,
+      "loss": 1.5884,
+      "step": 8082
+    },
+    {
+      "epoch": 0.35943266195367035,
+      "grad_norm": 0.08419719338417053,
+      "learning_rate": 0.000984436331251848,
+      "loss": 1.5887,
+      "step": 8084
+    },
+    {
+      "epoch": 0.3595215864123427,
+      "grad_norm": 0.07977907359600067,
+      "learning_rate": 0.0009844275878533569,
+      "loss": 1.5834,
+      "step": 8086
+    },
+    {
+      "epoch": 0.3596105108710151,
+      "grad_norm": 0.08345074206590652,
+      "learning_rate": 0.0009844188420384604,
+      "loss": 1.584,
+      "step": 8088
+    },
+    {
+      "epoch": 0.35969943532968746,
+      "grad_norm": 0.08102136850357056,
+      "learning_rate": 0.0009844100938072023,
+      "loss": 1.5883,
+      "step": 8090
+    },
+    {
+      "epoch": 0.35978835978835977,
+      "grad_norm": 0.08167117834091187,
+      "learning_rate": 0.0009844013431596263,
+      "loss": 1.5822,
+      "step": 8092
+    },
+    {
+      "epoch": 0.35987728424703214,
+      "grad_norm": 0.0804857537150383,
+      "learning_rate": 0.000984392590095776,
+      "loss": 1.5835,
+      "step": 8094
+    },
+    {
+      "epoch": 0.3599662087057045,
+      "grad_norm": 0.08060716092586517,
+      "learning_rate": 0.000984383834615695,
+      "loss": 1.5853,
+      "step": 8096
+    },
+    {
+      "epoch": 0.3600551331643769,
+      "grad_norm": 0.08844839036464691,
+      "learning_rate": 0.000984375076719427,
+      "loss": 1.582,
+      "step": 8098
+    },
+    {
+      "epoch": 0.36014405762304924,
+      "grad_norm": 0.07595451176166534,
+      "learning_rate": 0.0009843663164070158,
+      "loss": 1.5848,
+      "step": 8100
+    },
+    {
+      "epoch": 0.36023298208172155,
+      "grad_norm": 0.08353257924318314,
+      "learning_rate": 0.000984357553678505,
+      "loss": 1.5796,
+      "step": 8102
+    },
+    {
+      "epoch": 0.3603219065403939,
+      "grad_norm": 0.08055184781551361,
+      "learning_rate": 0.0009843487885339384,
+      "loss": 1.5814,
+      "step": 8104
+    },
+    {
+      "epoch": 0.3604108309990663,
+      "grad_norm": 0.07868727296590805,
+      "learning_rate": 0.0009843400209733593,
+      "loss": 1.5861,
+      "step": 8106
+    },
+    {
+      "epoch": 0.36049975545773866,
+      "grad_norm": 0.08025962114334106,
+      "learning_rate": 0.000984331250996812,
+      "loss": 1.5845,
+      "step": 8108
+    },
+    {
+      "epoch": 0.360588679916411,
+      "grad_norm": 0.07819036394357681,
+      "learning_rate": 0.0009843224786043401,
+      "loss": 1.5807,
+      "step": 8110
+    },
+    {
+      "epoch": 0.3606776043750834,
+      "grad_norm": 0.07715727388858795,
+      "learning_rate": 0.0009843137037959873,
+      "loss": 1.5842,
+      "step": 8112
+    },
+    {
+      "epoch": 0.3607665288337557,
+      "grad_norm": 0.08035559952259064,
+      "learning_rate": 0.0009843049265717975,
+      "loss": 1.5746,
+      "step": 8114
+    },
+    {
+      "epoch": 0.3608554532924281,
+      "grad_norm": 0.08042401075363159,
+      "learning_rate": 0.000984296146931814,
+      "loss": 1.5779,
+      "step": 8116
+    },
+    {
+      "epoch": 0.36094437775110044,
+      "grad_norm": 0.0793127790093422,
+      "learning_rate": 0.000984287364876081,
+      "loss": 1.5806,
+      "step": 8118
+    },
+    {
+      "epoch": 0.3610333022097728,
+      "grad_norm": 0.07924724370241165,
+      "learning_rate": 0.0009842785804046424,
+      "loss": 1.5812,
+      "step": 8120
+    },
+    {
+      "epoch": 0.3611222266684452,
+      "grad_norm": 0.08291906118392944,
+      "learning_rate": 0.0009842697935175417,
+      "loss": 1.5734,
+      "step": 8122
+    },
+    {
+      "epoch": 0.3612111511271175,
+      "grad_norm": 0.08052452653646469,
+      "learning_rate": 0.0009842610042148229,
+      "loss": 1.575,
+      "step": 8124
+    },
+    {
+      "epoch": 0.36130007558578986,
+      "grad_norm": 0.07844741642475128,
+      "learning_rate": 0.0009842522124965298,
+      "loss": 1.5802,
+      "step": 8126
+    },
+    {
+      "epoch": 0.3613890000444622,
+      "grad_norm": 0.08405445516109467,
+      "learning_rate": 0.0009842434183627063,
+      "loss": 1.571,
+      "step": 8128
+    },
+    {
+      "epoch": 0.3614779245031346,
+      "grad_norm": 0.07999279350042343,
+      "learning_rate": 0.0009842346218133964,
+      "loss": 1.5703,
+      "step": 8130
+    },
+    {
+      "epoch": 0.36156684896180696,
+      "grad_norm": 0.08422758430242538,
+      "learning_rate": 0.0009842258228486436,
+      "loss": 1.5774,
+      "step": 8132
+    },
+    {
+      "epoch": 0.3616557734204793,
+      "grad_norm": 0.07723760604858398,
+      "learning_rate": 0.0009842170214684923,
+      "loss": 1.5756,
+      "step": 8134
+    },
+    {
+      "epoch": 0.36174469787915164,
+      "grad_norm": 0.07770794630050659,
+      "learning_rate": 0.0009842082176729858,
+      "loss": 1.5674,
+      "step": 8136
+    },
+    {
+      "epoch": 0.361833622337824,
+      "grad_norm": 0.07809870690107346,
+      "learning_rate": 0.0009841994114621686,
+      "loss": 1.5741,
+      "step": 8138
+    },
+    {
+      "epoch": 0.3619225467964964,
+      "grad_norm": 0.08160432428121567,
+      "learning_rate": 0.0009841906028360842,
+      "loss": 1.5726,
+      "step": 8140
+    },
+    {
+      "epoch": 0.36201147125516875,
+      "grad_norm": 0.08070631325244904,
+      "learning_rate": 0.0009841817917947767,
+      "loss": 1.5736,
+      "step": 8142
+    },
+    {
+      "epoch": 0.3621003957138411,
+      "grad_norm": 0.07993360608816147,
+      "learning_rate": 0.0009841729783382903,
+      "loss": 1.5757,
+      "step": 8144
+    },
+    {
+      "epoch": 0.36218932017251343,
+      "grad_norm": 0.07653278112411499,
+      "learning_rate": 0.0009841641624666684,
+      "loss": 1.5737,
+      "step": 8146
+    },
+    {
+      "epoch": 0.3622782446311858,
+      "grad_norm": 0.07840976864099503,
+      "learning_rate": 0.0009841553441799554,
+      "loss": 1.5702,
+      "step": 8148
+    },
+    {
+      "epoch": 0.36236716908985817,
+      "grad_norm": 0.07674574106931686,
+      "learning_rate": 0.0009841465234781952,
+      "loss": 1.5761,
+      "step": 8150
+    },
+    {
+      "epoch": 0.36245609354853053,
+      "grad_norm": 0.08122869580984116,
+      "learning_rate": 0.0009841377003614318,
+      "loss": 1.5696,
+      "step": 8152
+    },
+    {
+      "epoch": 0.3625450180072029,
+      "grad_norm": 0.07849126309156418,
+      "learning_rate": 0.0009841288748297092,
+      "loss": 1.5692,
+      "step": 8154
+    },
+    {
+      "epoch": 0.3626339424658752,
+      "grad_norm": 0.0775504857301712,
+      "learning_rate": 0.0009841200468830714,
+      "loss": 1.5748,
+      "step": 8156
+    },
+    {
+      "epoch": 0.3627228669245476,
+      "grad_norm": 0.07808183133602142,
+      "learning_rate": 0.0009841112165215624,
+      "loss": 1.5702,
+      "step": 8158
+    },
+    {
+      "epoch": 0.36281179138321995,
+      "grad_norm": 0.0813973993062973,
+      "learning_rate": 0.0009841023837452263,
+      "loss": 1.5721,
+      "step": 8160
+    },
+    {
+      "epoch": 0.3629007158418923,
+      "grad_norm": 0.07763282209634781,
+      "learning_rate": 0.0009840935485541072,
+      "loss": 1.5666,
+      "step": 8162
+    },
+    {
+      "epoch": 0.3629896403005647,
+      "grad_norm": 0.07707773894071579,
+      "learning_rate": 0.000984084710948249,
+      "loss": 1.5699,
+      "step": 8164
+    },
+    {
+      "epoch": 0.36307856475923705,
+      "grad_norm": 0.07617053389549255,
+      "learning_rate": 0.0009840758709276961,
+      "loss": 1.5731,
+      "step": 8166
+    },
+    {
+      "epoch": 0.36316748921790937,
+      "grad_norm": 0.07609335333108902,
+      "learning_rate": 0.0009840670284924921,
+      "loss": 1.5734,
+      "step": 8168
+    },
+    {
+      "epoch": 0.36325641367658174,
+      "grad_norm": 0.07503112405538559,
+      "learning_rate": 0.0009840581836426817,
+      "loss": 1.5689,
+      "step": 8170
+    },
+    {
+      "epoch": 0.3633453381352541,
+      "grad_norm": 0.07643923163414001,
+      "learning_rate": 0.0009840493363783085,
+      "loss": 1.5717,
+      "step": 8172
+    },
+    {
+      "epoch": 0.36343426259392647,
+      "grad_norm": 0.07574044167995453,
+      "learning_rate": 0.0009840404866994171,
+      "loss": 1.569,
+      "step": 8174
+    },
+    {
+      "epoch": 0.36352318705259884,
+      "grad_norm": 0.07417095452547073,
+      "learning_rate": 0.0009840316346060513,
+      "loss": 1.569,
+      "step": 8176
+    },
+    {
+      "epoch": 0.36361211151127115,
+      "grad_norm": 0.07547099888324738,
+      "learning_rate": 0.0009840227800982553,
+      "loss": 1.5751,
+      "step": 8178
+    },
+    {
+      "epoch": 0.3637010359699435,
+      "grad_norm": 0.07645308971405029,
+      "learning_rate": 0.0009840139231760735,
+      "loss": 1.5648,
+      "step": 8180
+    },
+    {
+      "epoch": 0.3637899604286159,
+      "grad_norm": 0.07473346590995789,
+      "learning_rate": 0.0009840050638395498,
+      "loss": 1.5661,
+      "step": 8182
+    },
+    {
+      "epoch": 0.36387888488728826,
+      "grad_norm": 0.07807636260986328,
+      "learning_rate": 0.0009839962020887286,
+      "loss": 1.5724,
+      "step": 8184
+    },
+    {
+      "epoch": 0.3639678093459606,
+      "grad_norm": 0.07300268113613129,
+      "learning_rate": 0.0009839873379236537,
+      "loss": 1.571,
+      "step": 8186
+    },
+    {
+      "epoch": 0.36405673380463294,
+      "grad_norm": 0.07703215628862381,
+      "learning_rate": 0.00098397847134437,
+      "loss": 1.5722,
+      "step": 8188
+    },
+    {
+      "epoch": 0.3641456582633053,
+      "grad_norm": 0.07669734954833984,
+      "learning_rate": 0.000983969602350921,
+      "loss": 1.5676,
+      "step": 8190
+    },
+    {
+      "epoch": 0.3642345827219777,
+      "grad_norm": 0.07603123784065247,
+      "learning_rate": 0.0009839607309433515,
+      "loss": 1.5688,
+      "step": 8192
+    },
+    {
+      "epoch": 0.36432350718065004,
+      "grad_norm": 0.07759499549865723,
+      "learning_rate": 0.0009839518571217055,
+      "loss": 1.5643,
+      "step": 8194
+    },
+    {
+      "epoch": 0.3644124316393224,
+      "grad_norm": 0.07855424284934998,
+      "learning_rate": 0.0009839429808860274,
+      "loss": 1.5641,
+      "step": 8196
+    },
+    {
+      "epoch": 0.3645013560979948,
+      "grad_norm": 0.07806675881147385,
+      "learning_rate": 0.0009839341022363612,
+      "loss": 1.572,
+      "step": 8198
+    },
+    {
+      "epoch": 0.3645902805566671,
+      "grad_norm": 0.08267048001289368,
+      "learning_rate": 0.0009839252211727514,
+      "loss": 1.5711,
+      "step": 8200
+    },
+    {
+      "epoch": 0.36467920501533946,
+      "grad_norm": 0.07525893300771713,
+      "learning_rate": 0.0009839163376952425,
+      "loss": 1.5687,
+      "step": 8202
+    },
+    {
+      "epoch": 0.3647681294740118,
+      "grad_norm": 0.07745590060949326,
+      "learning_rate": 0.0009839074518038784,
+      "loss": 1.5629,
+      "step": 8204
+    },
+    {
+      "epoch": 0.3648570539326842,
+      "grad_norm": 0.07969274371862411,
+      "learning_rate": 0.0009838985634987036,
+      "loss": 1.5702,
+      "step": 8206
+    },
+    {
+      "epoch": 0.36494597839135656,
+      "grad_norm": 0.0816626101732254,
+      "learning_rate": 0.0009838896727797624,
+      "loss": 1.5599,
+      "step": 8208
+    },
+    {
+      "epoch": 0.3650349028500289,
+      "grad_norm": 0.07653602957725525,
+      "learning_rate": 0.0009838807796470992,
+      "loss": 1.5618,
+      "step": 8210
+    },
+    {
+      "epoch": 0.36512382730870124,
+      "grad_norm": 0.08516646176576614,
+      "learning_rate": 0.0009838718841007586,
+      "loss": 1.5642,
+      "step": 8212
+    },
+    {
+      "epoch": 0.3652127517673736,
+      "grad_norm": 0.07569476962089539,
+      "learning_rate": 0.0009838629861407843,
+      "loss": 1.5597,
+      "step": 8214
+    },
+    {
+      "epoch": 0.365301676226046,
+      "grad_norm": 0.07699557393789291,
+      "learning_rate": 0.0009838540857672214,
+      "loss": 1.5649,
+      "step": 8216
+    },
+    {
+      "epoch": 0.36539060068471835,
+      "grad_norm": 0.07943487912416458,
+      "learning_rate": 0.000983845182980114,
+      "loss": 1.5606,
+      "step": 8218
+    },
+    {
+      "epoch": 0.3654795251433907,
+      "grad_norm": 0.07496616989374161,
+      "learning_rate": 0.0009838362777795065,
+      "loss": 1.5649,
+      "step": 8220
+    },
+    {
+      "epoch": 0.36556844960206303,
+      "grad_norm": 0.07536393404006958,
+      "learning_rate": 0.0009838273701654433,
+      "loss": 1.5696,
+      "step": 8222
+    },
+    {
+      "epoch": 0.3656573740607354,
+      "grad_norm": 0.07574569433927536,
+      "learning_rate": 0.0009838184601379688,
+      "loss": 1.5623,
+      "step": 8224
+    },
+    {
+      "epoch": 0.36574629851940776,
+      "grad_norm": 0.07682286202907562,
+      "learning_rate": 0.0009838095476971274,
+      "loss": 1.5676,
+      "step": 8226
+    },
+    {
+      "epoch": 0.36583522297808013,
+      "grad_norm": 0.08097031712532043,
+      "learning_rate": 0.000983800632842964,
+      "loss": 1.5641,
+      "step": 8228
+    },
+    {
+      "epoch": 0.3659241474367525,
+      "grad_norm": 0.07592246681451797,
+      "learning_rate": 0.0009837917155755226,
+      "loss": 1.5562,
+      "step": 8230
+    },
+    {
+      "epoch": 0.3660130718954248,
+      "grad_norm": 0.07768161594867706,
+      "learning_rate": 0.0009837827958948477,
+      "loss": 1.5638,
+      "step": 8232
+    },
+    {
+      "epoch": 0.3661019963540972,
+      "grad_norm": 0.07560420036315918,
+      "learning_rate": 0.0009837738738009841,
+      "loss": 1.5625,
+      "step": 8234
+    },
+    {
+      "epoch": 0.36619092081276955,
+      "grad_norm": 0.07644996047019958,
+      "learning_rate": 0.000983764949293976,
+      "loss": 1.5602,
+      "step": 8236
+    },
+    {
+      "epoch": 0.3662798452714419,
+      "grad_norm": 0.07451751828193665,
+      "learning_rate": 0.0009837560223738679,
+      "loss": 1.5608,
+      "step": 8238
+    },
+    {
+      "epoch": 0.3663687697301143,
+      "grad_norm": 0.0761575847864151,
+      "learning_rate": 0.0009837470930407046,
+      "loss": 1.5542,
+      "step": 8240
+    },
+    {
+      "epoch": 0.36645769418878665,
+      "grad_norm": 0.0770341232419014,
+      "learning_rate": 0.0009837381612945305,
+      "loss": 1.5584,
+      "step": 8242
+    },
+    {
+      "epoch": 0.36654661864745897,
+      "grad_norm": 0.07663694024085999,
+      "learning_rate": 0.0009837292271353902,
+      "loss": 1.5646,
+      "step": 8244
+    },
+    {
+      "epoch": 0.36663554310613133,
+      "grad_norm": 0.0755605474114418,
+      "learning_rate": 0.0009837202905633282,
+      "loss": 1.5621,
+      "step": 8246
+    },
+    {
+      "epoch": 0.3667244675648037,
+      "grad_norm": 0.07471001893281937,
+      "learning_rate": 0.0009837113515783892,
+      "loss": 1.5641,
+      "step": 8248
+    },
+    {
+      "epoch": 0.36681339202347607,
+      "grad_norm": 0.07854770869016647,
+      "learning_rate": 0.0009837024101806173,
+      "loss": 1.5628,
+      "step": 8250
+    },
+    {
+      "epoch": 0.36690231648214844,
+      "grad_norm": 0.07697580009698868,
+      "learning_rate": 0.000983693466370058,
+      "loss": 1.561,
+      "step": 8252
+    },
+    {
+      "epoch": 0.36699124094082075,
+      "grad_norm": 0.07174623012542725,
+      "learning_rate": 0.000983684520146755,
+      "loss": 1.5571,
+      "step": 8254
+    },
+    {
+      "epoch": 0.3670801653994931,
+      "grad_norm": 0.0765925943851471,
+      "learning_rate": 0.0009836755715107535,
+      "loss": 1.5634,
+      "step": 8256
+    },
+    {
+      "epoch": 0.3671690898581655,
+      "grad_norm": 0.07561610639095306,
+      "learning_rate": 0.000983666620462098,
+      "loss": 1.5588,
+      "step": 8258
+    },
+    {
+      "epoch": 0.36725801431683786,
+      "grad_norm": 0.07434527575969696,
+      "learning_rate": 0.000983657667000833,
+      "loss": 1.5617,
+      "step": 8260
+    },
+    {
+      "epoch": 0.3673469387755102,
+      "grad_norm": 0.07405678927898407,
+      "learning_rate": 0.0009836487111270034,
+      "loss": 1.5646,
+      "step": 8262
+    },
+    {
+      "epoch": 0.36743586323418254,
+      "grad_norm": 0.07639020681381226,
+      "learning_rate": 0.0009836397528406534,
+      "loss": 1.5586,
+      "step": 8264
+    },
+    {
+      "epoch": 0.3675247876928549,
+      "grad_norm": 0.07487154006958008,
+      "learning_rate": 0.0009836307921418284,
+      "loss": 1.5547,
+      "step": 8266
+    },
+    {
+      "epoch": 0.36761371215152727,
+      "grad_norm": 0.07496780157089233,
+      "learning_rate": 0.0009836218290305728,
+      "loss": 1.5611,
+      "step": 8268
+    },
+    {
+      "epoch": 0.36770263661019964,
+      "grad_norm": 0.07398813217878342,
+      "learning_rate": 0.000983612863506931,
+      "loss": 1.558,
+      "step": 8270
+    },
+    {
+      "epoch": 0.367791561068872,
+      "grad_norm": 0.07573290914297104,
+      "learning_rate": 0.000983603895570948,
+      "loss": 1.5595,
+      "step": 8272
+    },
+    {
+      "epoch": 0.3678804855275444,
+      "grad_norm": 0.07165679335594177,
+      "learning_rate": 0.0009835949252226686,
+      "loss": 1.5649,
+      "step": 8274
+    },
+    {
+      "epoch": 0.3679694099862167,
+      "grad_norm": 0.07558693736791611,
+      "learning_rate": 0.0009835859524621373,
+      "loss": 1.5568,
+      "step": 8276
+    },
+    {
+      "epoch": 0.36805833444488906,
+      "grad_norm": 0.0720970556139946,
+      "learning_rate": 0.000983576977289399,
+      "loss": 1.5583,
+      "step": 8278
+    },
+    {
+      "epoch": 0.3681472589035614,
+      "grad_norm": 0.07370097190141678,
+      "learning_rate": 0.0009835679997044985,
+      "loss": 1.5612,
+      "step": 8280
+    },
+    {
+      "epoch": 0.3682361833622338,
+      "grad_norm": 0.07401144504547119,
+      "learning_rate": 0.0009835590197074806,
+      "loss": 1.5596,
+      "step": 8282
+    },
+    {
+      "epoch": 0.36832510782090616,
+      "grad_norm": 0.07251115888357162,
+      "learning_rate": 0.0009835500372983902,
+      "loss": 1.5569,
+      "step": 8284
+    },
+    {
+      "epoch": 0.3684140322795785,
+      "grad_norm": 0.07301607728004456,
+      "learning_rate": 0.0009835410524772718,
+      "loss": 1.5591,
+      "step": 8286
+    },
+    {
+      "epoch": 0.36850295673825084,
+      "grad_norm": 0.07856374233961105,
+      "learning_rate": 0.0009835320652441702,
+      "loss": 1.5658,
+      "step": 8288
+    },
+    {
+      "epoch": 0.3685918811969232,
+      "grad_norm": 0.07428453117609024,
+      "learning_rate": 0.0009835230755991305,
+      "loss": 1.5631,
+      "step": 8290
+    },
+    {
+      "epoch": 0.3686808056555956,
+      "grad_norm": 0.07632127404212952,
+      "learning_rate": 0.0009835140835421977,
+      "loss": 1.5659,
+      "step": 8292
+    },
+    {
+      "epoch": 0.36876973011426795,
+      "grad_norm": 0.07355497777462006,
+      "learning_rate": 0.0009835050890734161,
+      "loss": 1.5582,
+      "step": 8294
+    },
+    {
+      "epoch": 0.3688586545729403,
+      "grad_norm": 0.07494645565748215,
+      "learning_rate": 0.0009834960921928308,
+      "loss": 1.5584,
+      "step": 8296
+    },
+    {
+      "epoch": 0.3689475790316126,
+      "grad_norm": 0.07737577706575394,
+      "learning_rate": 0.000983487092900487,
+      "loss": 1.5562,
+      "step": 8298
+    },
+    {
+      "epoch": 0.369036503490285,
+      "grad_norm": 0.07639828324317932,
+      "learning_rate": 0.000983478091196429,
+      "loss": 1.5646,
+      "step": 8300
+    },
+    {
+      "epoch": 0.36912542794895736,
+      "grad_norm": 0.07329989224672318,
+      "learning_rate": 0.0009834690870807023,
+      "loss": 1.5579,
+      "step": 8302
+    },
+    {
+      "epoch": 0.36921435240762973,
+      "grad_norm": 0.07461026310920715,
+      "learning_rate": 0.0009834600805533516,
+      "loss": 1.5522,
+      "step": 8304
+    },
+    {
+      "epoch": 0.3693032768663021,
+      "grad_norm": 0.07623513787984848,
+      "learning_rate": 0.0009834510716144214,
+      "loss": 1.5643,
+      "step": 8306
+    },
+    {
+      "epoch": 0.3693922013249744,
+      "grad_norm": 0.07590184360742569,
+      "learning_rate": 0.0009834420602639574,
+      "loss": 1.5595,
+      "step": 8308
+    },
+    {
+      "epoch": 0.3694811257836468,
+      "grad_norm": 0.07395032793283463,
+      "learning_rate": 0.0009834330465020038,
+      "loss": 1.5598,
+      "step": 8310
+    },
+    {
+      "epoch": 0.36957005024231915,
+      "grad_norm": 0.07140004634857178,
+      "learning_rate": 0.0009834240303286063,
+      "loss": 1.5628,
+      "step": 8312
+    },
+    {
+      "epoch": 0.3696589747009915,
+      "grad_norm": 0.0748644694685936,
+      "learning_rate": 0.0009834150117438091,
+      "loss": 1.5601,
+      "step": 8314
+    },
+    {
+      "epoch": 0.3697478991596639,
+      "grad_norm": 0.07483812421560287,
+      "learning_rate": 0.0009834059907476579,
+      "loss": 1.5533,
+      "step": 8316
+    },
+    {
+      "epoch": 0.3698368236183362,
+      "grad_norm": 0.07460799068212509,
+      "learning_rate": 0.0009833969673401972,
+      "loss": 1.5583,
+      "step": 8318
+    },
+    {
+      "epoch": 0.36992574807700856,
+      "grad_norm": 0.0767635703086853,
+      "learning_rate": 0.0009833879415214722,
+      "loss": 1.5656,
+      "step": 8320
+    },
+    {
+      "epoch": 0.37001467253568093,
+      "grad_norm": 0.07315431535243988,
+      "learning_rate": 0.0009833789132915282,
+      "loss": 1.5562,
+      "step": 8322
+    },
+    {
+      "epoch": 0.3701035969943533,
+      "grad_norm": 0.07698900997638702,
+      "learning_rate": 0.0009833698826504096,
+      "loss": 1.5598,
+      "step": 8324
+    },
+    {
+      "epoch": 0.37019252145302567,
+      "grad_norm": 0.07581165432929993,
+      "learning_rate": 0.0009833608495981618,
+      "loss": 1.5574,
+      "step": 8326
+    },
+    {
+      "epoch": 0.37028144591169804,
+      "grad_norm": 0.07427161186933517,
+      "learning_rate": 0.0009833518141348298,
+      "loss": 1.5555,
+      "step": 8328
+    },
+    {
+      "epoch": 0.37037037037037035,
+      "grad_norm": 0.07862924039363861,
+      "learning_rate": 0.000983342776260459,
+      "loss": 1.5601,
+      "step": 8330
+    },
+    {
+      "epoch": 0.3704592948290427,
+      "grad_norm": 0.07485569268465042,
+      "learning_rate": 0.000983333735975094,
+      "loss": 1.5526,
+      "step": 8332
+    },
+    {
+      "epoch": 0.3705482192877151,
+      "grad_norm": 0.07545112073421478,
+      "learning_rate": 0.0009833246932787801,
+      "loss": 1.5548,
+      "step": 8334
+    },
+    {
+      "epoch": 0.37063714374638745,
+      "grad_norm": 0.07797663658857346,
+      "learning_rate": 0.0009833156481715624,
+      "loss": 1.5603,
+      "step": 8336
+    },
+    {
+      "epoch": 0.3707260682050598,
+      "grad_norm": 0.07360795885324478,
+      "learning_rate": 0.0009833066006534861,
+      "loss": 1.5535,
+      "step": 8338
+    },
+    {
+      "epoch": 0.37081499266373213,
+      "grad_norm": 0.07508852332830429,
+      "learning_rate": 0.0009832975507245963,
+      "loss": 1.562,
+      "step": 8340
+    },
+    {
+      "epoch": 0.3709039171224045,
+      "grad_norm": 0.07691474258899689,
+      "learning_rate": 0.000983288498384938,
+      "loss": 1.5559,
+      "step": 8342
+    },
+    {
+      "epoch": 0.37099284158107687,
+      "grad_norm": 0.07170414924621582,
+      "learning_rate": 0.0009832794436345565,
+      "loss": 1.5595,
+      "step": 8344
+    },
+    {
+      "epoch": 0.37108176603974924,
+      "grad_norm": 0.07313521206378937,
+      "learning_rate": 0.000983270386473497,
+      "loss": 1.5568,
+      "step": 8346
+    },
+    {
+      "epoch": 0.3711706904984216,
+      "grad_norm": 0.07443317025899887,
+      "learning_rate": 0.0009832613269018043,
+      "loss": 1.5551,
+      "step": 8348
+    },
+    {
+      "epoch": 0.371259614957094,
+      "grad_norm": 0.0727194994688034,
+      "learning_rate": 0.000983252264919524,
+      "loss": 1.5558,
+      "step": 8350
+    },
+    {
+      "epoch": 0.3713485394157663,
+      "grad_norm": 0.07491002231836319,
+      "learning_rate": 0.0009832432005267013,
+      "loss": 1.5553,
+      "step": 8352
+    },
+    {
+      "epoch": 0.37143746387443866,
+      "grad_norm": 0.07130132615566254,
+      "learning_rate": 0.0009832341337233813,
+      "loss": 1.5515,
+      "step": 8354
+    },
+    {
+      "epoch": 0.371526388333111,
+      "grad_norm": 0.0775056853890419,
+      "learning_rate": 0.0009832250645096091,
+      "loss": 1.5612,
+      "step": 8356
+    },
+    {
+      "epoch": 0.3716153127917834,
+      "grad_norm": 0.0759596973657608,
+      "learning_rate": 0.00098321599288543,
+      "loss": 1.554,
+      "step": 8358
+    },
+    {
+      "epoch": 0.37170423725045576,
+      "grad_norm": 0.0767047107219696,
+      "learning_rate": 0.0009832069188508896,
+      "loss": 1.5613,
+      "step": 8360
+    },
+    {
+      "epoch": 0.3717931617091281,
+      "grad_norm": 0.07511959224939346,
+      "learning_rate": 0.0009831978424060328,
+      "loss": 1.5553,
+      "step": 8362
+    },
+    {
+      "epoch": 0.37188208616780044,
+      "grad_norm": 0.07674729824066162,
+      "learning_rate": 0.0009831887635509049,
+      "loss": 1.5505,
+      "step": 8364
+    },
+    {
+      "epoch": 0.3719710106264728,
+      "grad_norm": 0.07360707968473434,
+      "learning_rate": 0.0009831796822855512,
+      "loss": 1.556,
+      "step": 8366
+    },
+    {
+      "epoch": 0.3720599350851452,
+      "grad_norm": 0.0746261477470398,
+      "learning_rate": 0.000983170598610017,
+      "loss": 1.5553,
+      "step": 8368
+    },
+    {
+      "epoch": 0.37214885954381755,
+      "grad_norm": 0.07465831935405731,
+      "learning_rate": 0.0009831615125243478,
+      "loss": 1.55,
+      "step": 8370
+    },
+    {
+      "epoch": 0.37223778400248986,
+      "grad_norm": 0.07530109584331512,
+      "learning_rate": 0.0009831524240285887,
+      "loss": 1.5556,
+      "step": 8372
+    },
+    {
+      "epoch": 0.3723267084611622,
+      "grad_norm": 0.07392653822898865,
+      "learning_rate": 0.0009831433331227851,
+      "loss": 1.5582,
+      "step": 8374
+    },
+    {
+      "epoch": 0.3724156329198346,
+      "grad_norm": 0.0778929591178894,
+      "learning_rate": 0.0009831342398069823,
+      "loss": 1.5604,
+      "step": 8376
+    },
+    {
+      "epoch": 0.37250455737850696,
+      "grad_norm": 0.07217849791049957,
+      "learning_rate": 0.000983125144081226,
+      "loss": 1.5528,
+      "step": 8378
+    },
+    {
+      "epoch": 0.37259348183717933,
+      "grad_norm": 0.07852315157651901,
+      "learning_rate": 0.000983116045945561,
+      "loss": 1.56,
+      "step": 8380
+    },
+    {
+      "epoch": 0.3726824062958517,
+      "grad_norm": 0.07284464687108994,
+      "learning_rate": 0.000983106945400033,
+      "loss": 1.5594,
+      "step": 8382
+    },
+    {
+      "epoch": 0.372771330754524,
+      "grad_norm": 0.07296974211931229,
+      "learning_rate": 0.0009830978424446874,
+      "loss": 1.5558,
+      "step": 8384
+    },
+    {
+      "epoch": 0.3728602552131964,
+      "grad_norm": 0.07269974797964096,
+      "learning_rate": 0.0009830887370795695,
+      "loss": 1.5563,
+      "step": 8386
+    },
+    {
+      "epoch": 0.37294917967186875,
+      "grad_norm": 0.07310895621776581,
+      "learning_rate": 0.0009830796293047249,
+      "loss": 1.559,
+      "step": 8388
+    },
+    {
+      "epoch": 0.3730381041305411,
+      "grad_norm": 0.07368705421686172,
+      "learning_rate": 0.000983070519120199,
+      "loss": 1.5569,
+      "step": 8390
+    },
+    {
+      "epoch": 0.3731270285892135,
+      "grad_norm": 0.07440350204706192,
+      "learning_rate": 0.000983061406526037,
+      "loss": 1.5542,
+      "step": 8392
+    },
+    {
+      "epoch": 0.3732159530478858,
+      "grad_norm": 0.07380738109350204,
+      "learning_rate": 0.0009830522915222845,
+      "loss": 1.5558,
+      "step": 8394
+    },
+    {
+      "epoch": 0.37330487750655816,
+      "grad_norm": 0.0736425444483757,
+      "learning_rate": 0.000983043174108987,
+      "loss": 1.5493,
+      "step": 8396
+    },
+    {
+      "epoch": 0.37339380196523053,
+      "grad_norm": 0.07320426404476166,
+      "learning_rate": 0.0009830340542861902,
+      "loss": 1.5516,
     }
   ],
   "logging_steps": 2,
@@ -28154,7 +31670,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
   "train_batch_size": 768,
   "trial_name": null,
   "trial_params": null
29530 |
+
"step": 8398
|
29531 |
+
},
|
29532 |
+
{
|
29533 |
+
"epoch": 0.3734827264239029,
|
29534 |
+
"grad_norm": 0.07398249953985214,
|
29535 |
+
"learning_rate": 0.0009830249320539391,
|
29536 |
+
"loss": 1.5563,
|
29537 |
+
"step": 8400
|
29538 |
+
},
|
29539 |
+
{
|
29540 |
+
"epoch": 0.37357165088257527,
|
29541 |
+
"grad_norm": 0.0731641873717308,
|
29542 |
+
"learning_rate": 0.0009830158074122795,
|
29543 |
+
"loss": 1.5547,
|
29544 |
+
"step": 8402
|
29545 |
+
},
|
29546 |
+
{
|
29547 |
+
"epoch": 0.37366057534124764,
|
29548 |
+
"grad_norm": 0.07476949691772461,
|
29549 |
+
"learning_rate": 0.000983006680361257,
|
29550 |
+
"loss": 1.5563,
|
29551 |
+
"step": 8404
|
29552 |
+
},
|
29553 |
+
{
|
29554 |
+
"epoch": 0.37374949979991995,
|
29555 |
+
"grad_norm": 0.07096338272094727,
|
29556 |
+
"learning_rate": 0.000982997550900917,
|
29557 |
+
"loss": 1.5579,
|
29558 |
+
"step": 8406
|
29559 |
+
},
|
29560 |
+
{
|
29561 |
+
"epoch": 0.3738384242585923,
|
29562 |
+
"grad_norm": 0.0731477290391922,
|
29563 |
+
"learning_rate": 0.0009829884190313048,
|
29564 |
+
"loss": 1.5571,
|
29565 |
+
"step": 8408
|
29566 |
+
},
|
29567 |
+
{
|
29568 |
+
"epoch": 0.3739273487172647,
|
29569 |
+
"grad_norm": 0.07443249970674515,
|
29570 |
+
"learning_rate": 0.0009829792847524663,
|
29571 |
+
"loss": 1.5524,
|
29572 |
+
"step": 8410
|
29573 |
+
},
|
29574 |
+
{
|
29575 |
+
"epoch": 0.37401627317593705,
|
29576 |
+
"grad_norm": 0.07522628456354141,
|
29577 |
+
"learning_rate": 0.000982970148064447,
|
29578 |
+
"loss": 1.5515,
|
29579 |
+
"step": 8412
|
29580 |
+
},
|
29581 |
+
{
|
29582 |
+
"epoch": 0.3741051976346094,
|
29583 |
+
"grad_norm": 0.08042244613170624,
|
29584 |
+
"learning_rate": 0.0009829610089672925,
|
29585 |
+
"loss": 1.552,
|
29586 |
+
"step": 8414
|
29587 |
+
},
|
29588 |
+
{
|
29589 |
+
"epoch": 0.37419412209328173,
|
29590 |
+
"grad_norm": 0.0764075294137001,
|
29591 |
+
"learning_rate": 0.0009829518674610482,
|
29592 |
+
"loss": 1.5509,
|
29593 |
+
"step": 8416
|
29594 |
+
},
|
29595 |
+
{
|
29596 |
+
"epoch": 0.3742830465519541,
|
29597 |
+
"grad_norm": 0.07679541409015656,
|
29598 |
+
"learning_rate": 0.0009829427235457599,
|
29599 |
+
"loss": 1.5558,
|
29600 |
+
"step": 8418
|
29601 |
+
},
|
29602 |
+
{
|
29603 |
+
"epoch": 0.37437197101062647,
|
29604 |
+
"grad_norm": 0.07601416856050491,
|
29605 |
+
"learning_rate": 0.0009829335772214732,
|
29606 |
+
"loss": 1.5551,
|
29607 |
+
"step": 8420
|
29608 |
+
},
|
29609 |
+
{
|
29610 |
+
"epoch": 0.37446089546929884,
|
29611 |
+
"grad_norm": 0.07198473066091537,
|
29612 |
+
"learning_rate": 0.0009829244284882336,
|
29613 |
+
"loss": 1.5515,
|
29614 |
+
"step": 8422
|
29615 |
+
},
|
29616 |
+
{
|
29617 |
+
"epoch": 0.3745498199279712,
|
29618 |
+
"grad_norm": 0.0754043385386467,
|
29619 |
+
"learning_rate": 0.0009829152773460869,
|
29620 |
+
"loss": 1.5522,
|
29621 |
+
"step": 8424
|
29622 |
+
},
|
29623 |
+
{
|
29624 |
+
"epoch": 0.3746387443866436,
|
29625 |
+
"grad_norm": 0.07835698872804642,
|
29626 |
+
"learning_rate": 0.0009829061237950783,
|
29627 |
+
"loss": 1.5558,
|
29628 |
+
"step": 8426
|
29629 |
+
},
|
29630 |
+
{
|
29631 |
+
"epoch": 0.3747276688453159,
|
29632 |
+
"grad_norm": 0.07373791188001633,
|
29633 |
+
"learning_rate": 0.000982896967835254,
|
29634 |
+
"loss": 1.5485,
|
29635 |
+
"step": 8428
|
29636 |
+
},
|
29637 |
+
{
|
29638 |
+
"epoch": 0.37481659330398825,
|
29639 |
+
"grad_norm": 0.07449112087488174,
|
29640 |
+
"learning_rate": 0.0009828878094666595,
|
29641 |
+
"loss": 1.5524,
|
29642 |
+
"step": 8430
|
29643 |
+
},
|
29644 |
+
{
|
29645 |
+
"epoch": 0.3749055177626606,
|
29646 |
+
"grad_norm": 0.07234968990087509,
|
29647 |
+
"learning_rate": 0.0009828786486893405,
|
29648 |
+
"loss": 1.5579,
|
29649 |
+
"step": 8432
|
29650 |
+
},
|
29651 |
+
{
|
29652 |
+
"epoch": 0.374994442221333,
|
29653 |
+
"grad_norm": 0.07672835886478424,
|
29654 |
+
"learning_rate": 0.0009828694855033425,
|
29655 |
+
"loss": 1.5522,
|
29656 |
+
"step": 8434
|
29657 |
+
},
|
29658 |
+
{
|
29659 |
+
"epoch": 0.37508336668000536,
|
29660 |
+
"grad_norm": 0.07505165040493011,
|
29661 |
+
"learning_rate": 0.0009828603199087114,
|
29662 |
+
"loss": 1.5515,
|
29663 |
+
"step": 8436
|
29664 |
+
},
|
29665 |
+
{
|
29666 |
+
"epoch": 0.37517229113867767,
|
29667 |
+
"grad_norm": 0.07347539067268372,
|
29668 |
+
"learning_rate": 0.0009828511519054928,
|
29669 |
+
"loss": 1.552,
|
29670 |
+
"step": 8438
|
29671 |
+
},
|
29672 |
+
{
|
29673 |
+
"epoch": 0.37526121559735004,
|
29674 |
+
"grad_norm": 0.07479852437973022,
|
29675 |
+
"learning_rate": 0.0009828419814937326,
|
29676 |
+
"loss": 1.5485,
|
29677 |
+
"step": 8440
|
29678 |
+
},
|
29679 |
+
{
|
29680 |
+
"epoch": 0.3753501400560224,
|
29681 |
+
"grad_norm": 0.0721561387181282,
|
29682 |
+
"learning_rate": 0.0009828328086734766,
|
29683 |
+
"loss": 1.5542,
|
29684 |
+
"step": 8442
|
29685 |
+
},
|
29686 |
+
{
|
29687 |
+
"epoch": 0.3754390645146948,
|
29688 |
+
"grad_norm": 0.07447963207960129,
|
29689 |
+
"learning_rate": 0.0009828236334447702,
|
29690 |
+
"loss": 1.5502,
|
29691 |
+
"step": 8444
|
29692 |
+
},
|
29693 |
+
{
|
29694 |
+
"epoch": 0.37552798897336714,
|
29695 |
+
"grad_norm": 0.07521109282970428,
|
29696 |
+
"learning_rate": 0.0009828144558076595,
|
29697 |
+
"loss": 1.5478,
|
29698 |
+
"step": 8446
|
29699 |
+
},
|
29700 |
+
{
|
29701 |
+
"epoch": 0.37561691343203946,
|
29702 |
+
"grad_norm": 0.08029814064502716,
|
29703 |
+
"learning_rate": 0.00098280527576219,
|
29704 |
+
"loss": 1.5552,
|
29705 |
+
"step": 8448
|
29706 |
+
},
|
29707 |
+
{
|
29708 |
+
"epoch": 0.3757058378907118,
|
29709 |
+
"grad_norm": 0.07279892265796661,
|
29710 |
+
"learning_rate": 0.0009827960933084076,
|
29711 |
+
"loss": 1.549,
|
29712 |
+
"step": 8450
|
29713 |
+
},
|
29714 |
+
{
|
29715 |
+
"epoch": 0.3757947623493842,
|
29716 |
+
"grad_norm": 0.07226986438035965,
|
29717 |
+
"learning_rate": 0.0009827869084463583,
|
29718 |
+
"loss": 1.5512,
|
29719 |
+
"step": 8452
|
29720 |
+
},
|
29721 |
+
{
|
29722 |
+
"epoch": 0.37588368680805656,
|
29723 |
+
"grad_norm": 0.07428599148988724,
|
29724 |
+
"learning_rate": 0.0009827777211760878,
|
29725 |
+
"loss": 1.5563,
|
29726 |
+
"step": 8454
|
29727 |
+
},
|
29728 |
+
{
|
29729 |
+
"epoch": 0.37597261126672893,
|
29730 |
+
"grad_norm": 0.07335691154003143,
|
29731 |
+
"learning_rate": 0.0009827685314976419,
|
29732 |
+
"loss": 1.5522,
|
29733 |
+
"step": 8456
|
29734 |
+
},
|
29735 |
+
{
|
29736 |
+
"epoch": 0.3760615357254013,
|
29737 |
+
"grad_norm": 0.0702577456831932,
|
29738 |
+
"learning_rate": 0.000982759339411066,
|
29739 |
+
"loss": 1.5493,
|
29740 |
+
"step": 8458
|
29741 |
+
},
|
29742 |
+
{
|
29743 |
+
"epoch": 0.3761504601840736,
|
29744 |
+
"grad_norm": 0.07333244383335114,
|
29745 |
+
"learning_rate": 0.000982750144916407,
|
29746 |
+
"loss": 1.5525,
|
29747 |
+
"step": 8460
|
29748 |
+
},
|
29749 |
+
{
|
29750 |
+
"epoch": 0.376239384642746,
|
29751 |
+
"grad_norm": 0.07639877498149872,
|
29752 |
+
"learning_rate": 0.0009827409480137098,
|
29753 |
+
"loss": 1.5517,
|
29754 |
+
"step": 8462
|
29755 |
+
},
|
29756 |
+
{
|
29757 |
+
"epoch": 0.37632830910141835,
|
29758 |
+
"grad_norm": 0.07263181358575821,
|
29759 |
+
"learning_rate": 0.0009827317487030207,
|
29760 |
+
"loss": 1.549,
|
29761 |
+
"step": 8464
|
29762 |
+
},
|
29763 |
+
{
|
29764 |
+
"epoch": 0.3764172335600907,
|
29765 |
+
"grad_norm": 0.07310689985752106,
|
29766 |
+
"learning_rate": 0.0009827225469843854,
|
29767 |
+
"loss": 1.5462,
|
29768 |
+
"step": 8466
|
29769 |
+
},
|
29770 |
+
{
|
29771 |
+
"epoch": 0.3765061580187631,
|
29772 |
+
"grad_norm": 0.07368572801351547,
|
29773 |
+
"learning_rate": 0.00098271334285785,
|
29774 |
+
"loss": 1.5532,
|
29775 |
+
"step": 8468
|
29776 |
+
},
|
29777 |
+
{
|
29778 |
+
"epoch": 0.3765950824774354,
|
29779 |
+
"grad_norm": 0.07141252607107162,
|
29780 |
+
"learning_rate": 0.0009827041363234604,
|
29781 |
+
"loss": 1.5565,
|
29782 |
+
"step": 8470
|
29783 |
+
},
|
29784 |
+
{
|
29785 |
+
"epoch": 0.37668400693610776,
|
29786 |
+
"grad_norm": 0.07508658617734909,
|
29787 |
+
"learning_rate": 0.0009826949273812622,
|
29788 |
+
"loss": 1.5514,
|
29789 |
+
"step": 8472
|
29790 |
+
},
|
29791 |
+
{
|
29792 |
+
"epoch": 0.37677293139478013,
|
29793 |
+
"grad_norm": 0.07459234446287155,
|
29794 |
+
"learning_rate": 0.0009826857160313018,
|
29795 |
+
"loss": 1.5596,
|
29796 |
+
"step": 8474
|
29797 |
+
},
|
29798 |
+
{
|
29799 |
+
"epoch": 0.3768618558534525,
|
29800 |
+
"grad_norm": 0.07409926503896713,
|
29801 |
+
"learning_rate": 0.0009826765022736249,
|
29802 |
+
"loss": 1.5514,
|
29803 |
+
"step": 8476
|
29804 |
+
},
|
29805 |
+
{
|
29806 |
+
"epoch": 0.37695078031212487,
|
29807 |
+
"grad_norm": 0.07309357076883316,
|
29808 |
+
"learning_rate": 0.0009826672861082775,
|
29809 |
+
"loss": 1.5485,
|
29810 |
+
"step": 8478
|
29811 |
+
},
|
29812 |
+
{
|
29813 |
+
"epoch": 0.37703970477079723,
|
29814 |
+
"grad_norm": 0.07403724640607834,
|
29815 |
+
"learning_rate": 0.0009826580675353054,
|
29816 |
+
"loss": 1.5558,
|
29817 |
+
"step": 8480
|
29818 |
+
},
|
29819 |
+
{
|
29820 |
+
"epoch": 0.37712862922946955,
|
29821 |
+
"grad_norm": 0.07526498287916183,
|
29822 |
+
"learning_rate": 0.000982648846554755,
|
29823 |
+
"loss": 1.5511,
|
29824 |
+
"step": 8482
|
29825 |
+
},
|
29826 |
+
{
|
29827 |
+
"epoch": 0.3772175536881419,
|
29828 |
+
"grad_norm": 0.07380390167236328,
|
29829 |
+
"learning_rate": 0.0009826396231666717,
|
29830 |
+
"loss": 1.5511,
|
29831 |
+
"step": 8484
|
29832 |
+
},
|
29833 |
+
{
|
29834 |
+
"epoch": 0.3773064781468143,
|
29835 |
+
"grad_norm": 0.07699756324291229,
|
29836 |
+
"learning_rate": 0.0009826303973711021,
|
29837 |
+
"loss": 1.5507,
|
29838 |
+
"step": 8486
|
29839 |
+
},
|
29840 |
+
{
|
29841 |
+
"epoch": 0.37739540260548665,
|
29842 |
+
"grad_norm": 0.07383593916893005,
|
29843 |
+
"learning_rate": 0.000982621169168092,
|
29844 |
+
"loss": 1.5523,
|
29845 |
+
"step": 8488
|
29846 |
+
},
|
29847 |
+
{
|
29848 |
+
"epoch": 0.377484327064159,
|
29849 |
+
"grad_norm": 0.07610490173101425,
|
29850 |
+
"learning_rate": 0.0009826119385576873,
|
29851 |
+
"loss": 1.5494,
|
29852 |
+
"step": 8490
|
29853 |
+
},
|
29854 |
+
{
|
29855 |
+
"epoch": 0.37757325152283133,
|
29856 |
+
"grad_norm": 0.07537056505680084,
|
29857 |
+
"learning_rate": 0.000982602705539934,
|
29858 |
+
"loss": 1.5563,
|
29859 |
+
"step": 8492
|
29860 |
+
},
|
29861 |
+
{
|
29862 |
+
"epoch": 0.3776621759815037,
|
29863 |
+
"grad_norm": 0.07339107990264893,
|
29864 |
+
"learning_rate": 0.0009825934701148784,
|
29865 |
+
"loss": 1.5511,
|
29866 |
+
"step": 8494
|
29867 |
+
},
|
29868 |
+
{
|
29869 |
+
"epoch": 0.37775110044017607,
|
29870 |
+
"grad_norm": 0.0728975385427475,
|
29871 |
+
"learning_rate": 0.0009825842322825668,
|
29872 |
+
"loss": 1.5491,
|
29873 |
+
"step": 8496
|
29874 |
+
},
|
29875 |
+
{
|
29876 |
+
"epoch": 0.37784002489884844,
|
29877 |
+
"grad_norm": 0.0715377926826477,
|
29878 |
+
"learning_rate": 0.0009825749920430444,
|
29879 |
+
"loss": 1.5493,
|
29880 |
+
"step": 8498
|
29881 |
+
},
|
29882 |
+
{
|
29883 |
+
"epoch": 0.3779289493575208,
|
29884 |
+
"grad_norm": 0.07521523535251617,
|
29885 |
+
"learning_rate": 0.000982565749396358,
|
29886 |
+
"loss": 1.5483,
|
29887 |
+
"step": 8500
|
29888 |
+
},
|
29889 |
+
{
|
29890 |
+
"epoch": 0.3779289493575208,
|
29891 |
+
"eval_loss": 1.525511622428894,
|
29892 |
+
"eval_runtime": 12.3728,
|
29893 |
+
"eval_samples_per_second": 558.484,
|
29894 |
+
"eval_steps_per_second": 69.831,
|
29895 |
+
"step": 8500
|
29896 |
+
},
|
29897 |
+
{
|
29898 |
+
"epoch": 0.3780178738161931,
|
29899 |
+
"grad_norm": 0.07613959908485413,
|
29900 |
+
"learning_rate": 0.0009825565043425537,
|
29901 |
+
"loss": 1.5473,
|
29902 |
+
"step": 8502
|
29903 |
+
},
|
29904 |
+
{
|
29905 |
+
"epoch": 0.3781067982748655,
|
29906 |
+
"grad_norm": 0.07284019142389297,
|
29907 |
+
"learning_rate": 0.0009825472568816775,
|
29908 |
+
"loss": 1.5514,
|
29909 |
+
"step": 8504
|
29910 |
+
},
|
29911 |
+
{
|
29912 |
+
"epoch": 0.37819572273353785,
|
29913 |
+
"grad_norm": 0.07196670770645142,
|
29914 |
+
"learning_rate": 0.0009825380070137752,
|
29915 |
+
"loss": 1.557,
|
29916 |
+
"step": 8506
|
29917 |
+
},
|
29918 |
+
{
|
29919 |
+
"epoch": 0.3782846471922102,
|
29920 |
+
"grad_norm": 0.07932493090629578,
|
29921 |
+
"learning_rate": 0.0009825287547388934,
|
29922 |
+
"loss": 1.5504,
|
29923 |
+
"step": 8508
|
29924 |
+
},
|
29925 |
+
{
|
29926 |
+
"epoch": 0.3783735716508826,
|
29927 |
+
"grad_norm": 0.07043527066707611,
|
29928 |
+
"learning_rate": 0.000982519500057078,
|
29929 |
+
"loss": 1.5411,
|
29930 |
+
"step": 8510
|
29931 |
+
},
|
29932 |
+
{
|
29933 |
+
"epoch": 0.37846249610955496,
|
29934 |
+
"grad_norm": 0.07196062058210373,
|
29935 |
+
"learning_rate": 0.0009825102429683755,
|
29936 |
+
"loss": 1.5507,
|
29937 |
+
"step": 8512
|
29938 |
+
},
|
29939 |
+
{
|
29940 |
+
"epoch": 0.37855142056822727,
|
29941 |
+
"grad_norm": 0.07716414332389832,
|
29942 |
+
"learning_rate": 0.0009825009834728315,
|
29943 |
+
"loss": 1.5461,
|
29944 |
+
"step": 8514
|
29945 |
+
},
|
29946 |
+
{
|
29947 |
+
"epoch": 0.37864034502689964,
|
29948 |
+
"grad_norm": 0.06990046054124832,
|
29949 |
+
"learning_rate": 0.0009824917215704926,
|
29950 |
+
"loss": 1.5522,
|
29951 |
+
"step": 8516
|
29952 |
+
},
|
29953 |
+
{
|
29954 |
+
"epoch": 0.378729269485572,
|
29955 |
+
"grad_norm": 0.07274769991636276,
|
29956 |
+
"learning_rate": 0.000982482457261405,
|
29957 |
+
"loss": 1.5507,
|
29958 |
+
"step": 8518
|
29959 |
+
},
|
29960 |
+
{
|
29961 |
+
"epoch": 0.3788181939442444,
|
29962 |
+
"grad_norm": 0.07060352712869644,
|
29963 |
+
"learning_rate": 0.0009824731905456149,
|
29964 |
+
"loss": 1.5491,
|
29965 |
+
"step": 8520
|
29966 |
+
},
|
29967 |
+
{
|
29968 |
+
"epoch": 0.37890711840291674,
|
29969 |
+
"grad_norm": 0.07348603010177612,
|
29970 |
+
"learning_rate": 0.000982463921423168,
|
29971 |
+
"loss": 1.5473,
|
29972 |
+
"step": 8522
|
29973 |
+
},
|
29974 |
+
{
|
29975 |
+
"epoch": 0.37899604286158906,
|
29976 |
+
"grad_norm": 0.07370968163013458,
|
29977 |
+
"learning_rate": 0.0009824546498941114,
|
29978 |
+
"loss": 1.5564,
|
29979 |
+
"step": 8524
|
29980 |
+
},
|
29981 |
+
{
|
29982 |
+
"epoch": 0.3790849673202614,
|
29983 |
+
"grad_norm": 0.07592252641916275,
|
29984 |
+
"learning_rate": 0.0009824453759584907,
|
29985 |
+
"loss": 1.5535,
|
29986 |
+
"step": 8526
|
29987 |
+
},
|
29988 |
+
{
|
29989 |
+
"epoch": 0.3791738917789338,
|
29990 |
+
"grad_norm": 0.07057470828294754,
|
29991 |
+
"learning_rate": 0.0009824360996163524,
|
29992 |
+
"loss": 1.5567,
|
29993 |
+
"step": 8528
|
29994 |
+
},
|
29995 |
+
{
|
29996 |
+
"epoch": 0.37926281623760616,
|
29997 |
+
"grad_norm": 0.07211954891681671,
|
29998 |
+
"learning_rate": 0.000982426820867743,
|
29999 |
+
"loss": 1.5548,
|
30000 |
+
"step": 8530
|
30001 |
+
},
|
30002 |
+
{
|
30003 |
+
"epoch": 0.3793517406962785,
|
30004 |
+
"grad_norm": 0.06937380880117416,
|
30005 |
+
"learning_rate": 0.0009824175397127083,
|
30006 |
+
"loss": 1.5457,
|
30007 |
+
"step": 8532
|
30008 |
+
},
|
30009 |
+
{
|
30010 |
+
"epoch": 0.3794406651549509,
|
30011 |
+
"grad_norm": 0.07182077318429947,
|
30012 |
+
"learning_rate": 0.0009824082561512948,
|
30013 |
+
"loss": 1.5589,
|
30014 |
+
"step": 8534
|
30015 |
+
},
|
30016 |
+
{
|
30017 |
+
"epoch": 0.3795295896136232,
|
30018 |
+
"grad_norm": 0.07062894105911255,
|
30019 |
+
"learning_rate": 0.000982398970183549,
|
30020 |
+
"loss": 1.5499,
|
30021 |
+
"step": 8536
|
30022 |
+
},
|
30023 |
+
{
|
30024 |
+
"epoch": 0.3796185140722956,
|
30025 |
+
"grad_norm": 0.07044274359941483,
|
30026 |
+
"learning_rate": 0.000982389681809517,
|
30027 |
+
"loss": 1.5555,
|
30028 |
+
"step": 8538
|
30029 |
+
},
|
30030 |
+
{
|
30031 |
+
"epoch": 0.37970743853096794,
|
30032 |
+
"grad_norm": 0.0717586874961853,
|
30033 |
+
"learning_rate": 0.0009823803910292452,
|
30034 |
+
"loss": 1.5452,
|
30035 |
+
"step": 8540
|
30036 |
+
},
|
30037 |
+
{
|
30038 |
+
"epoch": 0.3797963629896403,
|
30039 |
+
"grad_norm": 0.07334475219249725,
|
30040 |
+
"learning_rate": 0.0009823710978427798,
|
30041 |
+
"loss": 1.5576,
|
30042 |
+
"step": 8542
|
30043 |
+
},
|
30044 |
+
{
|
30045 |
+
"epoch": 0.3798852874483127,
|
30046 |
+
"grad_norm": 0.07173279672861099,
|
30047 |
+
"learning_rate": 0.0009823618022501675,
|
30048 |
+
"loss": 1.5451,
|
30049 |
+
"step": 8544
|
30050 |
+
},
|
30051 |
+
{
|
30052 |
+
"epoch": 0.379974211906985,
|
30053 |
+
"grad_norm": 0.07294796407222748,
|
30054 |
+
"learning_rate": 0.0009823525042514543,
|
30055 |
+
"loss": 1.5547,
|
30056 |
+
"step": 8546
|
30057 |
+
},
|
30058 |
+
{
|
30059 |
+
"epoch": 0.38006313636565736,
|
30060 |
+
"grad_norm": 0.0728803277015686,
|
30061 |
+
"learning_rate": 0.0009823432038466868,
|
30062 |
+
"loss": 1.5461,
|
30063 |
+
"step": 8548
|
30064 |
+
},
|
30065 |
+
{
|
30066 |
+
"epoch": 0.38015206082432973,
|
30067 |
+
"grad_norm": 0.070876345038414,
|
30068 |
+
"learning_rate": 0.0009823339010359114,
|
30069 |
+
"loss": 1.5525,
|
30070 |
+
"step": 8550
|
30071 |
+
},
|
30072 |
+
{
|
30073 |
+
"epoch": 0.3802409852830021,
|
30074 |
+
"grad_norm": 0.07284954190254211,
|
30075 |
+
"learning_rate": 0.0009823245958191745,
|
30076 |
+
"loss": 1.5486,
|
30077 |
+
"step": 8552
|
30078 |
+
},
|
30079 |
+
{
|
30080 |
+
"epoch": 0.38032990974167447,
|
30081 |
+
"grad_norm": 0.0750528946518898,
|
30082 |
+
"learning_rate": 0.0009823152881965223,
|
30083 |
+
"loss": 1.5507,
|
30084 |
+
"step": 8554
|
30085 |
+
},
|
30086 |
+
{
|
30087 |
+
"epoch": 0.3804188342003468,
|
30088 |
+
"grad_norm": 0.07205671817064285,
|
30089 |
+
"learning_rate": 0.0009823059781680014,
|
30090 |
+
"loss": 1.5513,
|
30091 |
+
"step": 8556
|
30092 |
+
},
|
30093 |
+
{
|
30094 |
+
"epoch": 0.38050775865901915,
|
30095 |
+
"grad_norm": 0.07429669797420502,
|
30096 |
+
"learning_rate": 0.0009822966657336582,
|
30097 |
+
"loss": 1.5503,
|
30098 |
+
"step": 8558
|
30099 |
+
},
|
30100 |
+
{
|
30101 |
+
"epoch": 0.3805966831176915,
|
30102 |
+
"grad_norm": 0.0736270323395729,
|
30103 |
+
"learning_rate": 0.0009822873508935394,
|
30104 |
+
"loss": 1.5513,
|
30105 |
+
"step": 8560
|
30106 |
+
},
|
30107 |
+
{
|
30108 |
+
"epoch": 0.3806856075763639,
|
30109 |
+
"grad_norm": 0.0691053494811058,
|
30110 |
+
"learning_rate": 0.0009822780336476908,
|
30111 |
+
"loss": 1.5482,
|
30112 |
+
"step": 8562
|
30113 |
+
},
|
30114 |
+
{
|
30115 |
+
"epoch": 0.38077453203503625,
|
30116 |
+
"grad_norm": 0.0720055103302002,
|
30117 |
+
"learning_rate": 0.0009822687139961596,
|
30118 |
+
"loss": 1.5469,
|
30119 |
+
"step": 8564
|
30120 |
+
},
|
30121 |
+
{
|
30122 |
+
"epoch": 0.3808634564937086,
|
30123 |
+
"grad_norm": 0.07141195237636566,
|
30124 |
+
"learning_rate": 0.0009822593919389922,
|
30125 |
+
"loss": 1.5539,
|
30126 |
+
"step": 8566
|
30127 |
+
},
|
30128 |
+
{
|
30129 |
+
"epoch": 0.38095238095238093,
|
30130 |
+
"grad_norm": 0.07182452082633972,
|
30131 |
+
"learning_rate": 0.0009822500674762345,
|
30132 |
+
"loss": 1.5476,
|
30133 |
+
"step": 8568
|
30134 |
+
},
|
30135 |
+
{
|
30136 |
+
"epoch": 0.3810413054110533,
|
30137 |
+
"grad_norm": 0.07438525557518005,
|
30138 |
+
"learning_rate": 0.0009822407406079336,
|
30139 |
+
"loss": 1.5486,
|
30140 |
+
"step": 8570
|
30141 |
+
},
|
30142 |
+
{
|
30143 |
+
"epoch": 0.38113022986972567,
|
30144 |
+
"grad_norm": 0.07334297895431519,
|
30145 |
+
"learning_rate": 0.0009822314113341359,
|
30146 |
+
"loss": 1.549,
|
30147 |
+
"step": 8572
|
30148 |
+
},
|
30149 |
+
{
|
30150 |
+
"epoch": 0.38121915432839804,
|
30151 |
+
"grad_norm": 0.07467876374721527,
|
30152 |
+
"learning_rate": 0.0009822220796548878,
|
30153 |
+
"loss": 1.5524,
|
30154 |
+
"step": 8574
|
30155 |
+
},
|
30156 |
+
{
|
30157 |
+
"epoch": 0.3813080787870704,
|
30158 |
+
"grad_norm": 0.0723448172211647,
|
30159 |
+
"learning_rate": 0.0009822127455702359,
|
30160 |
+
"loss": 1.5459,
|
30161 |
+
"step": 8576
|
30162 |
+
},
|
30163 |
+
{
|
30164 |
+
"epoch": 0.3813970032457427,
|
30165 |
+
"grad_norm": 0.07537683844566345,
|
30166 |
+
"learning_rate": 0.0009822034090802268,
|
30167 |
+
"loss": 1.5465,
|
30168 |
+
"step": 8578
|
30169 |
+
},
|
30170 |
+
{
|
30171 |
+
"epoch": 0.3814859277044151,
|
30172 |
+
"grad_norm": 0.07435756176710129,
|
30173 |
+
"learning_rate": 0.0009821940701849072,
|
30174 |
+
"loss": 1.5497,
|
30175 |
+
"step": 8580
|
30176 |
+
},
|
30177 |
+
{
|
30178 |
+
"epoch": 0.38157485216308745,
|
30179 |
+
"grad_norm": 0.07319629937410355,
|
30180 |
+
"learning_rate": 0.0009821847288843232,
|
30181 |
+
"loss": 1.5454,
|
30182 |
+
"step": 8582
|
30183 |
+
},
|
30184 |
+
{
|
30185 |
+
"epoch": 0.3816637766217598,
|
30186 |
+
"grad_norm": 0.07365921884775162,
|
30187 |
+
"learning_rate": 0.000982175385178522,
|
30188 |
+
"loss": 1.5457,
|
30189 |
+
"step": 8584
|
30190 |
+
},
|
30191 |
+
{
|
30192 |
+
"epoch": 0.3817527010804322,
|
30193 |
+
"grad_norm": 0.06942031532526016,
|
30194 |
+
"learning_rate": 0.00098216603906755,
|
30195 |
+
"loss": 1.5484,
|
30196 |
+
"step": 8586
|
30197 |
+
},
|
30198 |
+
{
|
30199 |
+
"epoch": 0.38184162553910456,
|
30200 |
+
"grad_norm": 0.07433439046144485,
|
30201 |
+
"learning_rate": 0.0009821566905514536,
|
30202 |
+
"loss": 1.5524,
|
30203 |
+
"step": 8588
|
30204 |
+
},
|
30205 |
+
{
|
30206 |
+
"epoch": 0.38193054999777687,
|
30207 |
+
"grad_norm": 0.07098887860774994,
|
30208 |
+
"learning_rate": 0.0009821473396302797,
|
30209 |
+
"loss": 1.5419,
|
30210 |
+
"step": 8590
|
30211 |
+
},
|
30212 |
+
{
|
30213 |
+
"epoch": 0.38201947445644924,
|
30214 |
+
"grad_norm": 0.07336100190877914,
|
30215 |
+
"learning_rate": 0.0009821379863040748,
|
30216 |
+
"loss": 1.5489,
|
30217 |
+
"step": 8592
|
30218 |
+
},
|
30219 |
+
{
|
30220 |
+
"epoch": 0.3821083989151216,
|
30221 |
+
"grad_norm": 0.07159991562366486,
|
30222 |
+
"learning_rate": 0.0009821286305728854,
|
30223 |
+
"loss": 1.5495,
|
30224 |
+
"step": 8594
|
30225 |
+
},
|
30226 |
+
{
|
30227 |
+
"epoch": 0.382197323373794,
|
30228 |
+
"grad_norm": 0.07283421605825424,
|
30229 |
+
"learning_rate": 0.0009821192724367585,
|
30230 |
+
"loss": 1.5474,
|
30231 |
+
"step": 8596
|
30232 |
+
},
|
30233 |
+
{
|
30234 |
+
"epoch": 0.38228624783246634,
|
30235 |
+
"grad_norm": 0.07494281232357025,
|
30236 |
+
"learning_rate": 0.0009821099118957407,
|
30237 |
+
"loss": 1.5481,
|
30238 |
+
"step": 8598
|
30239 |
+
},
|
30240 |
+
{
|
30241 |
+
"epoch": 0.38237517229113865,
|
30242 |
+
"grad_norm": 0.07011394947767258,
|
30243 |
+
"learning_rate": 0.0009821005489498787,
|
30244 |
+
"loss": 1.544,
|
30245 |
+
"step": 8600
|
30246 |
+
},
|
30247 |
+
{
|
30248 |
+
"epoch": 0.382464096749811,
|
30249 |
+
"grad_norm": 0.07218565046787262,
|
30250 |
+
"learning_rate": 0.000982091183599219,
|
30251 |
+
"loss": 1.5442,
|
30252 |
+
"step": 8602
|
30253 |
+
},
|
30254 |
+
{
|
30255 |
+
"epoch": 0.3825530212084834,
|
30256 |
+
"grad_norm": 0.07564288377761841,
|
30257 |
+
"learning_rate": 0.0009820818158438084,
|
30258 |
+
"loss": 1.5573,
|
30259 |
+
"step": 8604
|
30260 |
+
},
|
30261 |
+
{
|
30262 |
+
"epoch": 0.38264194566715576,
|
30263 |
+
"grad_norm": 0.07179772108793259,
|
30264 |
+
"learning_rate": 0.0009820724456836938,
|
30265 |
+
"loss": 1.5556,
|
30266 |
+
"step": 8606
|
30267 |
+
},
|
30268 |
+
{
|
30269 |
+
"epoch": 0.3827308701258281,
|
30270 |
+
"grad_norm": 0.07555480301380157,
|
30271 |
+
"learning_rate": 0.0009820630731189218,
|
30272 |
+
"loss": 1.5486,
|
30273 |
+
"step": 8608
|
30274 |
+
},
|
30275 |
+
{
|
30276 |
+
"epoch": 0.3828197945845005,
|
30277 |
+
"grad_norm": 0.07220586389303207,
|
30278 |
+
"learning_rate": 0.0009820536981495391,
|
30279 |
+
"loss": 1.5496,
|
30280 |
+
"step": 8610
|
30281 |
+
},
|
30282 |
+
{
|
30283 |
+
"epoch": 0.3829087190431728,
|
30284 |
+
"grad_norm": 0.07378540188074112,
|
30285 |
+
"learning_rate": 0.0009820443207755928,
|
30286 |
+
"loss": 1.5498,
|
30287 |
+
"step": 8612
|
30288 |
+
},
|
30289 |
+
{
|
30290 |
+
"epoch": 0.3829976435018452,
|
30291 |
+
"grad_norm": 0.07111547887325287,
|
30292 |
+
"learning_rate": 0.000982034940997129,
|
30293 |
+
"loss": 1.5494,
|
30294 |
+
"step": 8614
|
30295 |
+
},
|
30296 |
+
{
|
30297 |
+
"epoch": 0.38308656796051754,
|
30298 |
+
"grad_norm": 0.07289877533912659,
|
30299 |
+
"learning_rate": 0.000982025558814195,
|
30300 |
+
"loss": 1.5496,
|
30301 |
+
"step": 8616
|
30302 |
+
},
|
30303 |
+
{
|
30304 |
+
"epoch": 0.3831754924191899,
|
30305 |
+
"grad_norm": 0.07126215100288391,
|
30306 |
+
"learning_rate": 0.0009820161742268379,
|
30307 |
+
"loss": 1.5444,
|
30308 |
+
"step": 8618
|
30309 |
+
},
|
30310 |
+
{
|
30311 |
+
"epoch": 0.3832644168778623,
|
30312 |
+
"grad_norm": 0.07101909816265106,
|
30313 |
+
"learning_rate": 0.0009820067872351038,
|
30314 |
+
"loss": 1.5456,
|
30315 |
+
"step": 8620
|
30316 |
+
},
|
30317 |
+
{
|
30318 |
+
"epoch": 0.3833533413365346,
|
30319 |
+
"grad_norm": 0.0723530650138855,
|
30320 |
+
"learning_rate": 0.0009819973978390398,
|
30321 |
+
"loss": 1.5452,
|
30322 |
+
"step": 8622
|
30323 |
+
},
|
30324 |
+
{
|
30325 |
+
"epoch": 0.38344226579520696,
|
30326 |
+
"grad_norm": 0.07396049052476883,
|
30327 |
+
"learning_rate": 0.0009819880060386928,
|
30328 |
+
"loss": 1.5498,
|
30329 |
+
"step": 8624
|
30330 |
+
},
|
30331 |
+
{
|
30332 |
+
"epoch": 0.38353119025387933,
|
30333 |
+
"grad_norm": 0.07335137575864792,
|
30334 |
+
"learning_rate": 0.0009819786118341097,
|
30335 |
+
"loss": 1.554,
|
30336 |
+
"step": 8626
|
30337 |
+
},
|
30338 |
+
{
|
30339 |
+
"epoch": 0.3836201147125517,
|
30340 |
+
"grad_norm": 0.07116411626338959,
|
30341 |
+
"learning_rate": 0.0009819692152253372,
|
30342 |
+
"loss": 1.5461,
|
30343 |
+
"step": 8628
|
30344 |
+
},
|
30345 |
+
{
|
30346 |
+
"epoch": 0.38370903917122406,
|
30347 |
+
"grad_norm": 0.07290813326835632,
|
30348 |
+
"learning_rate": 0.0009819598162124224,
|
30349 |
+
"loss": 1.5431,
|
30350 |
+
"step": 8630
|
30351 |
+
},
|
30352 |
+
{
|
30353 |
+
"epoch": 0.3837979636298964,
|
30354 |
+
"grad_norm": 0.07139547169208527,
|
30355 |
+
"learning_rate": 0.0009819504147954119,
|
30356 |
+
"loss": 1.5439,
|
30357 |
+
"step": 8632
|
30358 |
+
},
|
30359 |
+
{
|
30360 |
+
"epoch": 0.38388688808856875,
|
30361 |
+
"grad_norm": 0.06995617598295212,
|
30362 |
+
"learning_rate": 0.0009819410109743528,
|
30363 |
+
"loss": 1.5455,
|
30364 |
+
"step": 8634
|
30365 |
+
},
|
30366 |
+
{
|
30367 |
+
"epoch": 0.3839758125472411,
|
30368 |
+
"grad_norm": 0.07103978097438812,
|
30369 |
+
"learning_rate": 0.0009819316047492918,
|
30370 |
+
"loss": 1.5439,
|
30371 |
+
"step": 8636
|
30372 |
+
},
|
30373 |
+
{
|
30374 |
+
"epoch": 0.3840647370059135,
|
30375 |
+
"grad_norm": 0.07140542566776276,
|
30376 |
+
"learning_rate": 0.000981922196120276,
|
30377 |
+
"loss": 1.5404,
|
30378 |
+
"step": 8638
|
30379 |
+
},
|
30380 |
+
{
|
30381 |
+
"epoch": 0.38415366146458585,
|
30382 |
+
"grad_norm": 0.0695871040225029,
|
30383 |
+
"learning_rate": 0.0009819127850873525,
|
30384 |
+
"loss": 1.5443,
|
30385 |
+
"step": 8640
|
30386 |
+
},
|
30387 |
+
{
|
30388 |
+
"epoch": 0.3842425859232582,
|
30389 |
+
"grad_norm": 0.07270847260951996,
|
30390 |
+
"learning_rate": 0.0009819033716505679,
|
30391 |
+
"loss": 1.5423,
|
30392 |
+
"step": 8642
|
30393 |
+
},
|
30394 |
+
{
|
30395 |
+
"epoch": 0.38433151038193053,
|
30396 |
+
"grad_norm": 0.07431674748659134,
|
30397 |
+
"learning_rate": 0.000981893955809969,
|
30398 |
+
"loss": 1.5434,
|
30399 |
+
"step": 8644
|
30400 |
+
},
|
30401 |
+
{
|
30402 |
+
"epoch": 0.3844204348406029,
|
30403 |
+
"grad_norm": 0.07491683959960938,
|
30404 |
+
"learning_rate": 0.0009818845375656035,
|
30405 |
+
"loss": 1.5443,
|
30406 |
+
"step": 8646
|
30407 |
+
},
|
30408 |
+
{
|
30409 |
+
"epoch": 0.38450935929927527,
|
30410 |
+
"grad_norm": 0.0724981278181076,
|
30411 |
+
"learning_rate": 0.0009818751169175177,
|
30412 |
+
"loss": 1.5455,
|
30413 |
+
"step": 8648
|
30414 |
+
},
|
30415 |
+
{
|
30416 |
+
"epoch": 0.38459828375794763,
|
30417 |
+
"grad_norm": 0.07452180981636047,
|
30418 |
+
"learning_rate": 0.0009818656938657589,
|
30419 |
+
"loss": 1.55,
|
30420 |
+
"step": 8650
|
30421 |
+
},
|
30422 |
+
{
|
30423 |
+
"epoch": 0.38468720821662,
|
30424 |
+
"grad_norm": 0.07273737341165543,
|
30425 |
+
"learning_rate": 0.000981856268410374,
|
30426 |
+
"loss": 1.5422,
|
30427 |
+
"step": 8652
|
30428 |
+
},
|
30429 |
+
{
|
30430 |
+
"epoch": 0.3847761326752923,
|
30431 |
+
"grad_norm": 0.07237744331359863,
|
30432 |
+
"learning_rate": 0.0009818468405514101,
|
30433 |
+
"loss": 1.5468,
|
30434 |
+
"step": 8654
|
30435 |
+
},
|
30436 |
+
{
|
30437 |
+
"epoch": 0.3848650571339647,
|
30438 |
+
"grad_norm": 0.07318416237831116,
|
30439 |
+
"learning_rate": 0.0009818374102889141,
|
30440 |
+
"loss": 1.5456,
|
30441 |
+
"step": 8656
|
30442 |
+
},
|
30443 |
+
{
|
30444 |
+
"epoch": 0.38495398159263705,
|
30445 |
+
"grad_norm": 0.0707939937710762,
|
30446 |
+
"learning_rate": 0.0009818279776229333,
|
30447 |
+
"loss": 1.5468,
|
30448 |
+
"step": 8658
|
30449 |
+
},
|
30450 |
+
{
|
30451 |
+
"epoch": 0.3850429060513094,
|
30452 |
+
"grad_norm": 0.07714451104402542,
|
30453 |
+
"learning_rate": 0.0009818185425535145,
|
30454 |
+
"loss": 1.55,
|
30455 |
+
"step": 8660
|
30456 |
+
},
|
30457 |
+
{
|
30458 |
+
"epoch": 0.3851318305099818,
|
30459 |
+
"grad_norm": 0.07308610528707504,
|
30460 |
+
"learning_rate": 0.0009818091050807047,
|
30461 |
+
"loss": 1.5422,
|
30462 |
+
"step": 8662
|
30463 |
+
},
|
30464 |
+
{
|
30465 |
+
"epoch": 0.38522075496865416,
|
30466 |
+
"grad_norm": 0.07573612779378891,
|
30467 |
+
"learning_rate": 0.0009817996652045512,
|
30468 |
+
"loss": 1.5528,
|
30469 |
+
"step": 8664
|
30470 |
+
},
|
30471 |
+
{
|
30472 |
+
"epoch": 0.38530967942732647,
|
30473 |
+
"grad_norm": 0.07352116703987122,
|
30474 |
+
"learning_rate": 0.0009817902229251009,
|
30475 |
+
"loss": 1.5453,
|
30476 |
+
"step": 8666
|
30477 |
+
},
|
30478 |
+
{
|
30479 |
+
"epoch": 0.38539860388599884,
|
30480 |
+
"grad_norm": 0.0721651017665863,
|
30481 |
+
"learning_rate": 0.000981780778242401,
|
30482 |
+
"loss": 1.5484,
|
30483 |
+
"step": 8668
|
30484 |
+
},
|
30485 |
+
{
|
30486 |
+
"epoch": 0.3854875283446712,
|
30487 |
+
"grad_norm": 0.07210839539766312,
|
30488 |
+
"learning_rate": 0.0009817713311564987,
|
30489 |
+
"loss": 1.5465,
|
30490 |
+
"step": 8670
|
30491 |
+
},
|
30492 |
+
{
|
30493 |
+
"epoch": 0.3855764528033436,
|
30494 |
+
"grad_norm": 0.07044050097465515,
|
30495 |
+
"learning_rate": 0.000981761881667441,
|
30496 |
+
"loss": 1.5464,
|
30497 |
+
"step": 8672
|
30498 |
+
},
|
30499 |
+
{
|
30500 |
+
"epoch": 0.38566537726201594,
|
30501 |
+
"grad_norm": 0.07243029773235321,
|
30502 |
+
"learning_rate": 0.000981752429775275,
|
30503 |
+
"loss": 1.5452,
|
30504 |
+
"step": 8674
|
30505 |
+
},
|
30506 |
+
{
|
30507 |
+
"epoch": 0.38575430172068825,
|
30508 |
+
"grad_norm": 0.07179372757673264,
|
30509 |
+
"learning_rate": 0.000981742975480048,
|
30510 |
+
"loss": 1.5457,
|
30511 |
+
"step": 8676
|
30512 |
+
},
|
30513 |
+
{
|
30514 |
+
"epoch": 0.3858432261793606,
|
30515 |
+
"grad_norm": 0.07337214797735214,
|
30516 |
+
"learning_rate": 0.000981733518781807,
|
30517 |
+
"loss": 1.5459,
|
30518 |
+
"step": 8678
|
30519 |
+
},
|
30520 |
+
{
|
30521 |
+
"epoch": 0.385932150638033,
|
30522 |
+
"grad_norm": 0.0745837390422821,
|
30523 |
+
"learning_rate": 0.0009817240596805994,
|
30524 |
+
"loss": 1.5494,
|
30525 |
+
"step": 8680
|
30526 |
+
},
|
30527 |
+
{
|
30528 |
+
"epoch": 0.38602107509670536,
|
30529 |
+
"grad_norm": 0.072391077876091,
|
30530 |
+
"learning_rate": 0.0009817145981764722,
|
30531 |
+
"loss": 1.5427,
|
30532 |
+
"step": 8682
|
30533 |
+
},
|
30534 |
+
{
|
30535 |
+
"epoch": 0.3861099995553777,
|
30536 |
+
"grad_norm": 0.07101153582334518,
|
30537 |
+
"learning_rate": 0.0009817051342694725,
|
30538 |
+
"loss": 1.5478,
|
30539 |
+
"step": 8684
|
30540 |
+
},
|
30541 |
+
{
|
30542 |
+
"epoch": 0.38619892401405004,
|
30543 |
+
"grad_norm": 0.07185351103544235,
|
30544 |
+
"learning_rate": 0.0009816956679596475,
|
30545 |
+
"loss": 1.5428,
|
30546 |
+
"step": 8686
|
30547 |
+
},
|
30548 |
+
{
|
30549 |
+
"epoch": 0.3862878484727224,
|
30550 |
+
"grad_norm": 0.07106965035200119,
|
30551 |
+
"learning_rate": 0.000981686199247045,
|
30552 |
+
"loss": 1.5515,
|
30553 |
+
"step": 8688
|
30554 |
+
},
|
30555 |
+
{
|
30556 |
+
"epoch": 0.3863767729313948,
|
30557 |
+
"grad_norm": 0.07175727933645248,
|
30558 |
+
"learning_rate": 0.0009816767281317113,
|
30559 |
+
"loss": 1.5422,
|
30560 |
+
"step": 8690
|
30561 |
+
},
|
30562 |
+
{
|
30563 |
+
"epoch": 0.38646569739006714,
|
30564 |
+
"grad_norm": 0.0721508041024208,
|
30565 |
+
"learning_rate": 0.0009816672546136944,
|
30566 |
+
"loss": 1.5444,
|
30567 |
+
"step": 8692
|
30568 |
+
},
|
30569 |
+
{
|
30570 |
+
"epoch": 0.3865546218487395,
|
30571 |
+
"grad_norm": 0.07354867458343506,
|
30572 |
+
"learning_rate": 0.0009816577786930414,
|
30573 |
+
"loss": 1.5476,
|
30574 |
+
"step": 8694
|
30575 |
+
},
|
30576 |
+
{
|
30577 |
+
"epoch": 0.3866435463074119,
|
30578 |
+
"grad_norm": 0.07081717252731323,
|
30579 |
+
"learning_rate": 0.000981648300369799,
|
30580 |
+
"loss": 1.5489,
|
30581 |
+
"step": 8696
|
30582 |
+
},
|
30583 |
+
{
|
30584 |
+
"epoch": 0.3867324707660842,
|
30585 |
+
"grad_norm": 0.07528056204319,
|
30586 |
+
"learning_rate": 0.0009816388196440154,
|
30587 |
+
"loss": 1.5507,
|
30588 |
+
"step": 8698
|
30589 |
+
},
|
30590 |
+
{
|
30591 |
+
"epoch": 0.38682139522475656,
|
30592 |
+
"grad_norm": 0.07244568318128586,
|
30593 |
+
"learning_rate": 0.000981629336515737,
|
30594 |
+
"loss": 1.5481,
|
30595 |
+
"step": 8700
|
30596 |
+
},
|
30597 |
+
{
|
30598 |
+
"epoch": 0.3869103196834289,
|
30599 |
+
"grad_norm": 0.07286922633647919,
|
30600 |
+
"learning_rate": 0.0009816198509850118,
|
30601 |
+
"loss": 1.5445,
|
30602 |
+
"step": 8702
|
30603 |
+
},
|
30604 |
+
{
|
30605 |
+
"epoch": 0.3869992441421013,
|
30606 |
+
"grad_norm": 0.06972247362136841,
|
30607 |
+
"learning_rate": 0.000981610363051887,
|
30608 |
+
"loss": 1.5486,
|
30609 |
+
"step": 8704
|
30610 |
+
},
|
30611 |
+
{
|
30612 |
+
"epoch": 0.38708816860077366,
|
30613 |
+
"grad_norm": 0.07090491056442261,
|
30614 |
+
"learning_rate": 0.0009816008727164093,
|
30615 |
+
"loss": 1.5433,
|
30616 |
+
"step": 8706
|
30617 |
+
},
|
30618 |
+
{
|
30619 |
+
"epoch": 0.387177093059446,
|
30620 |
+
"grad_norm": 0.0725286453962326,
|
30621 |
+
"learning_rate": 0.0009815913799786269,
|
30622 |
+
"loss": 1.5452,
|
30623 |
+
"step": 8708
|
30624 |
+
},
|
30625 |
+
{
|
30626 |
+
"epoch": 0.38726601751811834,
|
30627 |
+
"grad_norm": 0.07091175764799118,
|
30628 |
+
"learning_rate": 0.0009815818848385865,
|
30629 |
+
"loss": 1.5429,
|
30630 |
+
"step": 8710
|
30631 |
+
},
|
30632 |
+
{
|
30633 |
+
"epoch": 0.3873549419767907,
|
30634 |
+
"grad_norm": 0.07452746480703354,
|
30635 |
+
"learning_rate": 0.0009815723872963358,
|
30636 |
+
"loss": 1.5374,
|
30637 |
+
"step": 8712
|
30638 |
+
},
|
30639 |
+
{
|
30640 |
+
"epoch": 0.3874438664354631,
|
30641 |
+
"grad_norm": 0.07199020683765411,
|
30642 |
+
"learning_rate": 0.0009815628873519223,
|
30643 |
+
"loss": 1.5389,
|
30644 |
+
"step": 8714
|
30645 |
+
},
|
30646 |
+
{
|
30647 |
+
"epoch": 0.38753279089413545,
|
30648 |
+
"grad_norm": 0.07434893399477005,
|
30649 |
+
"learning_rate": 0.0009815533850053928,
|
30650 |
+
"loss": 1.5465,
|
30651 |
+
"step": 8716
|
30652 |
+
},
|
30653 |
+
{
|
30654 |
+
"epoch": 0.3876217153528078,
|
30655 |
+
"grad_norm": 0.0721900537610054,
|
30656 |
+
"learning_rate": 0.0009815438802567955,
|
30657 |
+
"loss": 1.5464,
|
30658 |
+
"step": 8718
|
30659 |
+
},
|
30660 |
+
{
|
30661 |
+
"epoch": 0.38771063981148013,
|
30662 |
+
"grad_norm": 0.0768774077296257,
|
30663 |
+
"learning_rate": 0.000981534373106177,
|
30664 |
+
"loss": 1.5477,
|
30665 |
+
"step": 8720
|
30666 |
+
},
|
30667 |
+
{
|
30668 |
+
"epoch": 0.3877995642701525,
|
30669 |
+
"grad_norm": 0.07451849430799484,
|
30670 |
+
"learning_rate": 0.0009815248635535853,
|
30671 |
+
"loss": 1.5439,
|
30672 |
+
"step": 8722
|
30673 |
+
},
|
30674 |
+
{
|
30675 |
+
"epoch": 0.38788848872882487,
|
30676 |
+
"grad_norm": 0.0778982937335968,
|
30677 |
+
"learning_rate": 0.0009815153515990679,
|
30678 |
+
"loss": 1.5425,
|
30679 |
+
"step": 8724
|
30680 |
+
},
|
30681 |
+
{
|
30682 |
+
"epoch": 0.38797741318749723,
|
30683 |
+
"grad_norm": 0.07124931365251541,
|
30684 |
+
"learning_rate": 0.0009815058372426716,
|
30685 |
+
"loss": 1.5497,
|
30686 |
+
"step": 8726
|
30687 |
+
},
|
30688 |
+
{
|
30689 |
+
"epoch": 0.3880663376461696,
|
30690 |
+
"grad_norm": 0.0718124508857727,
|
30691 |
+
"learning_rate": 0.0009814963204844444,
|
30692 |
+
"loss": 1.5436,
|
30693 |
+
"step": 8728
|
30694 |
+
},
|
30695 |
+
{
|
30696 |
+
"epoch": 0.3881552621048419,
|
30697 |
+
"grad_norm": 0.07384263724088669,
|
30698 |
+
"learning_rate": 0.0009814868013244336,
|
30699 |
+
"loss": 1.5444,
|
30700 |
+
"step": 8730
|
30701 |
+
},
|
30702 |
+
{
|
30703 |
+
"epoch": 0.3882441865635143,
|
30704 |
+
"grad_norm": 0.07269671559333801,
|
30705 |
+
"learning_rate": 0.0009814772797626867,
|
30706 |
+
"loss": 1.5482,
|
30707 |
+
"step": 8732
|
30708 |
+
},
|
30709 |
+
{
|
30710 |
+
"epoch": 0.38833311102218665,
|
30711 |
+
"grad_norm": 0.0724627822637558,
|
30712 |
+
"learning_rate": 0.0009814677557992513,
|
30713 |
+
"loss": 1.5495,
|
30714 |
+
"step": 8734
|
30715 |
+
},
|
30716 |
+
{
|
30717 |
+
"epoch": 0.388422035480859,
|
30718 |
+
"grad_norm": 0.07544320821762085,
|
30719 |
+
"learning_rate": 0.0009814582294341747,
|
30720 |
+
"loss": 1.5459,
|
30721 |
+
"step": 8736
|
30722 |
+
},
|
30723 |
+
{
|
30724 |
+
"epoch": 0.3885109599395314,
|
30725 |
+
"grad_norm": 0.0732409730553627,
|
30726 |
+
"learning_rate": 0.0009814487006675047,
|
30727 |
+
"loss": 1.543,
|
30728 |
+
"step": 8738
|
30729 |
+
},
|
30730 |
+
{
|
30731 |
+
"epoch": 0.3885998843982037,
|
30732 |
+
"grad_norm": 0.071528859436512,
|
30733 |
+
"learning_rate": 0.0009814391694992885,
|
30734 |
+
"loss": 1.5489,
|
30735 |
+
"step": 8740
|
30736 |
+
},
|
30737 |
+
{
|
30738 |
+
"epoch": 0.38868880885687607,
|
30739 |
+
"grad_norm": 0.07471916079521179,
|
30740 |
+
"learning_rate": 0.000981429635929574,
|
30741 |
+
"loss": 1.5427,
|
30742 |
+
"step": 8742
|
30743 |
+
},
|
30744 |
+
{
|
30745 |
+
"epoch": 0.38877773331554843,
|
30746 |
+
"grad_norm": 0.07275721430778503,
|
30747 |
+
"learning_rate": 0.0009814200999584085,
|
30748 |
+
"loss": 1.5449,
|
30749 |
+
"step": 8744
|
30750 |
+
},
|
30751 |
+
{
|
30752 |
+
"epoch": 0.3888666577742208,
|
30753 |
+
"grad_norm": 0.07258014380931854,
|
30754 |
+
"learning_rate": 0.0009814105615858395,
|
30755 |
+
"loss": 1.543,
|
30756 |
+
"step": 8746
|
30757 |
+
},
|
30758 |
+
{
|
30759 |
+
"epoch": 0.38895558223289317,
|
30760 |
+
"grad_norm": 0.07144364714622498,
|
30761 |
+
"learning_rate": 0.0009814010208119147,
|
30762 |
+
"loss": 1.5451,
|
30763 |
+
"step": 8748
|
30764 |
+
},
|
30765 |
+
{
|
30766 |
+
"epoch": 0.38904450669156554,
|
30767 |
+
"grad_norm": 0.07227019965648651,
|
30768 |
+
"learning_rate": 0.0009813914776366816,
|
30769 |
+
"loss": 1.5423,
|
30770 |
+
"step": 8750
|
30771 |
+
},
|
30772 |
+
{
|
30773 |
+
"epoch": 0.38913343115023785,
|
30774 |
+
"grad_norm": 0.0736045092344284,
|
30775 |
+
"learning_rate": 0.0009813819320601883,
|
30776 |
+
"loss": 1.5435,
|
30777 |
+
"step": 8752
|
30778 |
+
},
|
30779 |
+
{
|
30780 |
+
"epoch": 0.3892223556089102,
|
30781 |
+
"grad_norm": 0.07181594520807266,
|
30782 |
+
"learning_rate": 0.0009813723840824819,
|
30783 |
+
"loss": 1.5413,
|
30784 |
+
"step": 8754
|
30785 |
+
},
|
30786 |
+
{
|
30787 |
+
"epoch": 0.3893112800675826,
|
30788 |
+
"grad_norm": 0.07333064079284668,
|
30789 |
+
"learning_rate": 0.0009813628337036098,
|
30790 |
+
"loss": 1.5514,
|
30791 |
+
"step": 8756
|
30792 |
+
},
|
30793 |
+
{
|
30794 |
+
"epoch": 0.38940020452625496,
|
30795 |
+
"grad_norm": 0.07221698760986328,
|
30796 |
+
"learning_rate": 0.0009813532809236202,
|
30797 |
+
"loss": 1.5435,
|
30798 |
+
"step": 8758
|
30799 |
+
},
|
30800 |
+
{
|
30801 |
+
"epoch": 0.3894891289849273,
|
30802 |
+
"grad_norm": 0.07169820368289948,
|
30803 |
+
"learning_rate": 0.0009813437257425606,
|
30804 |
+
"loss": 1.5474,
|
30805 |
+
"step": 8760
|
30806 |
+
},
|
30807 |
+
{
|
30808 |
+
"epoch": 0.38957805344359964,
|
30809 |
+
"grad_norm": 0.07131782174110413,
|
30810 |
+
"learning_rate": 0.0009813341681604785,
|
30811 |
+
"loss": 1.5431,
|
30812 |
+
"step": 8762
|
30813 |
+
},
|
30814 |
+
{
|
30815 |
+
"epoch": 0.389666977902272,
|
30816 |
+
"grad_norm": 0.07381466031074524,
|
30817 |
+
"learning_rate": 0.0009813246081774218,
|
30818 |
+
"loss": 1.551,
|
30819 |
+
"step": 8764
|
30820 |
+
},
|
30821 |
+
{
|
30822 |
+
"epoch": 0.3897559023609444,
|
30823 |
+
"grad_norm": 0.07252717018127441,
|
30824 |
+
"learning_rate": 0.000981315045793438,
|
30825 |
+
"loss": 1.542,
|
30826 |
+
"step": 8766
|
30827 |
+
},
|
30828 |
+
{
|
30829 |
+
"epoch": 0.38984482681961674,
|
30830 |
+
"grad_norm": 0.07131364196538925,
|
30831 |
+
"learning_rate": 0.0009813054810085748,
|
30832 |
+
"loss": 1.5416,
|
30833 |
+
"step": 8768
|
30834 |
+
},
|
30835 |
+
{
|
30836 |
+
"epoch": 0.3899337512782891,
|
30837 |
+
"grad_norm": 0.07210860401391983,
|
30838 |
+
"learning_rate": 0.00098129591382288,
|
30839 |
+
"loss": 1.5414,
|
30840 |
+
"step": 8770
|
30841 |
+
},
|
30842 |
+
{
|
30843 |
+
"epoch": 0.3900226757369615,
|
30844 |
+
"grad_norm": 0.07035349309444427,
|
30845 |
+
"learning_rate": 0.0009812863442364014,
|
30846 |
+
"loss": 1.5424,
|
30847 |
+
"step": 8772
|
30848 |
+
},
|
30849 |
+
{
|
30850 |
+
"epoch": 0.3901116001956338,
|
30851 |
+
"grad_norm": 0.07026588171720505,
|
30852 |
+
"learning_rate": 0.0009812767722491864,
|
30853 |
+
"loss": 1.5484,
|
30854 |
+
"step": 8774
|
30855 |
+
},
|
30856 |
+
{
|
30857 |
+
"epoch": 0.39020052465430616,
|
30858 |
+
"grad_norm": 0.07101976126432419,
|
30859 |
+
"learning_rate": 0.000981267197861283,
|
30860 |
+
"loss": 1.5419,
|
30861 |
+
"step": 8776
|
30862 |
+
},
|
30863 |
+
{
|
30864 |
+
"epoch": 0.3902894491129785,
|
30865 |
+
"grad_norm": 0.07162394374608994,
|
30866 |
+
"learning_rate": 0.0009812576210727392,
|
30867 |
+
"loss": 1.5482,
|
30868 |
+
"step": 8778
|
30869 |
+
},
|
30870 |
+
{
|
30871 |
+
"epoch": 0.3903783735716509,
|
30872 |
+
"grad_norm": 0.07118743658065796,
|
30873 |
+
"learning_rate": 0.0009812480418836024,
|
30874 |
+
"loss": 1.5401,
|
30875 |
+
"step": 8780
|
30876 |
+
},
|
30877 |
+
{
|
30878 |
+
"epoch": 0.39046729803032326,
|
30879 |
+
"grad_norm": 0.0704973116517067,
|
30880 |
+
"learning_rate": 0.0009812384602939203,
|
30881 |
+
"loss": 1.5413,
|
30882 |
+
"step": 8782
|
30883 |
+
},
|
30884 |
+
{
|
30885 |
+
"epoch": 0.3905562224889956,
|
30886 |
+
"grad_norm": 0.07269991189241409,
|
30887 |
+
"learning_rate": 0.000981228876303741,
|
30888 |
+
"loss": 1.5438,
|
30889 |
+
"step": 8784
|
30890 |
+
},
|
30891 |
+
{
|
30892 |
+
"epoch": 0.39064514694766794,
|
30893 |
+
"grad_norm": 0.07165592908859253,
|
30894 |
+
"learning_rate": 0.0009812192899131122,
|
30895 |
+
"loss": 1.548,
|
30896 |
+
"step": 8786
|
30897 |
+
},
|
30898 |
+
{
|
30899 |
+
"epoch": 0.3907340714063403,
|
30900 |
+
"grad_norm": 0.07121030986309052,
|
30901 |
+
"learning_rate": 0.0009812097011220816,
|
30902 |
+
"loss": 1.5459,
|
30903 |
+
"step": 8788
|
30904 |
+
},
|
30905 |
+
{
|
30906 |
+
"epoch": 0.3908229958650127,
|
30907 |
+
"grad_norm": 0.07146286964416504,
|
30908 |
+
"learning_rate": 0.0009812001099306974,
|
30909 |
+
"loss": 1.539,
|
30910 |
+
"step": 8790
|
30911 |
+
},
|
30912 |
+
{
|
30913 |
+
"epoch": 0.39091192032368505,
|
30914 |
+
"grad_norm": 0.07126247137784958,
|
30915 |
+
"learning_rate": 0.0009811905163390068,
|
30916 |
+
"loss": 1.5442,
|
30917 |
+
"step": 8792
|
30918 |
+
},
|
30919 |
+
{
|
30920 |
+
"epoch": 0.3910008447823574,
|
30921 |
+
"grad_norm": 0.06753192096948624,
|
30922 |
+
"learning_rate": 0.0009811809203470582,
|
30923 |
+
"loss": 1.546,
|
30924 |
+
"step": 8794
|
30925 |
+
},
|
30926 |
+
{
|
30927 |
+
"epoch": 0.3910897692410297,
|
30928 |
+
"grad_norm": 0.0699109360575676,
|
30929 |
+
"learning_rate": 0.0009811713219548992,
|
30930 |
+
"loss": 1.542,
|
30931 |
+
"step": 8796
|
30932 |
+
},
|
30933 |
+
{
|
30934 |
+
"epoch": 0.3911786936997021,
|
30935 |
+
"grad_norm": 0.0715608224272728,
|
30936 |
+
"learning_rate": 0.0009811617211625781,
|
30937 |
+
"loss": 1.5429,
|
30938 |
+
"step": 8798
|
30939 |
+
},
|
30940 |
+
{
|
30941 |
+
"epoch": 0.39126761815837446,
|
30942 |
+
"grad_norm": 0.07075057178735733,
|
30943 |
+
"learning_rate": 0.0009811521179701422,
|
30944 |
+
"loss": 1.5433,
|
30945 |
+
"step": 8800
|
30946 |
+
},
|
30947 |
+
{
|
30948 |
+
"epoch": 0.39135654261704683,
|
30949 |
+
"grad_norm": 0.07018981128931046,
|
30950 |
+
"learning_rate": 0.0009811425123776398,
|
30951 |
+
"loss": 1.5432,
|
30952 |
+
"step": 8802
|
30953 |
+
},
|
30954 |
+
{
|
30955 |
+
"epoch": 0.3914454670757192,
|
30956 |
+
"grad_norm": 0.07095321267843246,
|
30957 |
+
"learning_rate": 0.0009811329043851185,
|
30958 |
+
"loss": 1.5474,
|
30959 |
+
"step": 8804
|
30960 |
+
},
|
30961 |
+
{
|
30962 |
+
"epoch": 0.3915343915343915,
|
30963 |
+
"grad_norm": 0.07263996452093124,
|
30964 |
+
"learning_rate": 0.0009811232939926267,
|
30965 |
+
"loss": 1.5394,
|
30966 |
+
"step": 8806
|
30967 |
+
},
|
30968 |
+
{
|
30969 |
+
"epoch": 0.3916233159930639,
|
30970 |
+
"grad_norm": 0.07189413160085678,
|
30971 |
+
"learning_rate": 0.0009811136812002117,
|
30972 |
+
"loss": 1.5441,
|
30973 |
+
"step": 8808
|
30974 |
+
},
|
30975 |
+
{
|
30976 |
+
"epoch": 0.39171224045173625,
|
30977 |
+
"grad_norm": 0.0719655454158783,
|
30978 |
+
"learning_rate": 0.000981104066007922,
|
30979 |
+
"loss": 1.545,
|
30980 |
+
"step": 8810
|
30981 |
+
},
|
30982 |
+
{
|
30983 |
+
"epoch": 0.3918011649104086,
|
30984 |
+
"grad_norm": 0.0707225650548935,
|
30985 |
+
"learning_rate": 0.0009810944484158052,
|
30986 |
+
"loss": 1.5435,
|
30987 |
+
"step": 8812
|
30988 |
+
},
|
30989 |
+
{
|
30990 |
+
"epoch": 0.391890089369081,
|
30991 |
+
"grad_norm": 0.07110429555177689,
|
30992 |
+
"learning_rate": 0.0009810848284239097,
|
30993 |
+
"loss": 1.5402,
|
30994 |
+
"step": 8814
|
30995 |
+
},
|
30996 |
+
{
|
30997 |
+
"epoch": 0.3919790138277533,
|
30998 |
+
"grad_norm": 0.07303628325462341,
|
30999 |
+
"learning_rate": 0.000981075206032283,
|
31000 |
+
"loss": 1.5408,
|
31001 |
+
"step": 8816
|
31002 |
+
},
|
31003 |
+
{
|
31004 |
+
"epoch": 0.39206793828642567,
|
31005 |
+
"grad_norm": 0.07266764342784882,
|
31006 |
+
"learning_rate": 0.0009810655812409735,
|
31007 |
+
"loss": 1.5399,
|
31008 |
+
"step": 8818
|
31009 |
+
},
|
31010 |
+
{
|
31011 |
+
"epoch": 0.39215686274509803,
|
31012 |
+
"grad_norm": 0.07289233058691025,
|
31013 |
+
"learning_rate": 0.0009810559540500289,
|
31014 |
+
"loss": 1.5437,
|
31015 |
+
"step": 8820
|
31016 |
+
},
|
31017 |
+
{
|
31018 |
+
"epoch": 0.3922457872037704,
|
31019 |
+
"grad_norm": 0.07148449867963791,
|
31020 |
+
"learning_rate": 0.0009810463244594974,
|
31021 |
+
"loss": 1.5445,
|
31022 |
+
"step": 8822
|
31023 |
+
},
|
31024 |
+
{
|
31025 |
+
"epoch": 0.39233471166244277,
|
31026 |
+
"grad_norm": 0.0700794905424118,
|
31027 |
+
"learning_rate": 0.0009810366924694269,
|
31028 |
+
"loss": 1.5361,
|
31029 |
+
"step": 8824
|
31030 |
+
},
|
31031 |
+
{
|
31032 |
+
"epoch": 0.39242363612111514,
|
31033 |
+
"grad_norm": 0.07210569828748703,
|
31034 |
+
"learning_rate": 0.0009810270580798656,
|
31035 |
+
"loss": 1.5488,
|
31036 |
+
"step": 8826
|
31037 |
+
},
|
31038 |
+
{
|
31039 |
+
"epoch": 0.39251256057978745,
|
31040 |
+
"grad_norm": 0.07074844092130661,
|
31041 |
+
"learning_rate": 0.0009810174212908614,
|
31042 |
+
"loss": 1.5454,
|
31043 |
+
"step": 8828
|
31044 |
+
},
|
31045 |
+
{
|
31046 |
+
"epoch": 0.3926014850384598,
|
31047 |
+
"grad_norm": 0.07008279860019684,
|
31048 |
+
"learning_rate": 0.0009810077821024625,
|
31049 |
+
"loss": 1.5463,
|
31050 |
+
"step": 8830
|
31051 |
+
},
|
31052 |
+
{
|
31053 |
+
"epoch": 0.3926904094971322,
|
31054 |
+
"grad_norm": 0.07124333083629608,
|
31055 |
+
"learning_rate": 0.0009809981405147171,
|
31056 |
+
"loss": 1.5415,
|
31057 |
+
"step": 8832
|
31058 |
+
},
|
31059 |
+
{
|
31060 |
+
"epoch": 0.39277933395580455,
|
31061 |
+
"grad_norm": 0.06977679580450058,
|
31062 |
+
"learning_rate": 0.000980988496527673,
|
31063 |
+
"loss": 1.5459,
|
31064 |
+
"step": 8834
|
31065 |
+
},
|
31066 |
+
{
|
31067 |
+
"epoch": 0.3928682584144769,
|
31068 |
+
"grad_norm": 0.0721663162112236,
|
31069 |
+
"learning_rate": 0.0009809788501413783,
|
31070 |
+
"loss": 1.5433,
|
31071 |
+
"step": 8836
|
31072 |
+
},
|
31073 |
+
{
|
31074 |
+
"epoch": 0.39295718287314924,
|
31075 |
+
"grad_norm": 0.07385817915201187,
|
31076 |
+
"learning_rate": 0.0009809692013558816,
|
31077 |
+
"loss": 1.545,
|
31078 |
+
"step": 8838
|
31079 |
+
},
|
31080 |
+
{
|
31081 |
+
"epoch": 0.3930461073318216,
|
31082 |
+
"grad_norm": 0.07491130381822586,
|
31083 |
+
"learning_rate": 0.0009809595501712302,
|
31084 |
+
"loss": 1.5419,
|
31085 |
+
"step": 8840
|
31086 |
+
},
|
31087 |
+
{
|
31088 |
+
"epoch": 0.39313503179049397,
|
31089 |
+
"grad_norm": 0.07294177263975143,
|
31090 |
+
"learning_rate": 0.0009809498965874732,
|
31091 |
+
"loss": 1.5461,
|
31092 |
+
"step": 8842
|
31093 |
+
},
|
31094 |
+
{
|
31095 |
+
"epoch": 0.39322395624916634,
|
31096 |
+
"grad_norm": 0.07299856841564178,
|
31097 |
+
"learning_rate": 0.000980940240604658,
|
31098 |
+
"loss": 1.5455,
|
31099 |
+
"step": 8844
|
31100 |
+
},
|
31101 |
+
{
|
31102 |
+
"epoch": 0.3933128807078387,
|
31103 |
+
"grad_norm": 0.07228664308786392,
|
31104 |
+
"learning_rate": 0.0009809305822228332,
|
31105 |
+
"loss": 1.5422,
|
31106 |
+
"step": 8846
|
31107 |
+
},
31108 | +     {
31109 | +       "epoch": 0.3934018051665111,
31110 | +       "grad_norm": 0.07752872258424759,
31111 | +       "learning_rate": 0.0009809209214420467,
31112 | +       "loss": 1.5404,
31113 | +       "step": 8848
31114 | +     },
31115 | +     {
31116 | +       "epoch": 0.3934907296251834,
31117 | +       "grad_norm": 0.07412002980709076,
31118 | +       "learning_rate": 0.000980911258262347,
31119 | +       "loss": 1.5446,
31120 | +       "step": 8850
31121 | +     },
31122 | +     {
31123 | +       "epoch": 0.39357965408385576,
31124 | +       "grad_norm": 0.06998312473297119,
31125 | +       "learning_rate": 0.0009809015926837818,
31126 | +       "loss": 1.5454,
31127 | +       "step": 8852
31128 | +     },
31129 | +     {
31130 | +       "epoch": 0.3936685785425281,
31131 | +       "grad_norm": 0.0724993646144867,
31132 | +       "learning_rate": 0.0009808919247064,
31133 | +       "loss": 1.5369,
31134 | +       "step": 8854
31135 | +     },
31136 | +     {
31137 | +       "epoch": 0.3937575030012005,
31138 | +       "grad_norm": 0.0712260976433754,
31139 | +       "learning_rate": 0.0009808822543302493,
31140 | +       "loss": 1.544,
31141 | +       "step": 8856
31142 | +     },
31143 | +     {
31144 | +       "epoch": 0.39384642745987286,
31145 | +       "grad_norm": 0.07351220399141312,
31146 | +       "learning_rate": 0.0009808725815553781,
31147 | +       "loss": 1.5481,
31148 | +       "step": 8858
31149 | +     },
31150 | +     {
31151 | +       "epoch": 0.3939353519185452,
31152 | +       "grad_norm": 0.07386364787817001,
31153 | +       "learning_rate": 0.0009808629063818347,
31154 | +       "loss": 1.5355,
31155 | +       "step": 8860
31156 | +     },
31157 | +     {
31158 | +       "epoch": 0.39402427637721754,
31159 | +       "grad_norm": 0.0700744241476059,
31160 | +       "learning_rate": 0.000980853228809667,
31161 | +       "loss": 1.5344,
31162 | +       "step": 8862
31163 | +     },
31164 | +     {
31165 | +       "epoch": 0.3941132008358899,
31166 | +       "grad_norm": 0.06920643895864487,
31167 | +       "learning_rate": 0.0009808435488389239,
31168 | +       "loss": 1.5445,
31169 | +       "step": 8864
31170 | +     },
31171 | +     {
31172 | +       "epoch": 0.3942021252945623,
31173 | +       "grad_norm": 0.07501189410686493,
31174 | +       "learning_rate": 0.0009808338664696532,
31175 | +       "loss": 1.545,
31176 | +       "step": 8866
31177 | +     },
31178 | +     {
31179 | +       "epoch": 0.39429104975323465,
31180 | +       "grad_norm": 0.07265200465917587,
31181 | +       "learning_rate": 0.0009808241817019035,
31182 | +       "loss": 1.5478,
31183 | +       "step": 8868
31184 | +     },
31185 | +     {
31186 | +       "epoch": 0.39437997421190696,
31187 | +       "grad_norm": 0.07012397050857544,
31188 | +       "learning_rate": 0.000980814494535723,
31189 | +       "loss": 1.5415,
31190 | +       "step": 8870
31191 | +     },
31192 | +     {
31193 | +       "epoch": 0.3944688986705793,
31194 | +       "grad_norm": 0.07346207648515701,
31195 | +       "learning_rate": 0.0009808048049711597,
31196 | +       "loss": 1.5456,
31197 | +       "step": 8872
31198 | +     },
31199 | +     {
31200 | +       "epoch": 0.3945578231292517,
31201 | +       "grad_norm": 0.06875230371952057,
31202 | +       "learning_rate": 0.0009807951130082625,
31203 | +       "loss": 1.5495,
31204 | +       "step": 8874
31205 | +     },
31206 | +     {
31207 | +       "epoch": 0.39464674758792406,
31208 | +       "grad_norm": 0.07028649747371674,
31209 | +       "learning_rate": 0.0009807854186470793,
31210 | +       "loss": 1.5382,
31211 | +       "step": 8876
31212 | +     },
31213 | +     {
31214 | +       "epoch": 0.39473567204659643,
31215 | +       "grad_norm": 0.07126179337501526,
31216 | +       "learning_rate": 0.0009807757218876587,
31217 | +       "loss": 1.5412,
31218 | +       "step": 8878
31219 | +     },
31220 | +     {
31221 | +       "epoch": 0.3948245965052688,
31222 | +       "grad_norm": 0.07412000000476837,
31223 | +       "learning_rate": 0.000980766022730049,
31224 | +       "loss": 1.5441,
31225 | +       "step": 8880
31226 | +     },
31227 | +     {
31228 | +       "epoch": 0.3949135209639411,
31229 | +       "grad_norm": 0.06931138783693314,
31230 | +       "learning_rate": 0.0009807563211742987,
31231 | +       "loss": 1.5408,
31232 | +       "step": 8882
31233 | +     },
31234 | +     {
31235 | +       "epoch": 0.3950024454226135,
31236 | +       "grad_norm": 0.06960853189229965,
31237 | +       "learning_rate": 0.0009807466172204557,
31238 | +       "loss": 1.5467,
31239 | +       "step": 8884
31240 | +     },
31241 | +     {
31242 | +       "epoch": 0.39509136988128585,
31243 | +       "grad_norm": 0.07029549777507782,
31244 | +       "learning_rate": 0.000980736910868569,
31245 | +       "loss": 1.5485,
31246 | +       "step": 8886
31247 | +     },
31248 | +     {
31249 | +       "epoch": 0.3951802943399582,
31250 | +       "grad_norm": 0.070205919444561,
31251 | +       "learning_rate": 0.0009807272021186868,
31252 | +       "loss": 1.544,
31253 | +       "step": 8888
31254 | +     },
31255 | +     {
31256 | +       "epoch": 0.3952692187986306,
31257 | +       "grad_norm": 0.07060658931732178,
31258 | +       "learning_rate": 0.0009807174909708576,
31259 | +       "loss": 1.542,
31260 | +       "step": 8890
31261 | +     },
31262 | +     {
31263 | +       "epoch": 0.3953581432573029,
31264 | +       "grad_norm": 0.070548877120018,
31265 | +       "learning_rate": 0.0009807077774251296,
31266 | +       "loss": 1.5451,
31267 | +       "step": 8892
31268 | +     },
31269 | +     {
31270 | +       "epoch": 0.39544706771597526,
31271 | +       "grad_norm": 0.06973063200712204,
31272 | +       "learning_rate": 0.0009806980614815515,
31273 | +       "loss": 1.5422,
31274 | +       "step": 8894
31275 | +     },
31276 | +     {
31277 | +       "epoch": 0.39553599217464763,
31278 | +       "grad_norm": 0.07283268868923187,
31279 | +       "learning_rate": 0.0009806883431401718,
31280 | +       "loss": 1.5433,
31281 | +       "step": 8896
31282 | +     },
31283 | +     {
31284 | +       "epoch": 0.39562491663332,
31285 | +       "grad_norm": 0.07797042280435562,
31286 | +       "learning_rate": 0.0009806786224010387,
31287 | +       "loss": 1.549,
31288 | +       "step": 8898
31289 | +     },
31290 | +     {
31291 | +       "epoch": 0.39571384109199237,
31292 | +       "grad_norm": 0.07383356988430023,
31293 | +       "learning_rate": 0.000980668899264201,
31294 | +       "loss": 1.5419,
31295 | +       "step": 8900
31296 | +     },
31297 | +     {
31298 | +       "epoch": 0.39580276555066474,
31299 | +       "grad_norm": 0.08115527033805847,
31300 | +       "learning_rate": 0.0009806591737297069,
31301 | +       "loss": 1.5418,
31302 | +       "step": 8902
31303 | +     },
31304 | +     {
31305 | +       "epoch": 0.39589169000933705,
31306 | +       "grad_norm": 0.0753684863448143,
31307 | +       "learning_rate": 0.0009806494457976049,
31308 | +       "loss": 1.5422,
31309 | +       "step": 8904
31310 | +     },
31311 | +     {
31312 | +       "epoch": 0.3959806144680094,
31313 | +       "grad_norm": 0.07579310983419418,
31314 | +       "learning_rate": 0.0009806397154679437,
31315 | +       "loss": 1.5431,
31316 | +       "step": 8906
31317 | +     },
31318 | +     {
31319 | +       "epoch": 0.3960695389266818,
31320 | +       "grad_norm": 0.0755925178527832,
31321 | +       "learning_rate": 0.0009806299827407722,
31322 | +       "loss": 1.5424,
31323 | +       "step": 8908
31324 | +     },
31325 | +     {
31326 | +       "epoch": 0.39615846338535415,
31327 | +       "grad_norm": 0.07460242509841919,
31328 | +       "learning_rate": 0.0009806202476161383,
31329 | +       "loss": 1.5464,
31330 | +       "step": 8910
31331 | +     },
31332 | +     {
31333 | +       "epoch": 0.3962473878440265,
31334 | +       "grad_norm": 0.07479722797870636,
31335 | +       "learning_rate": 0.000980610510094091,
31336 | +       "loss": 1.5461,
31337 | +       "step": 8912
31338 | +     },
31339 | +     {
31340 | +       "epoch": 0.39633631230269883,
31341 | +       "grad_norm": 0.0737161934375763,
31342 | +       "learning_rate": 0.0009806007701746785,
31343 | +       "loss": 1.5415,
31344 | +       "step": 8914
31345 | +     },
31346 | +     {
31347 | +       "epoch": 0.3964252367613712,
31348 | +       "grad_norm": 0.07179709523916245,
31349 | +       "learning_rate": 0.0009805910278579495,
31350 | +       "loss": 1.5388,
31351 | +       "step": 8916
31352 | +     },
31353 | +     {
31354 | +       "epoch": 0.39651416122004357,
31355 | +       "grad_norm": 0.07436472177505493,
31356 | +       "learning_rate": 0.0009805812831439528,
31357 | +       "loss": 1.5408,
31358 | +       "step": 8918
31359 | +     },
31360 | +     {
31361 | +       "epoch": 0.39660308567871594,
31362 | +       "grad_norm": 0.07385652512311935,
31363 | +       "learning_rate": 0.000980571536032737,
31364 | +       "loss": 1.5375,
31365 | +       "step": 8920
31366 | +     },
31367 | +     {
31368 | +       "epoch": 0.3966920101373883,
31369 | +       "grad_norm": 0.07151640951633453,
31370 | +       "learning_rate": 0.0009805617865243504,
31371 | +       "loss": 1.5407,
31372 | +       "step": 8922
31373 | +     },
31374 | +     {
31375 | +       "epoch": 0.3967809345960606,
31376 | +       "grad_norm": 0.07092677056789398,
31377 | +       "learning_rate": 0.000980552034618842,
31378 | +       "loss": 1.5415,
31379 | +       "step": 8924
31380 | +     },
31381 | +     {
31382 | +       "epoch": 0.396869859054733,
31383 | +       "grad_norm": 0.0716884657740593,
31384 | +       "learning_rate": 0.0009805422803162603,
31385 | +       "loss": 1.5341,
31386 | +       "step": 8926
31387 | +     },
31388 | +     {
31389 | +       "epoch": 0.39695878351340536,
31390 | +       "grad_norm": 0.07016546279191971,
31391 | +       "learning_rate": 0.0009805325236166538,
31392 | +       "loss": 1.5385,
31393 | +       "step": 8928
31394 | +     },
31395 | +     {
31396 | +       "epoch": 0.3970477079720777,
31397 | +       "grad_norm": 0.07406767457723618,
31398 | +       "learning_rate": 0.0009805227645200713,
31399 | +       "loss": 1.5411,
31400 | +       "step": 8930
31401 | +     },
31402 | +     {
31403 | +       "epoch": 0.3971366324307501,
31404 | +       "grad_norm": 0.0695648342370987,
31405 | +       "learning_rate": 0.0009805130030265616,
31406 | +       "loss": 1.5376,
31407 | +       "step": 8932
31408 | +     },
31409 | +     {
31410 | +       "epoch": 0.39722555688942246,
31411 | +       "grad_norm": 0.07370419800281525,
31412 | +       "learning_rate": 0.0009805032391361733,
31413 | +       "loss": 1.5464,
31414 | +       "step": 8934
31415 | +     },
31416 | +     {
31417 | +       "epoch": 0.3973144813480948,
31418 | +       "grad_norm": 0.07572710514068604,
31419 | +       "learning_rate": 0.0009804934728489551,
31420 | +       "loss": 1.5397,
31421 | +       "step": 8936
31422 | +     },
31423 | +     {
31424 | +       "epoch": 0.39740340580676714,
31425 | +       "grad_norm": 0.07438923418521881,
31426 | +       "learning_rate": 0.0009804837041649556,
31427 | +       "loss": 1.5395,
31428 | +       "step": 8938
31429 | +     },
31430 | +     {
31431 | +       "epoch": 0.3974923302654395,
31432 | +       "grad_norm": 0.07321964204311371,
31433 | +       "learning_rate": 0.0009804739330842235,
31434 | +       "loss": 1.5405,
31435 | +       "step": 8940
31436 | +     },
31437 | +     {
31438 | +       "epoch": 0.3975812547241119,
31439 | +       "grad_norm": 0.07280636578798294,
31440 | +       "learning_rate": 0.000980464159606808,
31441 | +       "loss": 1.5442,
31442 | +       "step": 8942
31443 | +     },
31444 | +     {
31445 | +       "epoch": 0.39767017918278424,
31446 | +       "grad_norm": 0.07473626732826233,
31447 | +       "learning_rate": 0.0009804543837327573,
31448 | +       "loss": 1.5427,
31449 | +       "step": 8944
31450 | +     },
31451 | +     {
31452 | +       "epoch": 0.39775910364145656,
31453 | +       "grad_norm": 0.06971806287765503,
31454 | +       "learning_rate": 0.0009804446054621203,
31455 | +       "loss": 1.5431,
31456 | +       "step": 8946
31457 | +     },
31458 | +     {
31459 | +       "epoch": 0.3978480281001289,
31460 | +       "grad_norm": 0.07413081079721451,
31461 | +       "learning_rate": 0.0009804348247949462,
31462 | +       "loss": 1.5433,
31463 | +       "step": 8948
31464 | +     },
31465 | +     {
31466 | +       "epoch": 0.3979369525588013,
31467 | +       "grad_norm": 0.07259391993284225,
31468 | +       "learning_rate": 0.0009804250417312832,
31469 | +       "loss": 1.5446,
31470 | +       "step": 8950
31471 | +     },
31472 | +     {
31473 | +       "epoch": 0.39802587701747366,
31474 | +       "grad_norm": 0.0727652907371521,
31475 | +       "learning_rate": 0.0009804152562711804,
31476 | +       "loss": 1.5387,
31477 | +       "step": 8952
31478 | +     },
31479 | +     {
31480 | +       "epoch": 0.39811480147614603,
31481 | +       "grad_norm": 0.07275709509849548,
31482 | +       "learning_rate": 0.0009804054684146865,
31483 | +       "loss": 1.5379,
31484 | +       "step": 8954
31485 | +     },
31486 | +     {
31487 | +       "epoch": 0.3982037259348184,
31488 | +       "grad_norm": 0.07379709184169769,
31489 | +       "learning_rate": 0.0009803956781618505,
31490 | +       "loss": 1.5357,
31491 | +       "step": 8956
31492 | +     },
31493 | +     {
31494 | +       "epoch": 0.3982926503934907,
31495 | +       "grad_norm": 0.06894747912883759,
31496 | +       "learning_rate": 0.000980385885512721,
31497 | +       "loss": 1.544,
31498 | +       "step": 8958
31499 | +     },
31500 | +     {
31501 | +       "epoch": 0.3983815748521631,
31502 | +       "grad_norm": 0.06997144967317581,
31503 | +       "learning_rate": 0.000980376090467347,
31504 | +       "loss": 1.5365,
31505 | +       "step": 8960
31506 | +     },
31507 | +     {
31508 | +       "epoch": 0.39847049931083545,
31509 | +       "grad_norm": 0.07101274281740189,
31510 | +       "learning_rate": 0.0009803662930257773,
31511 | +       "loss": 1.5355,
31512 | +       "step": 8962
31513 | +     },
31514 | +     {
31515 | +       "epoch": 0.3985594237695078,
31516 | +       "grad_norm": 0.07142171263694763,
31517 | +       "learning_rate": 0.0009803564931880606,
31518 | +       "loss": 1.5441,
31519 | +       "step": 8964
31520 | +     },
31521 | +     {
31522 | +       "epoch": 0.3986483482281802,
31523 | +       "grad_norm": 0.06851017475128174,
31524 | +       "learning_rate": 0.0009803466909542463,
31525 | +       "loss": 1.5381,
31526 | +       "step": 8966
31527 | +     },
31528 | +     {
31529 | +       "epoch": 0.3987372726868525,
31530 | +       "grad_norm": 0.06920833140611649,
31531 | +       "learning_rate": 0.0009803368863243829,
31532 | +       "loss": 1.5397,
31533 | +       "step": 8968
31534 | +     },
31535 | +     {
31536 | +       "epoch": 0.39882619714552486,
31537 | +       "grad_norm": 0.07191641628742218,
31538 | +       "learning_rate": 0.0009803270792985194,
31539 | +       "loss": 1.541,
31540 | +       "step": 8970
31541 | +     },
31542 | +     {
31543 | +       "epoch": 0.39891512160419723,
31544 | +       "grad_norm": 0.07115970551967621,
31545 | +       "learning_rate": 0.0009803172698767046,
31546 | +       "loss": 1.5363,
31547 | +       "step": 8972
31548 | +     },
31549 | +     {
31550 | +       "epoch": 0.3990040460628696,
31551 | +       "grad_norm": 0.06930098682641983,
31552 | +       "learning_rate": 0.0009803074580589874,
31553 | +       "loss": 1.5372,
31554 | +       "step": 8974
31555 | +     },
31556 | +     {
31557 | +       "epoch": 0.39909297052154197,
31558 | +       "grad_norm": 0.06866835802793503,
31559 | +       "learning_rate": 0.0009802976438454173,
31560 | +       "loss": 1.5429,
31561 | +       "step": 8976
31562 | +     },
31563 | +     {
31564 | +       "epoch": 0.39918189498021434,
31565 | +       "grad_norm": 0.06998659670352936,
31566 | +       "learning_rate": 0.0009802878272360423,
31567 | +       "loss": 1.537,
31568 | +       "step": 8978
31569 | +     },
31570 | +     {
31571 | +       "epoch": 0.39927081943888665,
31572 | +       "grad_norm": 0.07126189023256302,
31573 | +       "learning_rate": 0.0009802780082309123,
31574 | +       "loss": 1.5386,
31575 | +       "step": 8980
31576 | +     },
31577 | +     {
31578 | +       "epoch": 0.399359743897559,
31579 | +       "grad_norm": 0.0688776820898056,
31580 | +       "learning_rate": 0.0009802681868300758,
31581 | +       "loss": 1.5404,
31582 | +       "step": 8982
31583 | +     },
31584 | +     {
31585 | +       "epoch": 0.3994486683562314,
31586 | +       "grad_norm": 0.07415543496608734,
31587 | +       "learning_rate": 0.0009802583630335818,
31588 | +       "loss": 1.5461,
31589 | +       "step": 8984
31590 | +     },
31591 | +     {
31592 | +       "epoch": 0.39953759281490375,
31593 | +       "grad_norm": 0.06990732252597809,
31594 | +       "learning_rate": 0.0009802485368414792,
31595 | +       "loss": 1.54,
31596 | +       "step": 8986
31597 | +     },
31598 | +     {
31599 | +       "epoch": 0.3996265172735761,
31600 | +       "grad_norm": 0.06985145807266235,
31601 | +       "learning_rate": 0.0009802387082538174,
31602 | +       "loss": 1.5392,
31603 | +       "step": 8988
31604 | +     },
31605 | +     {
31606 | +       "epoch": 0.39971544173224843,
31607 | +       "grad_norm": 0.07134158909320831,
31608 | +       "learning_rate": 0.000980228877270645,
31609 | +       "loss": 1.5432,
31610 | +       "step": 8990
31611 | +     },
31612 | +     {
31613 | +       "epoch": 0.3998043661909208,
31614 | +       "grad_norm": 0.07347475737333298,
31615 | +       "learning_rate": 0.0009802190438920116,
31616 | +       "loss": 1.5434,
31617 | +       "step": 8992
31618 | +     },
31619 | +     {
31620 | +       "epoch": 0.39989329064959317,
31621 | +       "grad_norm": 0.07028983533382416,
31622 | +       "learning_rate": 0.0009802092081179657,
31623 | +       "loss": 1.5385,
31624 | +       "step": 8994
31625 | +     },
31626 | +     {
31627 | +       "epoch": 0.39998221510826554,
31628 | +       "grad_norm": 0.07134208083152771,
31629 | +       "learning_rate": 0.0009801993699485566,
31630 | +       "loss": 1.5446,
31631 | +       "step": 8996
31632 | +     },
31633 | +     {
31634 | +       "epoch": 0.4000711395669379,
31635 | +       "grad_norm": 0.07063104212284088,
31636 | +       "learning_rate": 0.000980189529383833,
31637 | +       "loss": 1.5452,
31638 | +       "step": 8998
31639 | +     },
31640 | +     {
31641 | +       "epoch": 0.4001600640256102,
31642 | +       "grad_norm": 0.06996823847293854,
31643 | +       "learning_rate": 0.0009801796864238447,
31644 | +       "loss": 1.5371,
31645 | +       "step": 9000
31646 | +     },
31647 | +     {
31648 | +       "epoch": 0.4001600640256102,
31649 | +       "eval_loss": 1.51895010471344,
31650 | +       "eval_runtime": 12.3629,
31651 | +       "eval_samples_per_second": 558.929,
31652 | +       "eval_steps_per_second": 69.886,
31653 | +       "step": 9000
31654 |       }
31655 |     ],
31656 |     "logging_steps": 2,
...
31670 |         "attributes": {}
31671 |       }
31672 |     },
31673 | +   "total_flos": 1.925758108827648e+19,
31674 |     "train_batch_size": 768,
31675 |     "trial_name": null,
31676 |     "trial_params": null
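For anyone inspecting this checkpoint locally, here is a minimal sketch of how the records added above could be read back out of last-checkpoint/trainer_state.json. It assumes the standard Hugging Face TrainerState layout, in which these entries live under a "log_history" list (training entries carry "loss"; evaluation entries carry "eval_loss"); the file path matches this commit, but adjust it to wherever the repo is cloned.

import json

# Load the trainer state saved with this checkpoint
# (path as in this commit; adjust for your local clone).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Assumption: entries sit under "log_history", the standard TrainerState key.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

last = train_logs[-1]
print(f"step {last['step']}: loss={last['loss']}, "
      f"lr={last['learning_rate']}, grad_norm={last['grad_norm']}")
print(f"latest eval_loss: {eval_logs[-1]['eval_loss']}")
print(f"best_metric: {state['best_metric']}")

Run against this checkpoint, the last training entry would be step 9000 with loss 1.5371, and the latest evaluation entry would report eval_loss 1.51895010471344, matching the diff above.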