Training in progress, step 485, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47a6b59d01849a163702b8e50607c498745112dc6ced09ce72986bc17ec447c5
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c943a007ca820031ad467134581d9a1d6d566dbef8d77e1cbb5e854f9dbe6f0
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2c2f577326b2b04f054023f26d112b121ac8ff4bf7c93ae38255948cc3a2c22
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04f4b94f1a44c59895f464174a55ad4567f9ad6cb9a5184c25fed320934f9e0d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3383,6 +3383,41 @@
|
|
3383 |
"learning_rate": 9.688991567458933e-05,
|
3384 |
"loss": 0.7721,
|
3385 |
"step": 480
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3386 |
}
|
3387 |
],
|
3388 |
"logging_steps": 1,
|
@@ -3402,7 +3437,7 @@
|
|
3402 |
"attributes": {}
|
3403 |
}
|
3404 |
},
|
3405 |
-
"total_flos": 5.
|
3406 |
"train_batch_size": 4,
|
3407 |
"trial_name": null,
|
3408 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6276285991588483,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 485,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3383 |
"learning_rate": 9.688991567458933e-05,
|
3384 |
"loss": 0.7721,
|
3385 |
"step": 480
|
3386 |
+
},
|
3387 |
+
{
|
3388 |
+
"epoch": 0.6224522808152702,
|
3389 |
+
"grad_norm": 0.7499825954437256,
|
3390 |
+
"learning_rate": 9.687558608994232e-05,
|
3391 |
+
"loss": 0.8728,
|
3392 |
+
"step": 481
|
3393 |
+
},
|
3394 |
+
{
|
3395 |
+
"epoch": 0.6237463604011647,
|
3396 |
+
"grad_norm": 0.871703028678894,
|
3397 |
+
"learning_rate": 9.686122463423732e-05,
|
3398 |
+
"loss": 0.88,
|
3399 |
+
"step": 482
|
3400 |
+
},
|
3401 |
+
{
|
3402 |
+
"epoch": 0.6250404399870592,
|
3403 |
+
"grad_norm": 0.8204110860824585,
|
3404 |
+
"learning_rate": 9.684683131723884e-05,
|
3405 |
+
"loss": 0.8569,
|
3406 |
+
"step": 483
|
3407 |
+
},
|
3408 |
+
{
|
3409 |
+
"epoch": 0.6263345195729537,
|
3410 |
+
"grad_norm": 0.8779101371765137,
|
3411 |
+
"learning_rate": 9.683240614873294e-05,
|
3412 |
+
"loss": 0.901,
|
3413 |
+
"step": 484
|
3414 |
+
},
|
3415 |
+
{
|
3416 |
+
"epoch": 0.6276285991588483,
|
3417 |
+
"grad_norm": 0.8231368064880371,
|
3418 |
+
"learning_rate": 9.681794913852746e-05,
|
3419 |
+
"loss": 0.989,
|
3420 |
+
"step": 485
|
3421 |
}
|
3422 |
],
|
3423 |
"logging_steps": 1,
|
|
|
3437 |
"attributes": {}
|
3438 |
}
|
3439 |
},
|
3440 |
+
"total_flos": 5.421995282713805e+17,
|
3441 |
"train_batch_size": 4,
|
3442 |
"trial_name": null,
|
3443 |
"trial_params": null
|