Training in progress, step 640, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8721ee43349bfc7e39016efefeb84f0798023653ef25fbfec79493feb5cad8e
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0b13d732b2a8dad952ee020e87f52fea1e5a59d34717eed4f8c6474e09d2124
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3037bb99ad816203fc49047110ff2f6ec00b478885f7281330c502f80f4d07f6
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc7648e34cd3e7e1b0e3d71e0e1fe805f3949228899d535acad963d0ca07c4a6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4468,6 +4468,41 @@
|
|
4468 |
"learning_rate": 9.429488861426137e-05,
|
4469 |
"loss": 0.8799,
|
4470 |
"step": 635
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4471 |
}
|
4472 |
],
|
4473 |
"logging_steps": 1,
|
@@ -4487,7 +4522,7 @@
|
|
4487 |
"attributes": {}
|
4488 |
}
|
4489 |
},
|
4490 |
-
"total_flos": 7.
|
4491 |
"train_batch_size": 4,
|
4492 |
"trial_name": null,
|
4493 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8282109349725008,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 640,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4468 |
"learning_rate": 9.429488861426137e-05,
|
4469 |
"loss": 0.8799,
|
4470 |
"step": 635
|
4471 |
+
},
|
4472 |
+
{
|
4473 |
+
"epoch": 0.8230346166289226,
|
4474 |
+
"grad_norm": 0.7446788549423218,
|
4475 |
+
"learning_rate": 9.427574857613672e-05,
|
4476 |
+
"loss": 0.8071,
|
4477 |
+
"step": 636
|
4478 |
+
},
|
4479 |
+
{
|
4480 |
+
"epoch": 0.8243286962148172,
|
4481 |
+
"grad_norm": 0.8236122131347656,
|
4482 |
+
"learning_rate": 9.425657843460288e-05,
|
4483 |
+
"loss": 0.9889,
|
4484 |
+
"step": 637
|
4485 |
+
},
|
4486 |
+
{
|
4487 |
+
"epoch": 0.8256227758007118,
|
4488 |
+
"grad_norm": 0.8189204335212708,
|
4489 |
+
"learning_rate": 9.423737820269376e-05,
|
4490 |
+
"loss": 0.9607,
|
4491 |
+
"step": 638
|
4492 |
+
},
|
4493 |
+
{
|
4494 |
+
"epoch": 0.8269168553866063,
|
4495 |
+
"grad_norm": 0.9449727535247803,
|
4496 |
+
"learning_rate": 9.421814789346375e-05,
|
4497 |
+
"loss": 0.9581,
|
4498 |
+
"step": 639
|
4499 |
+
},
|
4500 |
+
{
|
4501 |
+
"epoch": 0.8282109349725008,
|
4502 |
+
"grad_norm": 0.7527281045913696,
|
4503 |
+
"learning_rate": 9.419888751998767e-05,
|
4504 |
+
"loss": 0.7984,
|
4505 |
+
"step": 640
|
4506 |
}
|
4507 |
],
|
4508 |
"logging_steps": 1,
|
|
|
4522 |
"attributes": {}
|
4523 |
}
|
4524 |
},
|
4525 |
+
"total_flos": 7.154797898838835e+17,
|
4526 |
"train_batch_size": 4,
|
4527 |
"trial_name": null,
|
4528 |
"trial_params": null
|