Training in progress, step 780, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:916f37872f2b282127854c113b88d7933e3c6c22deb3493dee6f2bb869c8dc81
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a45e4e480aaa7c81fec8584a873ce0efc9ecb9ce8e1ec028a2ec8faf12c27ec1
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8566e721ef91b4bb0b82cb6023f5dd2184670c621b1104f02d62bf820375f9df
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a5479532bafb0d8dbf7d1cd4c9665f76912e36e4b505953c015533d8cac1f5d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5456,6 +5456,41 @@
|
|
5456 |
"learning_rate": 9.132852062103844e-05,
|
5457 |
"loss": 0.868,
|
5458 |
"step": 775
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5459 |
}
|
5460 |
],
|
5461 |
"logging_steps": 1,
|
@@ -5475,7 +5510,7 @@
|
|
5475 |
"attributes": {}
|
5476 |
}
|
5477 |
},
|
5478 |
-
"total_flos": 8.
|
5479 |
"train_batch_size": 4,
|
5480 |
"trial_name": null,
|
5481 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0093820769977353,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 780,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5456 |
"learning_rate": 9.132852062103844e-05,
|
5457 |
"loss": 0.868,
|
5458 |
"step": 775
|
5459 |
+
},
|
5460 |
+
{
|
5461 |
+
"epoch": 1.0042057586541573,
|
5462 |
+
"grad_norm": 0.6421488523483276,
|
5463 |
+
"learning_rate": 9.130530194076858e-05,
|
5464 |
+
"loss": 0.6891,
|
5465 |
+
"step": 776
|
5466 |
+
},
|
5467 |
+
{
|
5468 |
+
"epoch": 1.0054998382400517,
|
5469 |
+
"grad_norm": 0.7791386246681213,
|
5470 |
+
"learning_rate": 9.12820551767182e-05,
|
5471 |
+
"loss": 0.8292,
|
5472 |
+
"step": 777
|
5473 |
+
},
|
5474 |
+
{
|
5475 |
+
"epoch": 1.0067939178259464,
|
5476 |
+
"grad_norm": 0.7175559401512146,
|
5477 |
+
"learning_rate": 9.125878034469289e-05,
|
5478 |
+
"loss": 0.7888,
|
5479 |
+
"step": 778
|
5480 |
+
},
|
5481 |
+
{
|
5482 |
+
"epoch": 1.0080879974118409,
|
5483 |
+
"grad_norm": 0.75743567943573,
|
5484 |
+
"learning_rate": 9.123547746051743e-05,
|
5485 |
+
"loss": 0.675,
|
5486 |
+
"step": 779
|
5487 |
+
},
|
5488 |
+
{
|
5489 |
+
"epoch": 1.0093820769977353,
|
5490 |
+
"grad_norm": 0.7583343386650085,
|
5491 |
+
"learning_rate": 9.12121465400356e-05,
|
5492 |
+
"loss": 0.7507,
|
5493 |
+
"step": 780
|
5494 |
}
|
5495 |
],
|
5496 |
"logging_steps": 1,
|
|
|
5510 |
"attributes": {}
|
5511 |
}
|
5512 |
},
|
5513 |
+
"total_flos": 8.719211228477522e+17,
|
5514 |
"train_batch_size": 4,
|
5515 |
"trial_name": null,
|
5516 |
"trial_params": null
|