Training in progress, step 385, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ad5bb6a07b5358dccb40a92e32494631d5f1c697661e9c9628e3e1b798cad32
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4f1b6dde27d8af2c96a8d1851af11d6c5644f5a3c144dfa41d9597eeffc1f83
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcc07ce19b7fd4f90004a8e11f50490415cba07804798230ae98a4519365742a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51fbcb00d7a67643a5230f4ba48fcd85e7a8215e6d43c6acb6e42d34667255bc
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2675,6 +2675,41 @@
|
|
2675 |
"learning_rate": 9.816034510373286e-05,
|
2676 |
"loss": 0.9889,
|
2677 |
"step": 380
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2678 |
}
|
2679 |
],
|
2680 |
"logging_steps": 1,
|
@@ -2694,7 +2729,7 @@
|
|
2694 |
"attributes": {}
|
2695 |
}
|
2696 |
},
|
2697 |
-
"total_flos": 4.
|
2698 |
"train_batch_size": 4,
|
2699 |
"trial_name": null,
|
2700 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.498220640569395,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 385,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2675 |
"learning_rate": 9.816034510373286e-05,
|
2676 |
"loss": 0.9889,
|
2677 |
"step": 380
|
2678 |
+
},
|
2679 |
+
{
|
2680 |
+
"epoch": 0.4930443222258169,
|
2681 |
+
"grad_norm": 0.829309344291687,
|
2682 |
+
"learning_rate": 9.814924818785865e-05,
|
2683 |
+
"loss": 0.8139,
|
2684 |
+
"step": 381
|
2685 |
+
},
|
2686 |
+
{
|
2687 |
+
"epoch": 0.4943384018117114,
|
2688 |
+
"grad_norm": 0.83943110704422,
|
2689 |
+
"learning_rate": 9.81381185349542e-05,
|
2690 |
+
"loss": 0.9202,
|
2691 |
+
"step": 382
|
2692 |
+
},
|
2693 |
+
{
|
2694 |
+
"epoch": 0.49563248139760596,
|
2695 |
+
"grad_norm": 0.7981933355331421,
|
2696 |
+
"learning_rate": 9.812695615258662e-05,
|
2697 |
+
"loss": 0.9131,
|
2698 |
+
"step": 383
|
2699 |
+
},
|
2700 |
+
{
|
2701 |
+
"epoch": 0.49692656098350046,
|
2702 |
+
"grad_norm": 0.7930905818939209,
|
2703 |
+
"learning_rate": 9.81157610483453e-05,
|
2704 |
+
"loss": 0.769,
|
2705 |
+
"step": 384
|
2706 |
+
},
|
2707 |
+
{
|
2708 |
+
"epoch": 0.498220640569395,
|
2709 |
+
"grad_norm": 0.8699679970741272,
|
2710 |
+
"learning_rate": 9.81045332298419e-05,
|
2711 |
+
"loss": 0.9468,
|
2712 |
+
"step": 385
|
2713 |
}
|
2714 |
],
|
2715 |
"logging_steps": 1,
|
|
|
2729 |
"attributes": {}
|
2730 |
}
|
2731 |
},
|
2732 |
+
"total_flos": 4.304058111020237e+17,
|
2733 |
"train_batch_size": 4,
|
2734 |
"trial_name": null,
|
2735 |
"trial_params": null
|