Training in progress, step 585, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86e81dd5a890a7956466a556372acdea40e7874dd36d48313477dc752b4cb1d9
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:613659f29181eb8b432f0d2e9dfcae177dee2e33e9738d77d19452cd77b18bf8
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d53b54b216d5077c52ca37e8c9f0b1685f0a5a9e8331d755086a0f111dd328a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7c9085847647e6cd13695f1ec4ce9d144b2ab365d6811882265bef6d20ea83e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4083,6 +4083,41 @@
|
|
4083 |
"learning_rate": 9.530085881215705e-05,
|
4084 |
"loss": 0.8092,
|
4085 |
"step": 580
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4086 |
}
|
4087 |
],
|
4088 |
"logging_steps": 1,
|
@@ -4102,7 +4137,7 @@
|
|
4102 |
"attributes": {}
|
4103 |
}
|
4104 |
},
|
4105 |
-
"total_flos": 6.
|
4106 |
"train_batch_size": 4,
|
4107 |
"trial_name": null,
|
4108 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7570365577483015,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 585,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4083 |
"learning_rate": 9.530085881215705e-05,
|
4084 |
"loss": 0.8092,
|
4085 |
"step": 580
|
4086 |
+
},
|
4087 |
+
{
|
4088 |
+
"epoch": 0.7518602394047234,
|
4089 |
+
"grad_norm": 0.8892885446548462,
|
4090 |
+
"learning_rate": 9.528339393146033e-05,
|
4091 |
+
"loss": 0.8422,
|
4092 |
+
"step": 581
|
4093 |
+
},
|
4094 |
+
{
|
4095 |
+
"epoch": 0.7531543189906179,
|
4096 |
+
"grad_norm": 0.7766391634941101,
|
4097 |
+
"learning_rate": 9.526589826224887e-05,
|
4098 |
+
"loss": 0.9596,
|
4099 |
+
"step": 582
|
4100 |
+
},
|
4101 |
+
{
|
4102 |
+
"epoch": 0.7544483985765125,
|
4103 |
+
"grad_norm": 0.8290911316871643,
|
4104 |
+
"learning_rate": 9.524837181641813e-05,
|
4105 |
+
"loss": 0.9624,
|
4106 |
+
"step": 583
|
4107 |
+
},
|
4108 |
+
{
|
4109 |
+
"epoch": 0.755742478162407,
|
4110 |
+
"grad_norm": 0.7832044363021851,
|
4111 |
+
"learning_rate": 9.523081460588444e-05,
|
4112 |
+
"loss": 0.8141,
|
4113 |
+
"step": 584
|
4114 |
+
},
|
4115 |
+
{
|
4116 |
+
"epoch": 0.7570365577483015,
|
4117 |
+
"grad_norm": 0.9279314875602722,
|
4118 |
+
"learning_rate": 9.521322664258508e-05,
|
4119 |
+
"loss": 0.8869,
|
4120 |
+
"step": 585
|
4121 |
}
|
4122 |
],
|
4123 |
"logging_steps": 1,
|
|
|
4137 |
"attributes": {}
|
4138 |
}
|
4139 |
},
|
4140 |
+
"total_flos": 6.539932454407373e+17,
|
4141 |
"train_batch_size": 4,
|
4142 |
"trial_name": null,
|
4143 |
"trial_params": null
|