Training in progress, step 455, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b043cd7a58160e6996c90fab1beb1517bb2613e4b0999b16dca09195ed9daad8
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa5d29c4411fdeb35a1244b024f4c5c0c57180230285a448b1e43fc79ce803fc
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:450d4f0e3a0bdbd8c489459f618a362a3c0456d0f149f06ab048c544e79c9d17
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ef7e9caf74d940d149631b7d434e8f3ed3d9f20dbfe42784c1a25924da5d43b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3173,6 +3173,41 @@
|
|
3173 |
"learning_rate": 9.730493562858953e-05,
|
3174 |
"loss": 0.8234,
|
3175 |
"step": 450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3176 |
}
|
3177 |
],
|
3178 |
"logging_steps": 1,
|
@@ -3192,7 +3227,7 @@
|
|
3192 |
"attributes": {}
|
3193 |
}
|
3194 |
},
|
3195 |
-
"total_flos": 5.
|
3196 |
"train_batch_size": 4,
|
3197 |
"trial_name": null,
|
3198 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.5888062115820123,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 455,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3173 |
"learning_rate": 9.730493562858953e-05,
|
3174 |
"loss": 0.8234,
|
3175 |
"step": 450
|
3176 |
+
},
|
3177 |
+
{
|
3178 |
+
"epoch": 0.5836298932384342,
|
3179 |
+
"grad_norm": 0.8427807688713074,
|
3180 |
+
"learning_rate": 9.729156660844017e-05,
|
3181 |
+
"loss": 1.1023,
|
3182 |
+
"step": 451
|
3183 |
+
},
|
3184 |
+
{
|
3185 |
+
"epoch": 0.5849239728243287,
|
3186 |
+
"grad_norm": 0.8546382188796997,
|
3187 |
+
"learning_rate": 9.727816543440458e-05,
|
3188 |
+
"loss": 0.886,
|
3189 |
+
"step": 452
|
3190 |
+
},
|
3191 |
+
{
|
3192 |
+
"epoch": 0.5862180524102232,
|
3193 |
+
"grad_norm": 0.6701838970184326,
|
3194 |
+
"learning_rate": 9.726473211559437e-05,
|
3195 |
+
"loss": 0.7861,
|
3196 |
+
"step": 453
|
3197 |
+
},
|
3198 |
+
{
|
3199 |
+
"epoch": 0.5875121319961177,
|
3200 |
+
"grad_norm": 1.010311245918274,
|
3201 |
+
"learning_rate": 9.725126666114292e-05,
|
3202 |
+
"loss": 1.0393,
|
3203 |
+
"step": 454
|
3204 |
+
},
|
3205 |
+
{
|
3206 |
+
"epoch": 0.5888062115820123,
|
3207 |
+
"grad_norm": 0.8440571427345276,
|
3208 |
+
"learning_rate": 9.72377690802055e-05,
|
3209 |
+
"loss": 0.9959,
|
3210 |
+
"step": 455
|
3211 |
}
|
3212 |
],
|
3213 |
"logging_steps": 1,
|
|
|
3227 |
"attributes": {}
|
3228 |
}
|
3229 |
},
|
3230 |
+
"total_flos": 5.0866141312057344e+17,
|
3231 |
"train_batch_size": 4,
|
3232 |
"trial_name": null,
|
3233 |
"trial_params": null
|