Training in progress, step 725, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59d4b20c39e023774f653c79696e7bf1a0895c40499204e0da55c8972f138d66
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e425df4cff4f5a833435a908df13e0e68fb91e068e65be0350970182c98532fe
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cf257bb73c12dfb0ceb40ef8d7dab03fb8c678e0b5befc5cb019048e159abe6
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93cb95f675bf4560789b437cb4b6409914c75b59d4784dc1ea4e7bea55c15059
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5063,6 +5063,41 @@
|
|
5063 |
"learning_rate": 9.256184469043851e-05,
|
5064 |
"loss": 0.7484,
|
5065 |
"step": 720
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5066 |
}
|
5067 |
],
|
5068 |
"logging_steps": 1,
|
@@ -5082,7 +5117,7 @@
|
|
5082 |
"attributes": {}
|
5083 |
}
|
5084 |
},
|
5085 |
-
"total_flos": 8.
|
5086 |
"train_batch_size": 4,
|
5087 |
"trial_name": null,
|
5088 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.938207699773536,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 725,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5063 |
"learning_rate": 9.256184469043851e-05,
|
5064 |
"loss": 0.7484,
|
5065 |
"step": 720
|
5066 |
+
},
|
5067 |
+
{
|
5068 |
+
"epoch": 0.9330313814299579,
|
5069 |
+
"grad_norm": 0.7698187232017517,
|
5070 |
+
"learning_rate": 9.254019439590835e-05,
|
5071 |
+
"loss": 0.7563,
|
5072 |
+
"step": 721
|
5073 |
+
},
|
5074 |
+
{
|
5075 |
+
"epoch": 0.9343254610158525,
|
5076 |
+
"grad_norm": 0.8331950306892395,
|
5077 |
+
"learning_rate": 9.251851517798514e-05,
|
5078 |
+
"loss": 0.8826,
|
5079 |
+
"step": 722
|
5080 |
+
},
|
5081 |
+
{
|
5082 |
+
"epoch": 0.935619540601747,
|
5083 |
+
"grad_norm": 0.7643804550170898,
|
5084 |
+
"learning_rate": 9.24968070514087e-05,
|
5085 |
+
"loss": 1.0688,
|
5086 |
+
"step": 723
|
5087 |
+
},
|
5088 |
+
{
|
5089 |
+
"epoch": 0.9369136201876416,
|
5090 |
+
"grad_norm": 0.802943229675293,
|
5091 |
+
"learning_rate": 9.247507003093858e-05,
|
5092 |
+
"loss": 0.8872,
|
5093 |
+
"step": 724
|
5094 |
+
},
|
5095 |
+
{
|
5096 |
+
"epoch": 0.938207699773536,
|
5097 |
+
"grad_norm": 0.8977981209754944,
|
5098 |
+
"learning_rate": 9.245330413135395e-05,
|
5099 |
+
"loss": 0.8946,
|
5100 |
+
"step": 725
|
5101 |
}
|
5102 |
],
|
5103 |
"logging_steps": 1,
|
|
|
5117 |
"attributes": {}
|
5118 |
}
|
5119 |
},
|
5120 |
+
"total_flos": 8.105044494778368e+17,
|
5121 |
"train_batch_size": 4,
|
5122 |
"trial_name": null,
|
5123 |
"trial_params": null
|