End of training
Browse files
all_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_loss": 1.
|
4 |
-
"eval_runtime":
|
5 |
-
"eval_samples_per_second": 1.
|
6 |
-
"eval_steps_per_second": 0.
|
7 |
-
"eval_wer": 57.
|
8 |
-
"train_loss": 0.
|
9 |
-
"train_runtime":
|
10 |
-
"train_samples_per_second":
|
11 |
-
"train_steps_per_second": 0.
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 18.52,
|
3 |
+
"eval_loss": 1.1797882318496704,
|
4 |
+
"eval_runtime": 450.6771,
|
5 |
+
"eval_samples_per_second": 1.136,
|
6 |
+
"eval_steps_per_second": 0.142,
|
7 |
+
"eval_wer": 57.87681598062954,
|
8 |
+
"train_loss": 0.0056979965269565586,
|
9 |
+
"train_runtime": 579.6036,
|
10 |
+
"train_samples_per_second": 13.803,
|
11 |
+
"train_steps_per_second": 0.863
|
12 |
}
|
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_loss": 1.
|
4 |
-
"eval_runtime":
|
5 |
-
"eval_samples_per_second": 1.
|
6 |
-
"eval_steps_per_second": 0.
|
7 |
-
"eval_wer": 57.
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 18.52,
|
3 |
+
"eval_loss": 1.1797882318496704,
|
4 |
+
"eval_runtime": 450.6771,
|
5 |
+
"eval_samples_per_second": 1.136,
|
6 |
+
"eval_steps_per_second": 0.142,
|
7 |
+
"eval_wer": 57.87681598062954
|
8 |
}
|
runs/Dec16_17-39-22_129-146-104-29/events.out.tfevents.1671213470.129-146-104-29.136366.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0e41f9f380a847cd5940de28e629b05b10d7854b06fc01af4b0b868a9f4c782
|
3 |
+
size 358
|
train_results.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples_per_second":
|
6 |
-
"train_steps_per_second": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 18.52,
|
3 |
+
"train_loss": 0.0056979965269565586,
|
4 |
+
"train_runtime": 579.6036,
|
5 |
+
"train_samples_per_second": 13.803,
|
6 |
+
"train_steps_per_second": 0.863
|
7 |
}
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 57.
|
3 |
-
"best_model_checkpoint": "./checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -176,18 +176,87 @@
|
|
176 |
"step": 400
|
177 |
},
|
178 |
{
|
179 |
-
"epoch":
|
180 |
-
"
|
181 |
-
"
|
182 |
-
"
|
183 |
-
|
184 |
-
|
185 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
}
|
187 |
],
|
188 |
-
"max_steps":
|
189 |
-
"num_train_epochs":
|
190 |
-
"total_flos":
|
191 |
"trial_name": null,
|
192 |
"trial_params": null
|
193 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 57.87681598062954,
|
3 |
+
"best_model_checkpoint": "./checkpoint-500",
|
4 |
+
"epoch": 18.51851851851852,
|
5 |
+
"global_step": 500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
176 |
"step": 400
|
177 |
},
|
178 |
{
|
179 |
+
"epoch": 15.19,
|
180 |
+
"learning_rate": 8.120000000000002e-06,
|
181 |
+
"loss": 0.0465,
|
182 |
+
"step": 410
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"epoch": 15.56,
|
186 |
+
"learning_rate": 8.32e-06,
|
187 |
+
"loss": 0.0392,
|
188 |
+
"step": 420
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"epoch": 15.93,
|
192 |
+
"learning_rate": 8.52e-06,
|
193 |
+
"loss": 0.0354,
|
194 |
+
"step": 430
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 16.3,
|
198 |
+
"learning_rate": 8.720000000000001e-06,
|
199 |
+
"loss": 0.0296,
|
200 |
+
"step": 440
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"epoch": 16.67,
|
204 |
+
"learning_rate": 8.920000000000001e-06,
|
205 |
+
"loss": 0.0255,
|
206 |
+
"step": 450
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 17.04,
|
210 |
+
"learning_rate": 9.12e-06,
|
211 |
+
"loss": 0.0274,
|
212 |
+
"step": 460
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 17.41,
|
216 |
+
"learning_rate": 9.32e-06,
|
217 |
+
"loss": 0.0216,
|
218 |
+
"step": 470
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"epoch": 17.78,
|
222 |
+
"learning_rate": 9.52e-06,
|
223 |
+
"loss": 0.022,
|
224 |
+
"step": 480
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"epoch": 18.15,
|
228 |
+
"learning_rate": 9.72e-06,
|
229 |
+
"loss": 0.0219,
|
230 |
+
"step": 490
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"epoch": 18.52,
|
234 |
+
"learning_rate": 9.920000000000002e-06,
|
235 |
+
"loss": 0.0159,
|
236 |
+
"step": 500
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"epoch": 18.52,
|
240 |
+
"eval_loss": 1.1797882318496704,
|
241 |
+
"eval_runtime": 450.9533,
|
242 |
+
"eval_samples_per_second": 1.135,
|
243 |
+
"eval_steps_per_second": 0.142,
|
244 |
+
"eval_wer": 57.87681598062954,
|
245 |
+
"step": 500
|
246 |
+
},
|
247 |
+
{
|
248 |
+
"epoch": 18.52,
|
249 |
+
"step": 500,
|
250 |
+
"total_flos": 2.30348866535424e+18,
|
251 |
+
"train_loss": 0.0056979965269565586,
|
252 |
+
"train_runtime": 579.6036,
|
253 |
+
"train_samples_per_second": 13.803,
|
254 |
+
"train_steps_per_second": 0.863
|
255 |
}
|
256 |
],
|
257 |
+
"max_steps": 500,
|
258 |
+
"num_train_epochs": 19,
|
259 |
+
"total_flos": 2.30348866535424e+18,
|
260 |
"trial_name": null,
|
261 |
"trial_params": null
|
262 |
}
|