{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.076167076167076,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.01, "learning_rate": 0.0, "loss": 1.8118, "step": 4 },
    { "epoch": 0.02, "learning_rate": 3.6585365853658536e-07, "loss": 1.8122, "step": 8 },
    { "epoch": 0.03, "learning_rate": 8.53658536585366e-07, "loss": 1.8174, "step": 12 },
    { "epoch": 0.04, "learning_rate": 1.3414634146341465e-06, "loss": 1.7616, "step": 16 },
    { "epoch": 0.05, "learning_rate": 1.8292682926829268e-06, "loss": 1.6875, "step": 20 },
    { "epoch": 0.06, "learning_rate": 2.317073170731708e-06, "loss": 1.5201, "step": 24 },
    { "epoch": 0.07, "learning_rate": 2.8048780487804884e-06, "loss": 1.3982, "step": 28 },
    { "epoch": 0.08, "learning_rate": 3.292682926829269e-06, "loss": 1.3541, "step": 32 },
    { "epoch": 0.09, "learning_rate": 3.780487804878049e-06, "loss": 1.2092, "step": 36 },
    { "epoch": 0.1, "learning_rate": 4.268292682926829e-06, "loss": 1.1599, "step": 40 },
    { "epoch": 0.1, "eval_loss": 1.142654299736023, "eval_runtime": 101.9854, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.127, "eval_wer": 15.213946117274169, "step": 40 },
    { "epoch": 1.01, "learning_rate": 4.75609756097561e-06, "loss": 1.0124, "step": 44 },
    { "epoch": 1.02, "learning_rate": 5.243902439024391e-06, "loss": 0.9171, "step": 48 },
    { "epoch": 1.03, "learning_rate": 5.731707317073171e-06, "loss": 0.8027, "step": 52 },
    { "epoch": 1.04, "learning_rate": 6.219512195121951e-06, "loss": 0.7284, "step": 56 },
    { "epoch": 1.05, "learning_rate": 6.707317073170733e-06, "loss": 0.6185, "step": 60 },
    { "epoch": 1.06, "learning_rate": 7.1951219512195125e-06, "loss": 0.57, "step": 64 },
    { "epoch": 1.07, "learning_rate": 7.682926829268293e-06, "loss": 0.4985, "step": 68 },
    { "epoch": 1.08, "learning_rate": 8.170731707317073e-06, "loss": 0.488, "step": 72 },
    { "epoch": 1.09, "learning_rate": 8.658536585365854e-06, "loss": 0.4569, "step": 76 },
    { "epoch": 1.1, "learning_rate": 9.146341463414635e-06, "loss": 0.4655, "step": 80 },
    { "epoch": 1.1, "eval_loss": 0.5613037943840027, "eval_runtime": 91.9697, "eval_samples_per_second": 4.284, "eval_steps_per_second": 0.141, "eval_wer": 17.591125198098258, "step": 80 },
    { "epoch": 2.0, "learning_rate": 9.634146341463415e-06, "loss": 0.425, "step": 84 },
    { "epoch": 2.01, "learning_rate": 9.96923076923077e-06, "loss": 0.4162, "step": 88 },
    { "epoch": 2.02, "learning_rate": 9.846153846153848e-06, "loss": 0.3809, "step": 92 },
    { "epoch": 2.03, "learning_rate": 9.723076923076924e-06, "loss": 0.3533, "step": 96 },
    { "epoch": 2.04, "learning_rate": 9.600000000000001e-06, "loss": 0.3511, "step": 100 },
    { "epoch": 2.05, "learning_rate": 9.476923076923079e-06, "loss": 0.3475, "step": 104 },
    { "epoch": 2.06, "learning_rate": 9.353846153846155e-06, "loss": 0.321, "step": 108 },
    { "epoch": 2.07, "learning_rate": 9.230769230769232e-06, "loss": 0.2859, "step": 112 },
    { "epoch": 2.08, "learning_rate": 9.107692307692308e-06, "loss": 0.3191, "step": 116 },
    { "epoch": 2.09, "learning_rate": 8.984615384615386e-06, "loss": 0.2753, "step": 120 },
    { "epoch": 2.09, "eval_loss": 0.5241264700889587, "eval_runtime": 88.0526, "eval_samples_per_second": 4.475, "eval_steps_per_second": 0.148, "eval_wer": 17.21321467755699, "step": 120 },
    { "epoch": 3.0, "learning_rate": 8.861538461538463e-06, "loss": 0.3104, "step": 124 },
    { "epoch": 3.01, "learning_rate": 8.73846153846154e-06, "loss": 0.2734, "step": 128 },
    { "epoch": 3.02, "learning_rate": 8.615384615384617e-06, "loss": 0.2608, "step": 132 },
    { "epoch": 3.03, "learning_rate": 8.492307692307693e-06, "loss": 0.2509, "step": 136 },
    { "epoch": 3.04, "learning_rate": 8.36923076923077e-06, "loss": 0.2548, "step": 140 },
    { "epoch": 3.05, "learning_rate": 8.246153846153848e-06, "loss": 0.2469, "step": 144 },
    { "epoch": 3.06, "learning_rate": 8.123076923076924e-06, "loss": 0.2231, "step": 148 },
    { "epoch": 3.07, "learning_rate": 8.000000000000001e-06, "loss": 0.2138, "step": 152 },
    { "epoch": 3.08, "learning_rate": 7.876923076923077e-06, "loss": 0.2349, "step": 156 },
    { "epoch": 3.09, "learning_rate": 7.753846153846155e-06, "loss": 0.2077, "step": 160 },
    { "epoch": 3.09, "eval_loss": 0.5241798758506775, "eval_runtime": 88.5317, "eval_samples_per_second": 4.45, "eval_steps_per_second": 0.147, "eval_wer": 17.26197732536877, "step": 160 },
    { "epoch": 3.1, "learning_rate": 7.630769230769232e-06, "loss": 0.2322, "step": 164 },
    { "epoch": 4.01, "learning_rate": 7.507692307692308e-06, "loss": 0.2036, "step": 168 },
    { "epoch": 4.02, "learning_rate": 7.384615384615386e-06, "loss": 0.2058, "step": 172 },
    { "epoch": 4.03, "learning_rate": 7.261538461538462e-06, "loss": 0.1797, "step": 176 },
    { "epoch": 4.04, "learning_rate": 7.1384615384615385e-06, "loss": 0.186, "step": 180 },
    { "epoch": 4.05, "learning_rate": 7.015384615384616e-06, "loss": 0.2035, "step": 184 },
    { "epoch": 4.06, "learning_rate": 6.892307692307693e-06, "loss": 0.1794, "step": 188 },
    { "epoch": 4.07, "learning_rate": 6.76923076923077e-06, "loss": 0.1589, "step": 192 },
    { "epoch": 4.08, "learning_rate": 6.646153846153846e-06, "loss": 0.1879, "step": 196 },
    { "epoch": 4.09, "learning_rate": 6.523076923076923e-06, "loss": 0.1636, "step": 200 },
    { "epoch": 4.09, "eval_loss": 0.5289868712425232, "eval_runtime": 95.5188, "eval_samples_per_second": 4.125, "eval_steps_per_second": 0.136, "eval_wer": 17.66426916981592, "step": 200 },
    { "epoch": 4.1, "learning_rate": 6.4000000000000006e-06, "loss": 0.1767, "step": 204 },
    { "epoch": 5.01, "learning_rate": 6.276923076923077e-06, "loss": 0.1657, "step": 208 },
    { "epoch": 5.02, "learning_rate": 6.153846153846155e-06, "loss": 0.1607, "step": 212 },
    { "epoch": 5.03, "learning_rate": 6.030769230769231e-06, "loss": 0.1458, "step": 216 },
    { "epoch": 5.04, "learning_rate": 5.907692307692308e-06, "loss": 0.1541, "step": 220 },
    { "epoch": 5.05, "learning_rate": 5.784615384615385e-06, "loss": 0.1494, "step": 224 },
    { "epoch": 5.06, "learning_rate": 5.661538461538462e-06, "loss": 0.144, "step": 228 },
    { "epoch": 5.07, "learning_rate": 5.538461538461539e-06, "loss": 0.1311, "step": 232 },
    { "epoch": 5.08, "learning_rate": 5.415384615384615e-06, "loss": 0.1411, "step": 236 },
    { "epoch": 5.09, "learning_rate": 5.292307692307693e-06, "loss": 0.1322, "step": 240 },
    { "epoch": 5.09, "eval_loss": 0.5350630283355713, "eval_runtime": 92.5111, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.141, "eval_wer": 18.2128489576984, "step": 240 },
    { "epoch": 5.1, "learning_rate": 5.16923076923077e-06, "loss": 0.1436, "step": 244 },
    { "epoch": 6.0, "learning_rate": 5.046153846153846e-06, "loss": 0.1375, "step": 248 },
    { "epoch": 6.01, "learning_rate": 4.923076923076924e-06, "loss": 0.1361, "step": 252 },
    { "epoch": 6.02, "learning_rate": 4.800000000000001e-06, "loss": 0.129, "step": 256 },
    { "epoch": 6.03, "learning_rate": 4.676923076923077e-06, "loss": 0.1127, "step": 260 },
    { "epoch": 6.04, "learning_rate": 4.553846153846154e-06, "loss": 0.1266, "step": 264 },
    { "epoch": 6.05, "learning_rate": 4.430769230769232e-06, "loss": 0.1193, "step": 268 },
    { "epoch": 6.06, "learning_rate": 4.307692307692308e-06, "loss": 0.1127, "step": 272 },
    { "epoch": 6.07, "learning_rate": 4.184615384615385e-06, "loss": 0.1064, "step": 276 },
    { "epoch": 6.08, "learning_rate": 4.061538461538462e-06, "loss": 0.123, "step": 280 },
    { "epoch": 6.08, "eval_loss": 0.5429388284683228, "eval_runtime": 91.5818, "eval_samples_per_second": 4.302, "eval_steps_per_second": 0.142, "eval_wer": 18.907716689016212, "step": 280 },
    { "epoch": 6.09, "learning_rate": 3.938461538461539e-06, "loss": 0.1057, "step": 284 },
    { "epoch": 7.0, "learning_rate": 3.815384615384616e-06, "loss": 0.1258, "step": 288 },
    { "epoch": 7.01, "learning_rate": 3.692307692307693e-06, "loss": 0.1108, "step": 292 },
    { "epoch": 7.02, "learning_rate": 3.5692307692307692e-06, "loss": 0.1115, "step": 296 },
    { "epoch": 7.03, "learning_rate": 3.4461538461538464e-06, "loss": 0.0998, "step": 300 },
    { "epoch": 7.04, "learning_rate": 3.323076923076923e-06, "loss": 0.1106, "step": 304 },
    { "epoch": 7.05, "learning_rate": 3.2000000000000003e-06, "loss": 0.1045, "step": 308 },
    { "epoch": 7.06, "learning_rate": 3.0769230769230774e-06, "loss": 0.0908, "step": 312 },
    { "epoch": 7.07, "learning_rate": 2.953846153846154e-06, "loss": 0.0931, "step": 316 },
    { "epoch": 7.08, "learning_rate": 2.830769230769231e-06, "loss": 0.1074, "step": 320 },
    { "epoch": 7.08, "eval_loss": 0.5500437021255493, "eval_runtime": 104.0907, "eval_samples_per_second": 3.785, "eval_steps_per_second": 0.125, "eval_wer": 19.054004632451544, "step": 320 },
    { "epoch": 7.09, "learning_rate": 2.7076923076923076e-06, "loss": 0.0937, "step": 324 },
    { "epoch": 7.1, "learning_rate": 2.584615384615385e-06, "loss": 0.1091, "step": 328 },
    { "epoch": 8.01, "learning_rate": 2.461538461538462e-06, "loss": 0.0951, "step": 332 },
    { "epoch": 8.02, "learning_rate": 2.3384615384615387e-06, "loss": 0.1003, "step": 336 },
    { "epoch": 8.03, "learning_rate": 2.215384615384616e-06, "loss": 0.0836, "step": 340 },
    { "epoch": 8.04, "learning_rate": 2.0923076923076926e-06, "loss": 0.0907, "step": 344 },
    { "epoch": 8.05, "learning_rate": 1.9692307692307693e-06, "loss": 0.1013, "step": 348 },
    { "epoch": 8.06, "learning_rate": 1.8461538461538465e-06, "loss": 0.0891, "step": 352 },
    { "epoch": 8.07, "learning_rate": 1.7230769230769232e-06, "loss": 0.077, "step": 356 },
    { "epoch": 8.08, "learning_rate": 1.6000000000000001e-06, "loss": 0.1007, "step": 360 },
    { "epoch": 8.08, "eval_loss": 0.5552565455436707, "eval_runtime": 88.458, "eval_samples_per_second": 4.454, "eval_steps_per_second": 0.147, "eval_wer": 19.310008533463368, "step": 360 },
    { "epoch": 8.09, "learning_rate": 1.476923076923077e-06, "loss": 0.0849, "step": 364 },
    { "epoch": 8.1, "learning_rate": 1.3538461538461538e-06, "loss": 0.0971, "step": 368 },
    { "epoch": 9.01, "learning_rate": 1.230769230769231e-06, "loss": 0.0876, "step": 372 },
    { "epoch": 9.02, "learning_rate": 1.107692307692308e-06, "loss": 0.0879, "step": 376 },
    { "epoch": 9.03, "learning_rate": 9.846153846153847e-07, "loss": 0.0805, "step": 380 },
    { "epoch": 9.04, "learning_rate": 8.615384615384616e-07, "loss": 0.0888, "step": 384 },
    { "epoch": 9.05, "learning_rate": 7.384615384615385e-07, "loss": 0.0858, "step": 388 },
    { "epoch": 9.06, "learning_rate": 6.153846153846155e-07, "loss": 0.0825, "step": 392 },
    { "epoch": 9.07, "learning_rate": 4.923076923076923e-07, "loss": 0.0748, "step": 396 },
    { "epoch": 9.08, "learning_rate": 3.6923076923076927e-07, "loss": 0.0876, "step": 400 },
    { "epoch": 9.08, "eval_loss": 0.5568162202835083, "eval_runtime": 89.7223, "eval_samples_per_second": 4.391, "eval_steps_per_second": 0.145, "eval_wer": 19.3465805193222, "step": 400 }
  ],
  "max_steps": 407,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 6.2536891981824e+17,
  "trial_name": null,
  "trial_params": null
}