{
  "best_metric": 0.3162029981613159,
  "best_model_checkpoint": "ai-light-dance_drums_ft_pretrain_wav2vec2-base-new_onset-idmt-2/checkpoint-459",
  "epoch": 100.0,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 97.93186950683594,
      "eval_runtime": 2.7466,
      "eval_samples_per_second": 6.19,
      "eval_steps_per_second": 1.82,
      "eval_wer": 1.0,
      "step": 9
    },
    {
      "epoch": 1.11,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 17.1836,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_loss": 45.722869873046875,
      "eval_runtime": 2.9189,
      "eval_samples_per_second": 5.824,
      "eval_steps_per_second": 1.713,
      "eval_wer": 1.0,
      "step": 18
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.00013,
      "loss": 13.2869,
      "step": 20
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.757852554321289,
      "eval_runtime": 2.8561,
      "eval_samples_per_second": 5.952,
      "eval_steps_per_second": 1.751,
      "eval_wer": 1.0,
      "step": 27
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.00023,
      "loss": 2.6495,
      "step": 30
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.7427165508270264,
      "eval_runtime": 2.8408,
      "eval_samples_per_second": 5.984,
      "eval_steps_per_second": 1.76,
      "eval_wer": 1.0,
      "step": 36
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.0002989655172413793,
      "loss": 1.7135,
      "step": 40
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.5476696491241455,
      "eval_runtime": 2.8174,
      "eval_samples_per_second": 6.034,
      "eval_steps_per_second": 1.775,
      "eval_wer": 1.0,
      "step": 45
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.00029551724137931033,
      "loss": 1.4609,
      "step": 50
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.7126435041427612,
      "eval_runtime": 2.9262,
      "eval_samples_per_second": 5.81,
      "eval_steps_per_second": 1.709,
      "eval_wer": 1.0,
      "step": 54
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.00029206896551724134,
      "loss": 1.374,
      "step": 60
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.3668222427368164,
      "eval_runtime": 2.8538,
      "eval_samples_per_second": 5.957,
      "eval_steps_per_second": 1.752,
      "eval_wer": 0.9966666666666667,
      "step": 63
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.0002886206896551724,
      "loss": 1.2951,
      "step": 70
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.127436637878418,
      "eval_runtime": 2.8558,
      "eval_samples_per_second": 5.953,
      "eval_steps_per_second": 1.751,
      "eval_wer": 0.9866666666666667,
      "step": 72
    },
    {
      "epoch": 8.89,
      "learning_rate": 0.00028517241379310345,
      "loss": 1.0493,
      "step": 80
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.7346072196960449,
      "eval_runtime": 3.0416,
      "eval_samples_per_second": 5.589,
      "eval_steps_per_second": 1.644,
      "eval_wer": 0.5177777777777778,
      "step": 81
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.00028172413793103445,
      "loss": 0.8835,
      "step": 90
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.7663962841033936,
      "eval_runtime": 2.8583,
      "eval_samples_per_second": 5.947,
      "eval_steps_per_second": 1.749,
      "eval_wer": 0.4122222222222222,
      "step": 90
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.5438075065612793,
      "eval_runtime": 2.9205,
      "eval_samples_per_second": 5.821,
      "eval_steps_per_second": 1.712,
      "eval_wer": 0.38666666666666666,
      "step": 99
    },
    {
      "epoch": 11.11,
      "learning_rate": 0.0002782758620689655,
      "loss": 0.7019,
      "step": 100
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.4876076281070709,
      "eval_runtime": 2.8398,
      "eval_samples_per_second": 5.986,
      "eval_steps_per_second": 1.761,
      "eval_wer": 0.3711111111111111,
      "step": 108
    },
    {
      "epoch": 12.22,
      "learning_rate": 0.0002748275862068965,
      "loss": 0.6906,
      "step": 110
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.519428551197052,
      "eval_runtime": 2.7785,
      "eval_samples_per_second": 6.119,
      "eval_steps_per_second": 1.8,
      "eval_wer": 0.36,
      "step": 117
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.00027137931034482756,
      "loss": 0.6535,
      "step": 120
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.4489333927631378,
      "eval_runtime": 2.8252,
      "eval_samples_per_second": 6.017,
      "eval_steps_per_second": 1.77,
      "eval_wer": 0.35555555555555557,
      "step": 126
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.0002679310344827586,
      "loss": 0.6225,
      "step": 130
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.4382663071155548,
      "eval_runtime": 2.7906,
      "eval_samples_per_second": 6.092,
      "eval_steps_per_second": 1.792,
      "eval_wer": 0.3333333333333333,
      "step": 135
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.0002644827586206896,
      "loss": 0.547,
      "step": 140
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.4521160423755646,
      "eval_runtime": 2.8251,
      "eval_samples_per_second": 6.017,
      "eval_steps_per_second": 1.77,
      "eval_wer": 0.35555555555555557,
      "step": 144
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.00026103448275862067,
      "loss": 0.5525,
      "step": 150
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.5476210117340088,
      "eval_runtime": 2.8122,
      "eval_samples_per_second": 6.045,
      "eval_steps_per_second": 1.778,
      "eval_wer": 0.33444444444444443,
      "step": 153
    },
    {
      "epoch": 17.78,
      "learning_rate": 0.0002575862068965517,
      "loss": 0.6152,
      "step": 160
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.4465982913970947,
      "eval_runtime": 2.8461,
      "eval_samples_per_second": 5.973,
      "eval_steps_per_second": 1.757,
      "eval_wer": 0.36,
      "step": 162
    },
    {
      "epoch": 18.89,
      "learning_rate": 0.00025413793103448273,
      "loss": 0.5055,
      "step": 170
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.39812201261520386,
      "eval_runtime": 2.805,
      "eval_samples_per_second": 6.061,
      "eval_steps_per_second": 1.783,
      "eval_wer": 0.32555555555555554,
      "step": 171
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.0002506896551724138,
      "loss": 0.5204,
      "step": 180
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.4923681318759918,
      "eval_runtime": 2.8028,
      "eval_samples_per_second": 6.065,
      "eval_steps_per_second": 1.784,
      "eval_wer": 0.30777777777777776,
      "step": 180
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.4085298776626587,
      "eval_runtime": 2.8109,
      "eval_samples_per_second": 6.048,
      "eval_steps_per_second": 1.779,
      "eval_wer": 0.32,
      "step": 189
    },
    {
      "epoch": 21.11,
      "learning_rate": 0.0002472413793103448,
      "loss": 0.4742,
      "step": 190
    },
    {
      "epoch": 22.0,
      "eval_loss": 0.4254695177078247,
      "eval_runtime": 2.7979,
      "eval_samples_per_second": 6.076,
      "eval_steps_per_second": 1.787,
      "eval_wer": 0.3233333333333333,
      "step": 198
    },
    {
      "epoch": 22.22,
      "learning_rate": 0.00024379310344827584,
      "loss": 0.4774,
      "step": 200
    },
    {
      "epoch": 23.0,
      "eval_loss": 0.43211016058921814,
      "eval_runtime": 2.8119,
      "eval_samples_per_second": 6.046,
      "eval_steps_per_second": 1.778,
      "eval_wer": 0.28888888888888886,
      "step": 207
    },
    {
      "epoch": 23.33,
      "learning_rate": 0.00024034482758620687,
      "loss": 0.5029,
      "step": 210
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.44116583466529846,
      "eval_runtime": 2.7953,
      "eval_samples_per_second": 6.082,
      "eval_steps_per_second": 1.789,
      "eval_wer": 0.31666666666666665,
      "step": 216
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.00023689655172413792,
      "loss": 0.4889,
      "step": 220
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.40511757135391235,
      "eval_runtime": 2.8082,
      "eval_samples_per_second": 6.054,
      "eval_steps_per_second": 1.781,
      "eval_wer": 0.30444444444444446,
      "step": 225
    },
    {
      "epoch": 25.56,
      "learning_rate": 0.00023344827586206895,
      "loss": 0.4446,
      "step": 230
    },
    {
      "epoch": 26.0,
      "eval_loss": 0.39176392555236816,
      "eval_runtime": 2.8027,
      "eval_samples_per_second": 6.066,
      "eval_steps_per_second": 1.784,
      "eval_wer": 0.3088888888888889,
      "step": 234
    },
    {
      "epoch": 26.67,
      "learning_rate": 0.00023,
      "loss": 0.4255,
      "step": 240
    },
    {
      "epoch": 27.0,
      "eval_loss": 0.403872549533844,
      "eval_runtime": 2.786,
      "eval_samples_per_second": 6.102,
      "eval_steps_per_second": 1.795,
      "eval_wer": 0.29555555555555557,
      "step": 243
    },
    {
      "epoch": 27.78,
      "learning_rate": 0.000226551724137931,
      "loss": 0.4396,
      "step": 250
    },
    {
      "epoch": 28.0,
      "eval_loss": 0.4112667143344879,
      "eval_runtime": 2.8335,
      "eval_samples_per_second": 6.0,
      "eval_steps_per_second": 1.765,
      "eval_wer": 0.29555555555555557,
      "step": 252
    },
    {
      "epoch": 28.89,
      "learning_rate": 0.00022310344827586204,
      "loss": 0.4265,
      "step": 260
    },
    {
      "epoch": 29.0,
      "eval_loss": 0.5575971603393555,
      "eval_runtime": 2.7693,
      "eval_samples_per_second": 6.139,
      "eval_steps_per_second": 1.806,
      "eval_wer": 0.3022222222222222,
      "step": 261
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.0002196551724137931,
      "loss": 0.4289,
      "step": 270
    },
    {
      "epoch": 30.0,
      "eval_loss": 0.35578060150146484,
      "eval_runtime": 2.7632,
      "eval_samples_per_second": 6.152,
      "eval_steps_per_second": 1.81,
      "eval_wer": 0.30777777777777776,
      "step": 270
    },
    {
      "epoch": 31.0,
      "eval_loss": 0.33904996514320374,
      "eval_runtime": 2.7428,
      "eval_samples_per_second": 6.198,
      "eval_steps_per_second": 1.823,
      "eval_wer": 0.31666666666666665,
      "step": 279
    },
    {
      "epoch": 31.11,
      "learning_rate": 0.00021620689655172412,
      "loss": 0.3817,
      "step": 280
    },
    {
      "epoch": 32.0,
      "eval_loss": 0.3738517165184021,
      "eval_runtime": 2.7731,
      "eval_samples_per_second": 6.13,
      "eval_steps_per_second": 1.803,
      "eval_wer": 0.3422222222222222,
      "step": 288
    },
    {
      "epoch": 32.22,
      "learning_rate": 0.00021275862068965515,
      "loss": 0.4192,
      "step": 290
    },
    {
      "epoch": 33.0,
      "eval_loss": 0.31792330741882324,
      "eval_runtime": 2.7573,
      "eval_samples_per_second": 6.166,
      "eval_steps_per_second": 1.813,
      "eval_wer": 0.3055555555555556,
      "step": 297
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.0002093103448275862,
      "loss": 0.3719,
      "step": 300
    },
    {
      "epoch": 34.0,
      "eval_loss": 0.3621741235256195,
      "eval_runtime": 2.7351,
      "eval_samples_per_second": 6.215,
      "eval_steps_per_second": 1.828,
      "eval_wer": 0.30333333333333334,
      "step": 306
    },
    {
      "epoch": 34.44,
      "learning_rate": 0.00020586206896551723,
      "loss": 0.3685,
      "step": 310
    },
    {
      "epoch": 35.0,
      "eval_loss": 0.40566930174827576,
      "eval_runtime": 2.7612,
      "eval_samples_per_second": 6.157,
      "eval_steps_per_second": 1.811,
      "eval_wer": 0.32555555555555554,
      "step": 315
    },
    {
      "epoch": 35.56,
      "learning_rate": 0.0002024137931034483,
      "loss": 0.3752,
      "step": 320
    },
    {
      "epoch": 36.0,
      "eval_loss": 0.39497387409210205,
      "eval_runtime": 2.7507,
      "eval_samples_per_second": 6.18,
      "eval_steps_per_second": 1.818,
      "eval_wer": 0.31,
      "step": 324
    },
    {
      "epoch": 36.67,
      "learning_rate": 0.0001989655172413793,
      "loss": 0.378,
      "step": 330
    },
    {
      "epoch": 37.0,
      "eval_loss": 0.39072442054748535,
      "eval_runtime": 2.7439,
      "eval_samples_per_second": 6.196,
      "eval_steps_per_second": 1.822,
      "eval_wer": 0.3566666666666667,
      "step": 333
    },
    {
      "epoch": 37.78,
      "learning_rate": 0.00019551724137931032,
      "loss": 0.4438,
      "step": 340
    },
    {
      "epoch": 38.0,
      "eval_loss": 0.33762815594673157,
      "eval_runtime": 2.7815,
      "eval_samples_per_second": 6.112,
      "eval_steps_per_second": 1.798,
      "eval_wer": 0.31,
      "step": 342
    },
    {
      "epoch": 38.89,
      "learning_rate": 0.00019206896551724134,
      "loss": 0.3978,
      "step": 350
    },
    {
      "epoch": 39.0,
      "eval_loss": 0.3395032286643982,
      "eval_runtime": 2.8043,
      "eval_samples_per_second": 6.062,
      "eval_steps_per_second": 1.783,
      "eval_wer": 0.2833333333333333,
      "step": 351
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.0001886206896551724,
      "loss": 0.3639,
      "step": 360
    },
    {
      "epoch": 40.0,
      "eval_loss": 0.36456623673439026,
      "eval_runtime": 2.7816,
      "eval_samples_per_second": 6.112,
      "eval_steps_per_second": 1.798,
      "eval_wer": 0.28555555555555556,
      "step": 360
    },
    {
      "epoch": 41.0,
      "eval_loss": 0.3546362519264221,
      "eval_runtime": 2.7679,
      "eval_samples_per_second": 6.142,
      "eval_steps_per_second": 1.806,
      "eval_wer": 0.30444444444444446,
      "step": 369
    },
    {
      "epoch": 41.11,
      "learning_rate": 0.00018517241379310343,
      "loss": 0.3535,
      "step": 370
    },
    {
      "epoch": 42.0,
      "eval_loss": 0.36989736557006836,
      "eval_runtime": 2.8116,
      "eval_samples_per_second": 6.046,
      "eval_steps_per_second": 1.778,
      "eval_wer": 0.28888888888888886,
      "step": 378
    },
    {
      "epoch": 42.22,
      "learning_rate": 0.00018172413793103448,
      "loss": 0.3311,
      "step": 380
    },
    {
      "epoch": 43.0,
      "eval_loss": 0.38820379972457886,
      "eval_runtime": 2.7493,
      "eval_samples_per_second": 6.183,
      "eval_steps_per_second": 1.819,
      "eval_wer": 0.3022222222222222,
      "step": 387
    },
    {
      "epoch": 43.33,
      "learning_rate": 0.0001782758620689655,
      "loss": 0.3475,
      "step": 390
    },
    {
      "epoch": 44.0,
      "eval_loss": 0.4749428331851959,
      "eval_runtime": 2.8377,
      "eval_samples_per_second": 5.991,
      "eval_steps_per_second": 1.762,
      "eval_wer": 0.28888888888888886,
      "step": 396
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.00017482758620689654,
      "loss": 0.4048,
      "step": 400
    },
    {
      "epoch": 45.0,
      "eval_loss": 0.34369951486587524,
      "eval_runtime": 2.794,
      "eval_samples_per_second": 6.085,
      "eval_steps_per_second": 1.79,
      "eval_wer": 0.2911111111111111,
      "step": 405
    },
    {
      "epoch": 45.56,
      "learning_rate": 0.00017137931034482757,
      "loss": 0.2984,
      "step": 410
    },
    {
      "epoch": 46.0,
      "eval_loss": 0.36636969447135925,
      "eval_runtime": 2.7731,
      "eval_samples_per_second": 6.13,
      "eval_steps_per_second": 1.803,
      "eval_wer": 0.27,
      "step": 414
    },
    {
      "epoch": 46.67,
      "learning_rate": 0.0001679310344827586,
      "loss": 0.3535,
      "step": 420
    },
    {
      "epoch": 47.0,
      "eval_loss": 0.3290660083293915,
      "eval_runtime": 2.767,
      "eval_samples_per_second": 6.144,
      "eval_steps_per_second": 1.807,
      "eval_wer": 0.28888888888888886,
      "step": 423
    },
    {
      "epoch": 47.78,
      "learning_rate": 0.00016448275862068962,
      "loss": 0.3015,
      "step": 430
    },
    {
      "epoch": 48.0,
      "eval_loss": 0.35376498103141785,
      "eval_runtime": 2.7572,
      "eval_samples_per_second": 6.166,
      "eval_steps_per_second": 1.813,
      "eval_wer": 0.27666666666666667,
      "step": 432
    },
    {
      "epoch": 48.89,
      "learning_rate": 0.00016103448275862068,
      "loss": 0.3628,
      "step": 440
    },
    {
      "epoch": 49.0,
      "eval_loss": 0.44109877943992615,
      "eval_runtime": 2.7694,
      "eval_samples_per_second": 6.139,
      "eval_steps_per_second": 1.805,
      "eval_wer": 0.2733333333333333,
      "step": 441
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0001575862068965517,
      "loss": 0.3303,
      "step": 450
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.34246134757995605,
      "eval_runtime": 2.7763,
      "eval_samples_per_second": 6.123,
      "eval_steps_per_second": 1.801,
      "eval_wer": 0.29,
      "step": 450
    },
    {
      "epoch": 51.0,
      "eval_loss": 0.3162029981613159,
      "eval_runtime": 2.7842,
      "eval_samples_per_second": 6.106,
      "eval_steps_per_second": 1.796,
      "eval_wer": 0.3011111111111111,
      "step": 459
    },
    {
      "epoch": 51.11,
      "learning_rate": 0.00015413793103448276,
      "loss": 0.271,
      "step": 460
    },
    {
      "epoch": 52.0,
      "eval_loss": 0.36851903796195984,
      "eval_runtime": 2.7813,
      "eval_samples_per_second": 6.112,
      "eval_steps_per_second": 1.798,
      "eval_wer": 0.29333333333333333,
      "step": 468
    },
    {
      "epoch": 52.22,
      "learning_rate": 0.0001506896551724138,
      "loss": 0.3299,
      "step": 470
    },
    {
      "epoch": 53.0,
      "eval_loss": 0.42159539461135864,
      "eval_runtime": 2.767,
      "eval_samples_per_second": 6.144,
      "eval_steps_per_second": 1.807,
      "eval_wer": 0.29333333333333333,
      "step": 477
    },
    {
      "epoch": 53.33,
      "learning_rate": 0.00014724137931034482,
      "loss": 0.2782,
      "step": 480
    },
    {
      "epoch": 54.0,
      "eval_loss": 0.4713245928287506,
      "eval_runtime": 2.8739,
      "eval_samples_per_second": 5.915,
      "eval_steps_per_second": 1.74,
      "eval_wer": 0.30444444444444446,
      "step": 486
    },
    {
      "epoch": 54.44,
      "learning_rate": 0.00014379310344827585,
      "loss": 0.348,
      "step": 490
    },
    {
      "epoch": 55.0,
      "eval_loss": 0.4310116767883301,
      "eval_runtime": 2.7991,
      "eval_samples_per_second": 6.073,
      "eval_steps_per_second": 1.786,
      "eval_wer": 0.30777777777777776,
      "step": 495
    },
    {
      "epoch": 55.56,
      "learning_rate": 0.00014034482758620688,
      "loss": 0.2969,
      "step": 500
    },
    {
      "epoch": 56.0,
      "eval_loss": 0.48976629972457886,
      "eval_runtime": 2.8118,
      "eval_samples_per_second": 6.046,
      "eval_steps_per_second": 1.778,
      "eval_wer": 0.27666666666666667,
      "step": 504
    },
    {
      "epoch": 56.67,
      "learning_rate": 0.0001368965517241379,
      "loss": 0.2757,
      "step": 510
    },
    {
      "epoch": 57.0,
      "eval_loss": 0.5194886326789856,
      "eval_runtime": 2.7939,
      "eval_samples_per_second": 6.085,
      "eval_steps_per_second": 1.79,
      "eval_wer": 0.2788888888888889,
      "step": 513
    },
    {
      "epoch": 57.78,
      "learning_rate": 0.00013344827586206896,
      "loss": 0.2662,
      "step": 520
    },
    {
      "epoch": 58.0,
      "eval_loss": 0.46313199400901794,
      "eval_runtime": 2.7984,
      "eval_samples_per_second": 6.075,
      "eval_steps_per_second": 1.787,
      "eval_wer": 0.2911111111111111,
      "step": 522
    },
    {
      "epoch": 58.89,
      "learning_rate": 0.00013,
      "loss": 0.2706,
      "step": 530
    },
    {
      "epoch": 59.0,
      "eval_loss": 0.427517294883728,
      "eval_runtime": 2.7564,
      "eval_samples_per_second": 6.168,
      "eval_steps_per_second": 1.814,
      "eval_wer": 0.2833333333333333,
      "step": 531
    },
    {
      "epoch": 60.0,
      "learning_rate": 0.00012655172413793102,
      "loss": 0.2684,
      "step": 540
    },
    {
      "epoch": 60.0,
      "eval_loss": 0.553533136844635,
      "eval_runtime": 2.7742,
      "eval_samples_per_second": 6.128,
      "eval_steps_per_second": 1.802,
      "eval_wer": 0.2788888888888889,
      "step": 540
    },
    {
      "epoch": 61.0,
      "eval_loss": 0.4733206331729889,
      "eval_runtime": 2.7693,
      "eval_samples_per_second": 6.139,
      "eval_steps_per_second": 1.806,
      "eval_wer": 0.29777777777777775,
      "step": 549
    },
    {
      "epoch": 61.11,
      "learning_rate": 0.00012310344827586205,
      "loss": 0.2819,
      "step": 550
    },
    {
      "epoch": 62.0,
      "eval_loss": 0.49692198634147644,
      "eval_runtime": 2.7566,
      "eval_samples_per_second": 6.167,
      "eval_steps_per_second": 1.814,
      "eval_wer": 0.2833333333333333,
      "step": 558
    },
    {
      "epoch": 62.22,
      "learning_rate": 0.00011965517241379309,
      "loss": 0.2819,
      "step": 560
    },
    {
      "epoch": 63.0,
      "eval_loss": 0.6202179789543152,
      "eval_runtime": 2.7859,
      "eval_samples_per_second": 6.102,
      "eval_steps_per_second": 1.795,
      "eval_wer": 0.2788888888888889,
      "step": 567
    },
    {
      "epoch": 63.33,
      "learning_rate": 0.00011620689655172413,
      "loss": 0.2889,
      "step": 570
    },
    {
      "epoch": 64.0,
      "eval_loss": 0.39546066522598267,
      "eval_runtime": 2.7637,
      "eval_samples_per_second": 6.151,
      "eval_steps_per_second": 1.809,
      "eval_wer": 0.2733333333333333,
      "step": 576
    },
    {
      "epoch": 64.44,
      "learning_rate": 0.00011275862068965516,
      "loss": 0.2515,
      "step": 580
    },
    {
      "epoch": 65.0,
      "eval_loss": 0.38055384159088135,
      "eval_runtime": 2.7537,
      "eval_samples_per_second": 6.173,
      "eval_steps_per_second": 1.816,
      "eval_wer": 0.26555555555555554,
      "step": 585
    },
    {
      "epoch": 65.56,
      "learning_rate": 0.0001093103448275862,
      "loss": 0.2468,
      "step": 590
    },
    {
      "epoch": 66.0,
      "eval_loss": 0.3472989499568939,
      "eval_runtime": 2.8011,
      "eval_samples_per_second": 6.069,
      "eval_steps_per_second": 1.785,
      "eval_wer": 0.2722222222222222,
      "step": 594
    },
    {
      "epoch": 66.67,
      "learning_rate": 0.00010586206896551723,
      "loss": 0.2557,
      "step": 600
    },
    {
      "epoch": 67.0,
      "eval_loss": 0.417023628950119,
      "eval_runtime": 2.7867,
      "eval_samples_per_second": 6.1,
      "eval_steps_per_second": 1.794,
      "eval_wer": 0.2722222222222222,
      "step": 603
    },
    {
      "epoch": 67.78,
      "learning_rate": 0.00010241379310344827,
      "loss": 0.2477,
      "step": 610
    },
    {
      "epoch": 68.0,
      "eval_loss": 0.4748758375644684,
      "eval_runtime": 2.7749,
      "eval_samples_per_second": 6.126,
      "eval_steps_per_second": 1.802,
      "eval_wer": 0.2677777777777778,
      "step": 612
    },
    {
      "epoch": 68.89,
      "learning_rate": 9.89655172413793e-05,
      "loss": 0.2965,
      "step": 620
    },
    {
      "epoch": 69.0,
      "eval_loss": 0.438679039478302,
      "eval_runtime": 2.7924,
      "eval_samples_per_second": 6.088,
      "eval_steps_per_second": 1.791,
      "eval_wer": 0.2611111111111111,
      "step": 621
    },
    {
      "epoch": 70.0,
      "learning_rate": 9.551724137931034e-05,
      "loss": 0.2606,
      "step": 630
    },
    {
      "epoch": 70.0,
      "eval_loss": 0.45858699083328247,
      "eval_runtime": 2.7888,
      "eval_samples_per_second": 6.096,
      "eval_steps_per_second": 1.793,
      "eval_wer": 0.26555555555555554,
      "step": 630
    },
    {
      "epoch": 71.0,
      "eval_loss": 0.5755282044410706,
      "eval_runtime": 2.7561,
      "eval_samples_per_second": 6.168,
      "eval_steps_per_second": 1.814,
      "eval_wer": 0.2733333333333333,
      "step": 639
    },
    {
      "epoch": 71.11,
      "learning_rate": 9.206896551724137e-05,
      "loss": 0.2442,
      "step": 640
    },
    {
      "epoch": 72.0,
      "eval_loss": 0.5582060217857361,
      "eval_runtime": 2.7794,
      "eval_samples_per_second": 6.116,
      "eval_steps_per_second": 1.799,
      "eval_wer": 0.26555555555555554,
      "step": 648
    },
    {
      "epoch": 72.22,
      "learning_rate": 8.862068965517241e-05,
      "loss": 0.347,
      "step": 650
    },
    {
      "epoch": 73.0,
      "eval_loss": 0.38965609669685364,
      "eval_runtime": 2.7716,
      "eval_samples_per_second": 6.134,
      "eval_steps_per_second": 1.804,
      "eval_wer": 0.27111111111111114,
      "step": 657
    },
    {
      "epoch": 73.33,
      "learning_rate": 8.517241379310344e-05,
      "loss": 0.2444,
      "step": 660
    },
    {
      "epoch": 74.0,
      "eval_loss": 0.33690622448921204,
      "eval_runtime": 2.7566,
      "eval_samples_per_second": 6.167,
      "eval_steps_per_second": 1.814,
      "eval_wer": 0.25333333333333335,
      "step": 666
    },
    {
      "epoch": 74.44,
      "learning_rate": 8.172413793103448e-05,
      "loss": 0.2811,
      "step": 670
    },
    {
      "epoch": 75.0,
      "eval_loss": 0.34874993562698364,
      "eval_runtime": 2.7479,
      "eval_samples_per_second": 6.186,
      "eval_steps_per_second": 1.82,
      "eval_wer": 0.2577777777777778,
      "step": 675
    },
    {
      "epoch": 75.56,
      "learning_rate": 7.827586206896551e-05,
      "loss": 0.24,
      "step": 680
    },
    {
      "epoch": 76.0,
      "eval_loss": 0.3692302107810974,
      "eval_runtime": 2.785,
      "eval_samples_per_second": 6.104,
      "eval_steps_per_second": 1.795,
      "eval_wer": 0.2588888888888889,
      "step": 684
    },
    {
      "epoch": 76.67,
      "learning_rate": 7.482758620689654e-05,
      "loss": 0.2466,
      "step": 690
    },
    {
      "epoch": 77.0,
      "eval_loss": 0.45672333240509033,
      "eval_runtime": 2.7533,
      "eval_samples_per_second": 6.174,
      "eval_steps_per_second": 1.816,
      "eval_wer": 0.2577777777777778,
      "step": 693
    },
    {
      "epoch": 77.78,
      "learning_rate": 7.137931034482758e-05,
      "loss": 0.2769,
      "step": 700
    },
    {
      "epoch": 78.0,
      "eval_loss": 0.40405967831611633,
      "eval_runtime": 2.7964,
      "eval_samples_per_second": 6.079,
      "eval_steps_per_second": 1.788,
      "eval_wer": 0.2633333333333333,
      "step": 702
    },
    {
      "epoch": 78.89,
      "learning_rate": 6.79310344827586e-05,
      "loss": 0.2464,
      "step": 710
    },
    {
      "epoch": 79.0,
      "eval_loss": 0.38128018379211426,
      "eval_runtime": 2.7812,
      "eval_samples_per_second": 6.113,
      "eval_steps_per_second": 1.798,
      "eval_wer": 0.26222222222222225,
      "step": 711
    },
    {
      "epoch": 80.0,
      "learning_rate": 6.448275862068965e-05,
      "loss": 0.2791,
      "step": 720
    },
    {
      "epoch": 80.0,
      "eval_loss": 0.3990322947502136,
      "eval_runtime": 2.7516,
      "eval_samples_per_second": 6.178,
      "eval_steps_per_second": 1.817,
      "eval_wer": 0.25555555555555554,
      "step": 720
    },
    {
      "epoch": 81.0,
      "eval_loss": 0.39965325593948364,
      "eval_runtime": 2.7649,
      "eval_samples_per_second": 6.149,
      "eval_steps_per_second": 1.808,
      "eval_wer": 0.24888888888888888,
      "step": 729
    },
    {
      "epoch": 81.11,
      "learning_rate": 6.103448275862068e-05,
      "loss": 0.2365,
      "step": 730
    },
    {
      "epoch": 82.0,
      "eval_loss": 0.4537028968334198,
      "eval_runtime": 2.7885,
      "eval_samples_per_second": 6.097,
      "eval_steps_per_second": 1.793,
      "eval_wer": 0.25333333333333335,
      "step": 738
    },
    {
      "epoch": 82.22,
      "learning_rate": 5.758620689655172e-05,
      "loss": 0.2693,
      "step": 740
    },
    {
      "epoch": 83.0,
      "eval_loss": 0.5943002104759216,
      "eval_runtime": 2.8023,
      "eval_samples_per_second": 6.067,
      "eval_steps_per_second": 1.784,
      "eval_wer": 0.2611111111111111,
      "step": 747
    },
    {
      "epoch": 83.33,
      "learning_rate": 5.413793103448275e-05,
      "loss": 0.2285,
      "step": 750
    },
    {
      "epoch": 84.0,
      "eval_loss": 0.5804929733276367,
      "eval_runtime": 2.7598,
      "eval_samples_per_second": 6.16,
      "eval_steps_per_second": 1.812,
      "eval_wer": 0.26555555555555554,
      "step": 756
    },
    {
      "epoch": 84.44,
      "learning_rate": 5.068965517241379e-05,
      "loss": 0.2468,
      "step": 760
    },
    {
      "epoch": 85.0,
      "eval_loss": 0.5608753561973572,
      "eval_runtime": 2.7731,
      "eval_samples_per_second": 6.13,
      "eval_steps_per_second": 1.803,
      "eval_wer": 0.26555555555555554,
      "step": 765
    },
    {
      "epoch": 85.56,
      "learning_rate": 4.724137931034482e-05,
      "loss": 0.2226,
      "step": 770
    },
    {
      "epoch": 86.0,
      "eval_loss": 0.59482342004776,
      "eval_runtime": 2.7726,
      "eval_samples_per_second": 6.131,
      "eval_steps_per_second": 1.803,
      "eval_wer": 0.26666666666666666,
      "step": 774
    },
    {
      "epoch": 86.67,
      "learning_rate": 4.379310344827586e-05,
      "loss": 0.2419,
      "step": 780
    },
    {
      "epoch": 87.0,
      "eval_loss": 0.5909682512283325,
      "eval_runtime": 2.7879,
      "eval_samples_per_second": 6.098,
      "eval_steps_per_second": 1.793,
      "eval_wer": 0.2544444444444444,
      "step": 783
    },
    {
      "epoch": 87.78,
      "learning_rate": 4.034482758620689e-05,
      "loss": 0.2254,
      "step": 790
    },
    {
      "epoch": 88.0,
      "eval_loss": 0.5740683078765869,
      "eval_runtime": 2.7572,
      "eval_samples_per_second": 6.166,
      "eval_steps_per_second": 1.813,
      "eval_wer": 0.26,
      "step": 792
    },
    {
      "epoch": 88.89,
      "learning_rate": 3.689655172413793e-05,
      "loss": 0.2083,
      "step": 800
    },
    {
      "epoch": 89.0,
      "eval_loss": 0.498391717672348,
      "eval_runtime": 2.7579,
      "eval_samples_per_second": 6.164,
      "eval_steps_per_second": 1.813,
      "eval_wer": 0.2611111111111111,
      "step": 801
    },
    {
      "epoch": 90.0,
      "learning_rate": 3.344827586206896e-05,
      "loss": 0.2318,
      "step": 810
    },
    {
      "epoch": 90.0,
      "eval_loss": 0.5093002915382385,
      "eval_runtime": 2.7728,
      "eval_samples_per_second": 6.131,
      "eval_steps_per_second": 1.803,
      "eval_wer": 0.26,
      "step": 810
    },
    {
      "epoch": 91.0,
      "eval_loss": 0.5284357070922852,
      "eval_runtime": 2.7447,
      "eval_samples_per_second": 6.194,
      "eval_steps_per_second": 1.822,
      "eval_wer": 0.2633333333333333,
      "step": 819
    },
    {
      "epoch": 91.11,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 0.2458,
      "step": 820
    },
    {
      "epoch": 92.0,
      "eval_loss": 0.48846206068992615,
      "eval_runtime": 2.7457,
      "eval_samples_per_second": 6.192,
      "eval_steps_per_second": 1.821,
      "eval_wer": 0.26555555555555554,
      "step": 828
    },
    {
      "epoch": 92.22,
      "learning_rate": 2.6551724137931032e-05,
      "loss": 0.2394,
      "step": 830
    },
    {
      "epoch": 93.0,
      "eval_loss": 0.4817904829978943,
      "eval_runtime": 2.7858,
      "eval_samples_per_second": 6.102,
      "eval_steps_per_second": 1.795,
      "eval_wer": 0.26222222222222225,
      "step": 837
    },
    {
      "epoch": 93.33,
      "learning_rate": 2.3103448275862067e-05,
      "loss": 0.2018,
      "step": 840
    },
    {
      "epoch": 94.0,
      "eval_loss": 0.5037254691123962,
      "eval_runtime": 2.7345,
      "eval_samples_per_second": 6.217,
      "eval_steps_per_second": 1.829,
      "eval_wer": 0.26,
      "step": 846
    },
    {
      "epoch": 94.44,
      "learning_rate": 1.9655172413793102e-05,
      "loss": 0.235,
      "step": 850
    },
    {
      "epoch": 95.0,
      "eval_loss": 0.5010991096496582,
      "eval_runtime": 2.8024,
      "eval_samples_per_second": 6.066,
      "eval_steps_per_second": 1.784,
      "eval_wer": 0.2577777777777778,
      "step": 855
    },
    {
      "epoch": 95.56,
      "learning_rate": 1.6206896551724137e-05,
      "loss": 0.2252,
      "step": 860
    },
    {
      "epoch": 96.0,
      "eval_loss": 0.49311307072639465,
      "eval_runtime": 2.7705,
      "eval_samples_per_second": 6.136,
      "eval_steps_per_second": 1.805,
      "eval_wer": 0.2611111111111111,
      "step": 864
    },
    {
      "epoch": 96.67,
      "learning_rate": 1.275862068965517e-05,
      "loss": 0.2147,
      "step": 870
    },
    {
      "epoch": 97.0,
      "eval_loss": 0.4880674481391907,
      "eval_runtime": 2.7947,
      "eval_samples_per_second": 6.083,
      "eval_steps_per_second": 1.789,
      "eval_wer": 0.2588888888888889,
      "step": 873
    },
    {
      "epoch": 97.78,
      "learning_rate": 9.310344827586206e-06,
      "loss": 0.2227,
      "step": 880
    },
    {
      "epoch": 98.0,
      "eval_loss": 0.49562954902648926,
      "eval_runtime": 2.7533,
      "eval_samples_per_second": 6.174,
      "eval_steps_per_second": 1.816,
      "eval_wer": 0.2588888888888889,
      "step": 882
    },
    {
      "epoch": 98.89,
      "learning_rate": 5.862068965517241e-06,
      "loss": 0.2168,
      "step": 890
    },
    {
      "epoch": 99.0,
      "eval_loss": 0.5096976161003113,
      "eval_runtime": 2.7653,
      "eval_samples_per_second": 6.148,
      "eval_steps_per_second": 1.808,
      "eval_wer": 0.2588888888888889,
      "step": 891
    },
    {
      "epoch": 100.0,
      "learning_rate": 2.413793103448276e-06,
      "loss": 0.2282,
      "step": 900
    },
    {
      "epoch": 100.0,
      "eval_loss": 0.5173905491828918,
      "eval_runtime": 2.7832,
      "eval_samples_per_second": 6.108,
      "eval_steps_per_second": 1.797,
      "eval_wer": 0.26,
      "step": 900
    },
    {
      "epoch": 100.0,
      "step": 900,
      "total_flos": 1.389987159899058e+18,
      "train_loss": 0.7617763585514492,
      "train_runtime": 3519.9031,
      "train_samples_per_second": 4.006,
      "train_steps_per_second": 0.256
    }
  ],
  "max_steps": 900,
  "num_train_epochs": 100,
  "total_flos": 1.389987159899058e+18,
  "trial_name": null,
  "trial_params": null
}