|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 5100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.069866666666666e-05, |
|
"loss": 10.7052, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 3.4682674407958984, |
|
"eval_runtime": 13.0004, |
|
"eval_samples_per_second": 26.23, |
|
"eval_steps_per_second": 3.308, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.000102432, |
|
"loss": 3.2395, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 3.14892840385437, |
|
"eval_runtime": 12.7764, |
|
"eval_samples_per_second": 26.69, |
|
"eval_steps_per_second": 3.366, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.00015416533333333332, |
|
"loss": 2.9951, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 2.982297897338867, |
|
"eval_runtime": 12.7866, |
|
"eval_samples_per_second": 26.668, |
|
"eval_steps_per_second": 3.363, |
|
"eval_wer": 1.0007380073800738, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.00020589866666666665, |
|
"loss": 2.3574, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_loss": 1.2614495754241943, |
|
"eval_runtime": 12.8042, |
|
"eval_samples_per_second": 26.632, |
|
"eval_steps_per_second": 3.358, |
|
"eval_wer": 0.7597785977859779, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.000257632, |
|
"loss": 1.7287, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 1.1816928386688232, |
|
"eval_runtime": 12.7085, |
|
"eval_samples_per_second": 26.833, |
|
"eval_steps_per_second": 3.384, |
|
"eval_wer": 0.7420664206642067, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 0.00030936533333333335, |
|
"loss": 1.6144, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"eval_loss": 1.131492257118225, |
|
"eval_runtime": 12.8371, |
|
"eval_samples_per_second": 26.564, |
|
"eval_steps_per_second": 3.35, |
|
"eval_wer": 0.7321033210332103, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 0.00036109866666666666, |
|
"loss": 1.5598, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"eval_loss": 1.232160210609436, |
|
"eval_runtime": 12.7503, |
|
"eval_samples_per_second": 26.745, |
|
"eval_steps_per_second": 3.372, |
|
"eval_wer": 0.7549815498154981, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.0003837186206896552, |
|
"loss": 1.5418, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"eval_loss": 1.272075891494751, |
|
"eval_runtime": 12.8108, |
|
"eval_samples_per_second": 26.618, |
|
"eval_steps_per_second": 3.357, |
|
"eval_wer": 0.7819188191881918, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 0.0003747990804597701, |
|
"loss": 1.4578, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"eval_loss": 1.1709508895874023, |
|
"eval_runtime": 12.7848, |
|
"eval_samples_per_second": 26.672, |
|
"eval_steps_per_second": 3.363, |
|
"eval_wer": 0.7531365313653137, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 0.0003658795402298851, |
|
"loss": 1.4311, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"eval_loss": 1.2042367458343506, |
|
"eval_runtime": 12.6761, |
|
"eval_samples_per_second": 26.901, |
|
"eval_steps_per_second": 3.392, |
|
"eval_wer": 0.7490774907749077, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"learning_rate": 0.00035696, |
|
"loss": 1.3483, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"eval_loss": 1.1702170372009277, |
|
"eval_runtime": 12.5762, |
|
"eval_samples_per_second": 27.115, |
|
"eval_steps_per_second": 3.419, |
|
"eval_wer": 0.7464944649446494, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 0.0003480404597701149, |
|
"loss": 1.3078, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"eval_loss": 1.1963412761688232, |
|
"eval_runtime": 12.6937, |
|
"eval_samples_per_second": 26.864, |
|
"eval_steps_per_second": 3.387, |
|
"eval_wer": 0.7420664206642067, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"learning_rate": 0.00033912091954022987, |
|
"loss": 1.2576, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"eval_loss": 1.1501450538635254, |
|
"eval_runtime": 12.6626, |
|
"eval_samples_per_second": 26.93, |
|
"eval_steps_per_second": 3.396, |
|
"eval_wer": 0.7280442804428044, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 0.0003302013793103448, |
|
"loss": 1.2173, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"eval_loss": 1.2525919675827026, |
|
"eval_runtime": 12.6262, |
|
"eval_samples_per_second": 27.007, |
|
"eval_steps_per_second": 3.406, |
|
"eval_wer": 0.7298892988929889, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"learning_rate": 0.00032128183908045977, |
|
"loss": 1.2217, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"eval_loss": 1.2478744983673096, |
|
"eval_runtime": 12.8026, |
|
"eval_samples_per_second": 26.635, |
|
"eval_steps_per_second": 3.359, |
|
"eval_wer": 0.7309963099630996, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"learning_rate": 0.0003123622988505747, |
|
"loss": 1.1536, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"eval_loss": 1.2567418813705444, |
|
"eval_runtime": 12.6734, |
|
"eval_samples_per_second": 26.907, |
|
"eval_steps_per_second": 3.393, |
|
"eval_wer": 0.7431734317343174, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.00030344275862068966, |
|
"loss": 1.0939, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 1.2800976037979126, |
|
"eval_runtime": 12.5686, |
|
"eval_samples_per_second": 27.131, |
|
"eval_steps_per_second": 3.421, |
|
"eval_wer": 0.7247232472324723, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"learning_rate": 0.0002945232183908046, |
|
"loss": 1.0745, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"eval_loss": 1.2340304851531982, |
|
"eval_runtime": 12.6385, |
|
"eval_samples_per_second": 26.981, |
|
"eval_steps_per_second": 3.402, |
|
"eval_wer": 0.7151291512915129, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"learning_rate": 0.00028560367816091956, |
|
"loss": 1.0454, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"eval_loss": 1.237194299697876, |
|
"eval_runtime": 12.4912, |
|
"eval_samples_per_second": 27.299, |
|
"eval_steps_per_second": 3.442, |
|
"eval_wer": 0.7151291512915129, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"learning_rate": 0.00027668413793103446, |
|
"loss": 1.0101, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"eval_loss": 1.2461133003234863, |
|
"eval_runtime": 12.6855, |
|
"eval_samples_per_second": 26.881, |
|
"eval_steps_per_second": 3.39, |
|
"eval_wer": 0.7376383763837638, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"learning_rate": 0.0002677645977011494, |
|
"loss": 0.9833, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"eval_loss": 1.2552708387374878, |
|
"eval_runtime": 12.6273, |
|
"eval_samples_per_second": 27.005, |
|
"eval_steps_per_second": 3.405, |
|
"eval_wer": 0.7269372693726938, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"learning_rate": 0.00025884505747126435, |
|
"loss": 0.9314, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"eval_loss": 1.2371633052825928, |
|
"eval_runtime": 12.5794, |
|
"eval_samples_per_second": 27.108, |
|
"eval_steps_per_second": 3.418, |
|
"eval_wer": 0.7014760147601476, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"learning_rate": 0.0002499255172413793, |
|
"loss": 0.9147, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"eval_loss": 1.3035242557525635, |
|
"eval_runtime": 12.8232, |
|
"eval_samples_per_second": 26.592, |
|
"eval_steps_per_second": 3.353, |
|
"eval_wer": 0.7357933579335794, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"learning_rate": 0.00024109517241379313, |
|
"loss": 0.8758, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"eval_loss": 1.2598013877868652, |
|
"eval_runtime": 12.7812, |
|
"eval_samples_per_second": 26.68, |
|
"eval_steps_per_second": 3.364, |
|
"eval_wer": 0.7092250922509226, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"learning_rate": 0.00023217563218390802, |
|
"loss": 0.8356, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"eval_loss": 1.255703091621399, |
|
"eval_runtime": 12.549, |
|
"eval_samples_per_second": 27.173, |
|
"eval_steps_per_second": 3.427, |
|
"eval_wer": 0.7143911439114391, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"learning_rate": 0.00022325609195402297, |
|
"loss": 0.8105, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"eval_loss": 1.2618709802627563, |
|
"eval_runtime": 12.4348, |
|
"eval_samples_per_second": 27.423, |
|
"eval_steps_per_second": 3.458, |
|
"eval_wer": 0.7236162361623616, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 52.94, |
|
"learning_rate": 0.00021433655172413795, |
|
"loss": 0.7947, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 52.94, |
|
"eval_loss": 1.399444818496704, |
|
"eval_runtime": 12.9361, |
|
"eval_samples_per_second": 26.36, |
|
"eval_steps_per_second": 3.324, |
|
"eval_wer": 0.7490774907749077, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 54.9, |
|
"learning_rate": 0.0002054170114942529, |
|
"loss": 0.7623, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 54.9, |
|
"eval_loss": 1.2931541204452515, |
|
"eval_runtime": 12.7092, |
|
"eval_samples_per_second": 26.831, |
|
"eval_steps_per_second": 3.383, |
|
"eval_wer": 0.7132841328413284, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 56.86, |
|
"learning_rate": 0.0001964974712643678, |
|
"loss": 0.7282, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 56.86, |
|
"eval_loss": 1.2799276113510132, |
|
"eval_runtime": 12.7788, |
|
"eval_samples_per_second": 26.685, |
|
"eval_steps_per_second": 3.365, |
|
"eval_wer": 0.7088560885608856, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"learning_rate": 0.00018757793103448274, |
|
"loss": 0.7108, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"eval_loss": 1.3615078926086426, |
|
"eval_runtime": 12.6509, |
|
"eval_samples_per_second": 26.955, |
|
"eval_steps_per_second": 3.399, |
|
"eval_wer": 0.714760147601476, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 60.78, |
|
"learning_rate": 0.00017865839080459772, |
|
"loss": 0.6896, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 60.78, |
|
"eval_loss": 1.312876582145691, |
|
"eval_runtime": 12.6527, |
|
"eval_samples_per_second": 26.951, |
|
"eval_steps_per_second": 3.398, |
|
"eval_wer": 0.7040590405904059, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 62.75, |
|
"learning_rate": 0.00016973885057471264, |
|
"loss": 0.6496, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 62.75, |
|
"eval_loss": 1.4050244092941284, |
|
"eval_runtime": 12.6982, |
|
"eval_samples_per_second": 26.854, |
|
"eval_steps_per_second": 3.386, |
|
"eval_wer": 0.6933579335793358, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 64.71, |
|
"learning_rate": 0.0001608193103448276, |
|
"loss": 0.6075, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 64.71, |
|
"eval_loss": 1.35708749294281, |
|
"eval_runtime": 12.752, |
|
"eval_samples_per_second": 26.741, |
|
"eval_steps_per_second": 3.372, |
|
"eval_wer": 0.7025830258302583, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 0.00015189977011494254, |
|
"loss": 0.6242, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"eval_loss": 1.3368754386901855, |
|
"eval_runtime": 12.8446, |
|
"eval_samples_per_second": 26.548, |
|
"eval_steps_per_second": 3.348, |
|
"eval_wer": 0.7062730627306273, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 68.63, |
|
"learning_rate": 0.00014298022988505749, |
|
"loss": 0.5865, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 68.63, |
|
"eval_loss": 1.4367624521255493, |
|
"eval_runtime": 12.5885, |
|
"eval_samples_per_second": 27.088, |
|
"eval_steps_per_second": 3.416, |
|
"eval_wer": 0.7140221402214022, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 70.59, |
|
"learning_rate": 0.0001340606896551724, |
|
"loss": 0.5721, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 70.59, |
|
"eval_loss": 1.4223829507827759, |
|
"eval_runtime": 12.6692, |
|
"eval_samples_per_second": 26.916, |
|
"eval_steps_per_second": 3.394, |
|
"eval_wer": 0.7066420664206642, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 72.55, |
|
"learning_rate": 0.00012514114942528736, |
|
"loss": 0.5475, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 72.55, |
|
"eval_loss": 1.4797747135162354, |
|
"eval_runtime": 12.8068, |
|
"eval_samples_per_second": 26.626, |
|
"eval_steps_per_second": 3.358, |
|
"eval_wer": 0.7118081180811808, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 74.51, |
|
"learning_rate": 0.00011622160919540229, |
|
"loss": 0.5086, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 74.51, |
|
"eval_loss": 1.510679841041565, |
|
"eval_runtime": 12.607, |
|
"eval_samples_per_second": 27.048, |
|
"eval_steps_per_second": 3.411, |
|
"eval_wer": 0.7232472324723247, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 76.47, |
|
"learning_rate": 0.00010730206896551725, |
|
"loss": 0.4958, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 76.47, |
|
"eval_loss": 1.4849300384521484, |
|
"eval_runtime": 12.6845, |
|
"eval_samples_per_second": 26.883, |
|
"eval_steps_per_second": 3.39, |
|
"eval_wer": 0.7088560885608856, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 78.43, |
|
"learning_rate": 9.838252873563218e-05, |
|
"loss": 0.5046, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 78.43, |
|
"eval_loss": 1.4450523853302002, |
|
"eval_runtime": 12.6526, |
|
"eval_samples_per_second": 26.951, |
|
"eval_steps_per_second": 3.399, |
|
"eval_wer": 0.7114391143911439, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 80.39, |
|
"learning_rate": 8.946298850574712e-05, |
|
"loss": 0.4694, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 80.39, |
|
"eval_loss": 1.4674367904663086, |
|
"eval_runtime": 12.49, |
|
"eval_samples_per_second": 27.302, |
|
"eval_steps_per_second": 3.443, |
|
"eval_wer": 0.7088560885608856, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 82.35, |
|
"learning_rate": 8.054344827586206e-05, |
|
"loss": 0.4386, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 82.35, |
|
"eval_loss": 1.524474859237671, |
|
"eval_runtime": 12.6393, |
|
"eval_samples_per_second": 26.979, |
|
"eval_steps_per_second": 3.402, |
|
"eval_wer": 0.7103321033210332, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 84.31, |
|
"learning_rate": 7.162390804597701e-05, |
|
"loss": 0.4516, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 84.31, |
|
"eval_loss": 1.5031787157058716, |
|
"eval_runtime": 12.6415, |
|
"eval_samples_per_second": 26.975, |
|
"eval_steps_per_second": 3.401, |
|
"eval_wer": 0.7103321033210332, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 86.27, |
|
"learning_rate": 6.27935632183908e-05, |
|
"loss": 0.4113, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 86.27, |
|
"eval_loss": 1.5246329307556152, |
|
"eval_runtime": 12.6508, |
|
"eval_samples_per_second": 26.955, |
|
"eval_steps_per_second": 3.399, |
|
"eval_wer": 0.7195571955719557, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 88.24, |
|
"learning_rate": 5.3874022988505745e-05, |
|
"loss": 0.3972, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 88.24, |
|
"eval_loss": 1.5318336486816406, |
|
"eval_runtime": 12.5704, |
|
"eval_samples_per_second": 27.127, |
|
"eval_steps_per_second": 3.421, |
|
"eval_wer": 0.7114391143911439, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 90.2, |
|
"learning_rate": 4.4954482758620694e-05, |
|
"loss": 0.4006, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 90.2, |
|
"eval_loss": 1.554287314414978, |
|
"eval_runtime": 12.5754, |
|
"eval_samples_per_second": 27.116, |
|
"eval_steps_per_second": 3.419, |
|
"eval_wer": 0.6981549815498155, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 92.16, |
|
"learning_rate": 3.603494252873563e-05, |
|
"loss": 0.4014, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 92.16, |
|
"eval_loss": 1.5442076921463013, |
|
"eval_runtime": 12.6146, |
|
"eval_samples_per_second": 27.032, |
|
"eval_steps_per_second": 3.409, |
|
"eval_wer": 0.7047970479704797, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 94.12, |
|
"learning_rate": 2.711540229885057e-05, |
|
"loss": 0.3672, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 94.12, |
|
"eval_loss": 1.5541517734527588, |
|
"eval_runtime": 12.6543, |
|
"eval_samples_per_second": 26.947, |
|
"eval_steps_per_second": 3.398, |
|
"eval_wer": 0.7136531365313653, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 96.08, |
|
"learning_rate": 1.8195862068965517e-05, |
|
"loss": 0.3666, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 96.08, |
|
"eval_loss": 1.5414179563522339, |
|
"eval_runtime": 12.5239, |
|
"eval_samples_per_second": 27.228, |
|
"eval_steps_per_second": 3.433, |
|
"eval_wer": 0.7018450184501845, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 98.04, |
|
"learning_rate": 9.27632183908046e-06, |
|
"loss": 0.3574, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 98.04, |
|
"eval_loss": 1.5465455055236816, |
|
"eval_runtime": 12.7119, |
|
"eval_samples_per_second": 26.825, |
|
"eval_steps_per_second": 3.383, |
|
"eval_wer": 0.7059040590405904, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 3.567816091954023e-07, |
|
"loss": 0.3428, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 1.5443140268325806, |
|
"eval_runtime": 12.8582, |
|
"eval_samples_per_second": 26.52, |
|
"eval_steps_per_second": 3.344, |
|
"eval_wer": 0.7029520295202952, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 5100, |
|
"total_flos": 9.838577578075728e+18, |
|
"train_loss": 1.1548647121354645, |
|
"train_runtime": 5602.3197, |
|
"train_samples_per_second": 14.458, |
|
"train_steps_per_second": 0.91 |
|
} |
|
], |
|
"max_steps": 5100, |
|
"num_train_epochs": 100, |
|
"total_flos": 9.838577578075728e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|