|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.909466234149218, |
|
"global_step": 6700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"eval_cer": 0.2550782058484374, |
|
"eval_loss": 0.9434235095977783, |
|
"eval_runtime": 65.8936, |
|
"eval_samples_per_second": 7.588, |
|
"eval_steps_per_second": 0.956, |
|
"eval_wer": 0.8986276613768566, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 3.5921, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_cer": 0.05617811418940895, |
|
"eval_loss": 0.2126482129096985, |
|
"eval_runtime": 65.2511, |
|
"eval_samples_per_second": 7.663, |
|
"eval_steps_per_second": 0.966, |
|
"eval_wer": 0.20076805783023663, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00028565965583173995, |
|
"loss": 0.3347, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_cer": 0.052689157623961445, |
|
"eval_loss": 0.19026830792427063, |
|
"eval_runtime": 66.2853, |
|
"eval_samples_per_second": 7.543, |
|
"eval_steps_per_second": 0.95, |
|
"eval_wer": 0.18907776585531147, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002665391969407266, |
|
"loss": 0.1948, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_cer": 0.04130569764347595, |
|
"eval_loss": 0.1462544947862625, |
|
"eval_runtime": 65.903, |
|
"eval_samples_per_second": 7.587, |
|
"eval_steps_per_second": 0.956, |
|
"eval_wer": 0.13915400688993054, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00024741873804971315, |
|
"loss": 0.1737, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_cer": 0.040093433074125544, |
|
"eval_loss": 0.1409013271331787, |
|
"eval_runtime": 66.3053, |
|
"eval_samples_per_second": 7.541, |
|
"eval_steps_per_second": 0.95, |
|
"eval_wer": 0.13599141582425028, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00022829827915869978, |
|
"loss": 0.1466, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_cer": 0.03887131269526822, |
|
"eval_loss": 0.1429334431886673, |
|
"eval_runtime": 65.9993, |
|
"eval_samples_per_second": 7.576, |
|
"eval_steps_per_second": 0.955, |
|
"eval_wer": 0.13401479640820016, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_cer": 0.03858549421956772, |
|
"eval_loss": 0.1421019285917282, |
|
"eval_runtime": 66.1016, |
|
"eval_samples_per_second": 7.564, |
|
"eval_steps_per_second": 0.953, |
|
"eval_wer": 0.12678601682950247, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0002091778202676864, |
|
"loss": 0.1378, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_cer": 0.03703813212698225, |
|
"eval_loss": 0.1354704052209854, |
|
"eval_runtime": 65.9839, |
|
"eval_samples_per_second": 7.578, |
|
"eval_steps_per_second": 0.955, |
|
"eval_wer": 0.12305867735923647, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.00019005736137667304, |
|
"loss": 0.1217, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_cer": 0.034948700511516516, |
|
"eval_loss": 0.13004331290721893, |
|
"eval_runtime": 66.7534, |
|
"eval_samples_per_second": 7.49, |
|
"eval_steps_per_second": 0.944, |
|
"eval_wer": 0.11176370926752131, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.00017093690248565967, |
|
"loss": 0.1121, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_cer": 0.03565831879601431, |
|
"eval_loss": 0.1295933872461319, |
|
"eval_runtime": 67.056, |
|
"eval_samples_per_second": 7.456, |
|
"eval_steps_per_second": 0.94, |
|
"eval_wer": 0.11729824363246173, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.00015181644359464624, |
|
"loss": 0.1038, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_cer": 0.034948700511516516, |
|
"eval_loss": 0.13280533254146576, |
|
"eval_runtime": 66.5598, |
|
"eval_samples_per_second": 7.512, |
|
"eval_steps_per_second": 0.947, |
|
"eval_wer": 0.11080363697972553, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.00013269598470363287, |
|
"loss": 0.0941, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_cer": 0.035214807368203184, |
|
"eval_loss": 0.12650151550769806, |
|
"eval_runtime": 66.4767, |
|
"eval_samples_per_second": 7.521, |
|
"eval_steps_per_second": 0.948, |
|
"eval_wer": 0.1124978821934828, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_cer": 0.034012398608359695, |
|
"eval_loss": 0.1327013224363327, |
|
"eval_runtime": 66.3907, |
|
"eval_samples_per_second": 7.531, |
|
"eval_steps_per_second": 0.949, |
|
"eval_wer": 0.10718924719037669, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0001135755258126195, |
|
"loss": 0.0862, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_cer": 0.0352246631777101, |
|
"eval_loss": 0.1414576768875122, |
|
"eval_runtime": 66.6897, |
|
"eval_samples_per_second": 7.497, |
|
"eval_steps_per_second": 0.945, |
|
"eval_wer": 0.11294968091715141, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 9.44550669216061e-05, |
|
"loss": 0.0748, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_cer": 0.03466288203581601, |
|
"eval_loss": 0.13079801201820374, |
|
"eval_runtime": 66.6475, |
|
"eval_samples_per_second": 7.502, |
|
"eval_steps_per_second": 0.945, |
|
"eval_wer": 0.1101824137346812, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 7.533460803059272e-05, |
|
"loss": 0.0727, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_cer": 0.03377585918019377, |
|
"eval_loss": 0.1386057287454605, |
|
"eval_runtime": 66.5005, |
|
"eval_samples_per_second": 7.519, |
|
"eval_steps_per_second": 0.947, |
|
"eval_wer": 0.10470435421019936, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 5.621414913957934e-05, |
|
"loss": 0.0622, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_cer": 0.0333717709904103, |
|
"eval_loss": 0.14488892257213593, |
|
"eval_runtime": 66.3875, |
|
"eval_samples_per_second": 7.532, |
|
"eval_steps_per_second": 0.949, |
|
"eval_wer": 0.10509967809340938, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 3.7093690248565965e-05, |
|
"loss": 0.0582, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_cer": 0.03320422222879276, |
|
"eval_loss": 0.14533209800720215, |
|
"eval_runtime": 66.3474, |
|
"eval_samples_per_second": 7.536, |
|
"eval_steps_per_second": 0.95, |
|
"eval_wer": 0.10493025357203366, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_cer": 0.03320422222879276, |
|
"eval_loss": 0.14391696453094482, |
|
"eval_runtime": 66.4699, |
|
"eval_samples_per_second": 7.522, |
|
"eval_steps_per_second": 0.948, |
|
"eval_wer": 0.10515615293386796, |
|
"step": 6365 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 1.8021032504780113e-05, |
|
"loss": 0.0493, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_cer": 0.03264244108689867, |
|
"eval_loss": 0.14469173550605774, |
|
"eval_runtime": 65.8617, |
|
"eval_samples_per_second": 7.592, |
|
"eval_steps_per_second": 0.957, |
|
"eval_wer": 0.10267125995369063, |
|
"step": 6700 |
|
} |
|
], |
|
"max_steps": 6776, |
|
"num_train_epochs": 8, |
|
"total_flos": 2.1224029808181802e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|