|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 137.09493670886076, |
|
"global_step": 4250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 3.5e-05, |
|
"loss": 7.2926, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.8496713638305664, |
|
"eval_runtime": 271.7106, |
|
"eval_samples_per_second": 26.745, |
|
"eval_steps_per_second": 3.345, |
|
"eval_wer": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 7e-05, |
|
"loss": 3.417, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"eval_cer": 0.9856759584948347, |
|
"eval_loss": 3.285226583480835, |
|
"eval_runtime": 275.8724, |
|
"eval_samples_per_second": 26.342, |
|
"eval_steps_per_second": 3.295, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"learning_rate": 6.578313253012048e-05, |
|
"loss": 2.0264, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"eval_cer": 0.176804441300443, |
|
"eval_loss": 0.7098603248596191, |
|
"eval_runtime": 269.2535, |
|
"eval_samples_per_second": 26.989, |
|
"eval_steps_per_second": 3.376, |
|
"eval_wer": 0.7342138090806487, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 32.25, |
|
"learning_rate": 6.156626506024095e-05, |
|
"loss": 0.4018, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 32.25, |
|
"eval_cer": 0.15511300475620768, |
|
"eval_loss": 0.6187673211097717, |
|
"eval_runtime": 271.6934, |
|
"eval_samples_per_second": 26.747, |
|
"eval_steps_per_second": 3.346, |
|
"eval_wer": 0.6415460467694989, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 5.734939759036144e-05, |
|
"loss": 0.2444, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"eval_cer": 0.15995397008055237, |
|
"eval_loss": 0.6631603837013245, |
|
"eval_runtime": 276.2198, |
|
"eval_samples_per_second": 26.309, |
|
"eval_steps_per_second": 3.291, |
|
"eval_wer": 0.6361800289091737, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 48.38, |
|
"learning_rate": 5.313253012048192e-05, |
|
"loss": 0.1882, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 48.38, |
|
"eval_cer": 0.13876129966064343, |
|
"eval_loss": 0.6070172190666199, |
|
"eval_runtime": 272.4136, |
|
"eval_samples_per_second": 26.676, |
|
"eval_steps_per_second": 3.337, |
|
"eval_wer": 0.578262677464705, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 56.44, |
|
"learning_rate": 4.891566265060241e-05, |
|
"loss": 0.153, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 56.44, |
|
"eval_cer": 0.13767574986063888, |
|
"eval_loss": 0.6425250172615051, |
|
"eval_runtime": 272.6101, |
|
"eval_samples_per_second": 26.657, |
|
"eval_steps_per_second": 3.334, |
|
"eval_wer": 0.5720056234283112, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 64.51, |
|
"learning_rate": 4.469879518072288e-05, |
|
"loss": 0.1214, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 64.51, |
|
"eval_cer": 0.1337117001405021, |
|
"eval_loss": 0.6362873315811157, |
|
"eval_runtime": 271.8138, |
|
"eval_samples_per_second": 26.735, |
|
"eval_steps_per_second": 3.344, |
|
"eval_wer": 0.5546007167891016, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 72.57, |
|
"learning_rate": 4.048192771084337e-05, |
|
"loss": 0.1011, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 72.57, |
|
"eval_cer": 0.12238677519991394, |
|
"eval_loss": 0.6309632658958435, |
|
"eval_runtime": 276.7543, |
|
"eval_samples_per_second": 26.258, |
|
"eval_steps_per_second": 3.285, |
|
"eval_wer": 0.5221669999801992, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 80.63, |
|
"learning_rate": 3.6265060240963855e-05, |
|
"loss": 0.0879, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 80.63, |
|
"eval_cer": 0.12531743366899534, |
|
"eval_loss": 0.6352854371070862, |
|
"eval_runtime": 270.8217, |
|
"eval_samples_per_second": 26.833, |
|
"eval_steps_per_second": 3.356, |
|
"eval_wer": 0.5258301487040374, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 88.7, |
|
"learning_rate": 3.2048192771084335e-05, |
|
"loss": 0.0782, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 88.7, |
|
"eval_cer": 0.11265268600227542, |
|
"eval_loss": 0.607792854309082, |
|
"eval_runtime": 270.7843, |
|
"eval_samples_per_second": 26.837, |
|
"eval_steps_per_second": 3.357, |
|
"eval_wer": 0.4904263113082391, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 96.76, |
|
"learning_rate": 2.783132530120482e-05, |
|
"loss": 0.0709, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 96.76, |
|
"eval_cer": 0.11539100982210675, |
|
"eval_loss": 0.6464908123016357, |
|
"eval_runtime": 272.9311, |
|
"eval_samples_per_second": 26.626, |
|
"eval_steps_per_second": 3.331, |
|
"eval_wer": 0.49601013801160326, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 104.82, |
|
"learning_rate": 2.36144578313253e-05, |
|
"loss": 0.0661, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 104.82, |
|
"eval_cer": 0.11656783708277235, |
|
"eval_loss": 0.6621575951576233, |
|
"eval_runtime": 270.3992, |
|
"eval_samples_per_second": 26.875, |
|
"eval_steps_per_second": 3.362, |
|
"eval_wer": 0.494544878522068, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 112.89, |
|
"learning_rate": 1.9397590361445782e-05, |
|
"loss": 0.0616, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 112.89, |
|
"eval_cer": 0.11035119002989337, |
|
"eval_loss": 0.6440250277519226, |
|
"eval_runtime": 271.0109, |
|
"eval_samples_per_second": 26.814, |
|
"eval_steps_per_second": 3.354, |
|
"eval_wer": 0.47860523137239375, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 120.95, |
|
"learning_rate": 1.5180722891566264e-05, |
|
"loss": 0.0579, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 120.95, |
|
"eval_cer": 0.11444237621309375, |
|
"eval_loss": 0.6815317273139954, |
|
"eval_runtime": 269.1998, |
|
"eval_samples_per_second": 26.995, |
|
"eval_steps_per_second": 3.377, |
|
"eval_wer": 0.4887432429756648, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 129.03, |
|
"learning_rate": 1.0963855421686746e-05, |
|
"loss": 0.0549, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 129.03, |
|
"eval_cer": 0.11051744540466885, |
|
"eval_loss": 0.6602992415428162, |
|
"eval_runtime": 274.1737, |
|
"eval_samples_per_second": 26.505, |
|
"eval_steps_per_second": 3.315, |
|
"eval_wer": 0.47799140645110194, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 137.09, |
|
"learning_rate": 6.746987951807228e-06, |
|
"loss": 0.0527, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 137.09, |
|
"eval_cer": 0.10900158757583364, |
|
"eval_loss": 0.6652226448059082, |
|
"eval_runtime": 271.842, |
|
"eval_samples_per_second": 26.732, |
|
"eval_steps_per_second": 3.344, |
|
"eval_wer": 0.47486287943290495, |
|
"step": 4250 |
|
} |
|
], |
|
"max_steps": 4650, |
|
"num_train_epochs": 150, |
|
"total_flos": 3.524250563411945e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|