|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 89.98360655737704, |
|
"global_step": 2700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.485e-05, |
|
"loss": 11.2315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 2.985e-05, |
|
"loss": 4.0404, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 4.484999999999999e-05, |
|
"loss": 3.056, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 5.985e-05, |
|
"loss": 2.8167, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 7.484999999999999e-05, |
|
"loss": 2.721, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"eval_loss": 2.6876425743103027, |
|
"eval_runtime": 32.088, |
|
"eval_samples_per_second": 27.799, |
|
"eval_steps_per_second": 3.49, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"learning_rate": 8.984999999999999e-05, |
|
"loss": 2.6493, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 0.00010484999999999999, |
|
"loss": 1.9825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 26.66, |
|
"learning_rate": 0.00011985, |
|
"loss": 1.4345, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"learning_rate": 0.00013485, |
|
"loss": 1.3429, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.00014954999999999998, |
|
"loss": 1.2944, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 0.6482492089271545, |
|
"eval_runtime": 32.0457, |
|
"eval_samples_per_second": 27.835, |
|
"eval_steps_per_second": 3.495, |
|
"eval_wer": 0.7135025966532026, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 36.66, |
|
"learning_rate": 0.00016455, |
|
"loss": 1.2347, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 39.98, |
|
"learning_rate": 0.00017955, |
|
"loss": 1.1858, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 43.33, |
|
"learning_rate": 0.00019454999999999999, |
|
"loss": 1.1475, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 46.66, |
|
"learning_rate": 0.00020955, |
|
"loss": 1.0875, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 49.98, |
|
"learning_rate": 0.00022455, |
|
"loss": 1.0515, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 49.98, |
|
"eval_loss": 0.6644838452339172, |
|
"eval_runtime": 31.726, |
|
"eval_samples_per_second": 28.116, |
|
"eval_steps_per_second": 3.53, |
|
"eval_wer": 0.6754183496826313, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 0.00023954999999999997, |
|
"loss": 1.0217, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 56.66, |
|
"learning_rate": 0.00025455, |
|
"loss": 0.9991, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 59.98, |
|
"learning_rate": 0.00026954999999999997, |
|
"loss": 0.9918, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 63.33, |
|
"learning_rate": 0.00028455, |
|
"loss": 0.9552, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"learning_rate": 0.00029955, |
|
"loss": 0.9153, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"eval_loss": 0.7648739814758301, |
|
"eval_runtime": 31.6976, |
|
"eval_samples_per_second": 28.141, |
|
"eval_steps_per_second": 3.533, |
|
"eval_wer": 0.693594922100404, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 69.98, |
|
"learning_rate": 0.0002584285714285714, |
|
"loss": 0.8827, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 73.33, |
|
"learning_rate": 0.00021557142857142855, |
|
"loss": 0.8474, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 76.66, |
|
"learning_rate": 0.0001727142857142857, |
|
"loss": 0.8016, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 79.98, |
|
"learning_rate": 0.00012985714285714285, |
|
"loss": 0.7467, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 8.699999999999999e-05, |
|
"loss": 0.7056, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_loss": 0.8047966361045837, |
|
"eval_runtime": 31.4528, |
|
"eval_samples_per_second": 28.36, |
|
"eval_steps_per_second": 3.561, |
|
"eval_wer": 0.6755626081938835, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 86.66, |
|
"learning_rate": 4.414285714285714e-05, |
|
"loss": 0.6637, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 89.98, |
|
"learning_rate": 1.2857142857142856e-06, |
|
"loss": 0.6249, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 89.98, |
|
"step": 2700, |
|
"total_flos": 2.311805909026184e+19, |
|
"train_loss": 1.793763725845902, |
|
"train_runtime": 8224.5964, |
|
"train_samples_per_second": 21.24, |
|
"train_steps_per_second": 0.328 |
|
} |
|
], |
|
"max_steps": 2700, |
|
"num_train_epochs": 90, |
|
"total_flos": 2.311805909026184e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|