{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.98461538461538,
  "global_step": 3200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.12,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 10.7305,
      "step": 100
    },
    {
      "epoch": 6.25,
      "learning_rate": 3.920000000000001e-06,
      "loss": 3.0098,
      "step": 200
    },
    {
      "epoch": 9.37,
      "learning_rate": 5.92e-06,
      "loss": 2.9327,
      "step": 300
    },
    {
      "epoch": 12.49,
      "learning_rate": 7.92e-06,
      "loss": 2.8216,
      "step": 400
    },
    {
      "epoch": 15.62,
      "learning_rate": 9.920000000000002e-06,
      "loss": 2.3731,
      "step": 500
    },
    {
      "epoch": 15.62,
      "eval_loss": 1.5517226457595825,
      "eval_runtime": 24.8327,
      "eval_samples_per_second": 20.497,
      "eval_steps_per_second": 1.289,
      "eval_wer": 0.9499121265377856,
      "step": 500
    },
    {
      "epoch": 18.74,
      "learning_rate": 1.1920000000000001e-05,
      "loss": 1.9105,
      "step": 600
    },
    {
      "epoch": 21.86,
      "learning_rate": 1.392e-05,
      "loss": 1.714,
      "step": 700
    },
    {
      "epoch": 24.98,
      "learning_rate": 1.5920000000000003e-05,
      "loss": 1.5476,
      "step": 800
    },
    {
      "epoch": 28.12,
      "learning_rate": 1.792e-05,
      "loss": 1.4238,
      "step": 900
    },
    {
      "epoch": 31.25,
      "learning_rate": 1.9920000000000002e-05,
      "loss": 1.3312,
      "step": 1000
    },
    {
      "epoch": 31.25,
      "eval_loss": 0.8717297911643982,
      "eval_runtime": 24.7966,
      "eval_samples_per_second": 20.527,
      "eval_steps_per_second": 1.29,
      "eval_wer": 0.6189220855301699,
      "step": 1000
    },
    {
      "epoch": 34.37,
      "learning_rate": 1.912727272727273e-05,
      "loss": 1.2049,
      "step": 1100
    },
    {
      "epoch": 37.49,
      "learning_rate": 1.821818181818182e-05,
      "loss": 1.1346,
      "step": 1200
    },
    {
      "epoch": 40.62,
      "learning_rate": 1.730909090909091e-05,
      "loss": 1.0533,
      "step": 1300
    },
    {
      "epoch": 43.74,
      "learning_rate": 1.64e-05,
      "loss": 0.9638,
      "step": 1400
    },
    {
      "epoch": 46.86,
      "learning_rate": 1.549090909090909e-05,
      "loss": 0.9135,
      "step": 1500
    },
    {
      "epoch": 46.86,
      "eval_loss": 0.8298946619033813,
      "eval_runtime": 24.721,
      "eval_samples_per_second": 20.59,
      "eval_steps_per_second": 1.294,
      "eval_wer": 0.5310486233157586,
      "step": 1500
    },
    {
      "epoch": 49.98,
      "learning_rate": 1.4581818181818184e-05,
      "loss": 0.8568,
      "step": 1600
    },
    {
      "epoch": 53.12,
      "learning_rate": 1.3672727272727273e-05,
      "loss": 0.8141,
      "step": 1700
    },
    {
      "epoch": 56.25,
      "learning_rate": 1.2763636363636365e-05,
      "loss": 0.7526,
      "step": 1800
    },
    {
      "epoch": 59.37,
      "learning_rate": 1.1854545454545457e-05,
      "loss": 0.7177,
      "step": 1900
    },
    {
      "epoch": 62.49,
      "learning_rate": 1.0945454545454545e-05,
      "loss": 0.6719,
      "step": 2000
    },
    {
      "epoch": 62.49,
      "eval_loss": 0.8842366933822632,
      "eval_runtime": 25.0435,
      "eval_samples_per_second": 20.325,
      "eval_steps_per_second": 1.278,
      "eval_wer": 0.5043936731107206,
      "step": 2000
    },
    {
      "epoch": 65.62,
      "learning_rate": 1.0036363636363637e-05,
      "loss": 0.6552,
      "step": 2100
    },
    {
      "epoch": 68.74,
      "learning_rate": 9.127272727272727e-06,
      "loss": 0.6145,
      "step": 2200
    },
    {
      "epoch": 71.86,
      "learning_rate": 8.21818181818182e-06,
      "loss": 0.596,
      "step": 2300
    },
    {
      "epoch": 74.98,
      "learning_rate": 7.30909090909091e-06,
      "loss": 0.5719,
      "step": 2400
    },
    {
      "epoch": 78.12,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.5583,
      "step": 2500
    },
    {
      "epoch": 78.12,
      "eval_loss": 0.9093144536018372,
      "eval_runtime": 24.6074,
      "eval_samples_per_second": 20.685,
      "eval_steps_per_second": 1.3,
      "eval_wer": 0.4800820152314001,
      "step": 2500
    },
    {
      "epoch": 81.25,
      "learning_rate": 5.490909090909091e-06,
      "loss": 0.5417,
      "step": 2600
    },
    {
      "epoch": 84.37,
      "learning_rate": 4.581818181818183e-06,
      "loss": 0.5241,
      "step": 2700
    },
    {
      "epoch": 87.49,
      "learning_rate": 3.672727272727273e-06,
      "loss": 0.4901,
      "step": 2800
    },
    {
      "epoch": 90.62,
      "learning_rate": 2.763636363636364e-06,
      "loss": 0.4882,
      "step": 2900
    },
    {
      "epoch": 93.74,
      "learning_rate": 1.8545454545454546e-06,
      "loss": 0.4728,
      "step": 3000
    },
    {
      "epoch": 93.74,
      "eval_loss": 0.9488239884376526,
      "eval_runtime": 24.6884,
      "eval_samples_per_second": 20.617,
      "eval_steps_per_second": 1.296,
      "eval_wer": 0.48125366139425896,
      "step": 3000
    },
    {
      "epoch": 96.86,
      "learning_rate": 9.454545454545455e-07,
      "loss": 0.4682,
      "step": 3100
    },
    {
      "epoch": 99.98,
      "learning_rate": 3.636363636363637e-08,
      "loss": 0.4634,
      "step": 3200
    },
    {
      "epoch": 99.98,
      "step": 3200,
      "total_flos": 3.840629068156852e+19,
      "train_loss": 1.4163260304927825,
      "train_runtime": 8387.3816,
      "train_samples_per_second": 12.34,
      "train_steps_per_second": 0.382
    }
  ],
  "max_steps": 3200,
  "num_train_epochs": 100,
  "total_flos": 3.840629068156852e+19,
  "trial_name": null,
  "trial_params": null
}