|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.997824510514866, |
|
"global_step": 3440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5e-05, |
|
"loss": 7.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2282, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00015, |
|
"loss": 3.0143, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0002, |
|
"loss": 2.9501, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00025, |
|
"loss": 2.6683, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 1.7697819471359253, |
|
"eval_runtime": 250.7581, |
|
"eval_samples_per_second": 18.424, |
|
"eval_steps_per_second": 4.606, |
|
"eval_wer": 1.0040690203756961, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0003, |
|
"loss": 2.1972, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00035, |
|
"loss": 2.0775, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0004, |
|
"loss": 1.9871, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 1.9768, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0005, |
|
"loss": 1.9548, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 1.0889918804168701, |
|
"eval_runtime": 243.6531, |
|
"eval_samples_per_second": 18.961, |
|
"eval_steps_per_second": 4.74, |
|
"eval_wer": 0.8601847885945053, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00055, |
|
"loss": 1.9588, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0006, |
|
"loss": 1.9734, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.0006495, |
|
"loss": 1.9725, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0006995, |
|
"loss": 1.9717, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0007495000000000001, |
|
"loss": 1.9568, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_loss": 1.0877875089645386, |
|
"eval_runtime": 239.5705, |
|
"eval_samples_per_second": 19.285, |
|
"eval_steps_per_second": 4.821, |
|
"eval_wer": 0.868016887964266, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.0007995, |
|
"loss": 1.9528, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0008495000000000001, |
|
"loss": 1.9862, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.0008995, |
|
"loss": 1.9592, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0009495, |
|
"loss": 1.938, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.0009995000000000002, |
|
"loss": 1.9497, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_loss": 1.1500531435012817, |
|
"eval_runtime": 242.7035, |
|
"eval_samples_per_second": 19.036, |
|
"eval_steps_per_second": 4.759, |
|
"eval_wer": 0.8837728691182769, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.00093125, |
|
"loss": 1.9326, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.0008618055555555557, |
|
"loss": 1.8927, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0007923611111111111, |
|
"loss": 1.8929, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.0007229166666666666, |
|
"loss": 1.8684, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.0006534722222222223, |
|
"loss": 1.8453, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"eval_loss": 1.0452075004577637, |
|
"eval_runtime": 247.1245, |
|
"eval_samples_per_second": 18.695, |
|
"eval_steps_per_second": 4.674, |
|
"eval_wer": 0.8417977115584654, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0005840277777777778, |
|
"loss": 1.8081, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.0005145833333333333, |
|
"loss": 1.78, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.00044513888888888885, |
|
"loss": 1.7618, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.00037569444444444445, |
|
"loss": 1.7155, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.00030625000000000004, |
|
"loss": 1.6952, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"eval_loss": 0.9152895212173462, |
|
"eval_runtime": 242.409, |
|
"eval_samples_per_second": 19.059, |
|
"eval_steps_per_second": 4.765, |
|
"eval_wer": 0.7822615186930184, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 0.00023680555555555556, |
|
"loss": 1.6874, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.0001673611111111111, |
|
"loss": 1.6143, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 9.791666666666667e-05, |
|
"loss": 1.5948, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 2.9166666666666666e-05, |
|
"loss": 1.5723, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3440, |
|
"total_flos": 1.01153307245466e+19, |
|
"train_loss": 2.1586562755496, |
|
"train_runtime": 12185.0109, |
|
"train_samples_per_second": 9.052, |
|
"train_steps_per_second": 0.282 |
|
} |
|
], |
|
"max_steps": 3440, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.01153307245466e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|