|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.994219653179194, |
|
"global_step": 2150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.7125e-06, |
|
"loss": 14.869, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 7.4625e-06, |
|
"loss": 6.701, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 1.1212499999999998e-05, |
|
"loss": 4.3877, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 1.49625e-05, |
|
"loss": 3.788, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 1.8712499999999997e-05, |
|
"loss": 3.3873, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"eval_loss": 3.3324315547943115, |
|
"eval_runtime": 36.9219, |
|
"eval_samples_per_second": 125.129, |
|
"eval_steps_per_second": 1.977, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 2.2462499999999997e-05, |
|
"loss": 3.1689, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 2.6212499999999997e-05, |
|
"loss": 3.0785, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 2.99625e-05, |
|
"loss": 2.9982, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 3.37125e-05, |
|
"loss": 2.9572, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 23.25, |
|
"learning_rate": 3.7462499999999996e-05, |
|
"loss": 2.9207, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 23.25, |
|
"eval_loss": 2.8923747539520264, |
|
"eval_runtime": 36.3858, |
|
"eval_samples_per_second": 126.972, |
|
"eval_steps_per_second": 2.006, |
|
"eval_wer": 0.9987150461971487, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 25.58, |
|
"learning_rate": 4.12125e-05, |
|
"loss": 2.8705, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 27.9, |
|
"learning_rate": 4.4962499999999995e-05, |
|
"loss": 2.8024, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 30.23, |
|
"learning_rate": 4.8675e-05, |
|
"loss": 2.6545, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"learning_rate": 5.2424999999999994e-05, |
|
"loss": 2.4772, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 34.88, |
|
"learning_rate": 5.61375e-05, |
|
"loss": 2.1463, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 34.88, |
|
"eval_loss": 1.3062961101531982, |
|
"eval_runtime": 36.8007, |
|
"eval_samples_per_second": 125.541, |
|
"eval_steps_per_second": 1.984, |
|
"eval_wer": 0.9138775010707948, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 37.21, |
|
"learning_rate": 5.988749999999999e-05, |
|
"loss": 1.911, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 39.53, |
|
"learning_rate": 6.36375e-05, |
|
"loss": 1.7907, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 41.86, |
|
"learning_rate": 6.738749999999999e-05, |
|
"loss": 1.7065, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 44.18, |
|
"learning_rate": 7.11375e-05, |
|
"loss": 1.6557, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 46.51, |
|
"learning_rate": 7.48875e-05, |
|
"loss": 1.607, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 46.51, |
|
"eval_loss": 0.7204738855361938, |
|
"eval_runtime": 36.377, |
|
"eval_samples_per_second": 127.003, |
|
"eval_steps_per_second": 2.007, |
|
"eval_wer": 0.6856146362356973, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 48.83, |
|
"learning_rate": 2.6499999999999997e-05, |
|
"loss": 1.5509, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"step": 2150, |
|
"total_flos": 6.648469351402203e+19, |
|
"train_loss": 3.356673427847929, |
|
"train_runtime": 4577.6572, |
|
"train_samples_per_second": 120.476, |
|
"train_steps_per_second": 0.47 |
|
} |
|
], |
|
"max_steps": 2150, |
|
"num_train_epochs": 50, |
|
"total_flos": 6.648469351402203e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|