{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 8200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.66,
      "learning_rate": 2.0859999999999997e-05,
      "loss": 6.9294,
      "step": 300
    },
    {
      "epoch": 6.1,
      "eval_loss": 2.971196413040161,
      "eval_runtime": 36.7267,
      "eval_samples_per_second": 32.483,
      "eval_steps_per_second": 1.035,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 7.32,
      "learning_rate": 4.1859999999999996e-05,
      "loss": 3.05,
      "step": 600
    },
    {
      "epoch": 10.98,
      "learning_rate": 6.285999999999999e-05,
      "loss": 2.8305,
      "step": 900
    },
    {
      "epoch": 12.2,
      "eval_loss": 1.707324504852295,
      "eval_runtime": 37.8146,
      "eval_samples_per_second": 31.549,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.9478846435368175,
      "step": 1000
    },
    {
      "epoch": 14.63,
      "learning_rate": 6.8075e-05,
      "loss": 2.0414,
      "step": 1200
    },
    {
      "epoch": 18.29,
      "learning_rate": 6.515833333333332e-05,
      "loss": 1.4795,
      "step": 1500
    },
    {
      "epoch": 18.29,
      "eval_loss": 0.5756120681762695,
      "eval_runtime": 37.4871,
      "eval_samples_per_second": 31.824,
      "eval_steps_per_second": 1.014,
      "eval_wer": 0.6397306397306397,
      "step": 1500
    },
    {
      "epoch": 21.95,
      "learning_rate": 6.224166666666666e-05,
      "loss": 1.3433,
      "step": 1800
    },
    {
      "epoch": 24.39,
      "eval_loss": 0.4967685639858246,
      "eval_runtime": 37.9583,
      "eval_samples_per_second": 31.429,
      "eval_steps_per_second": 1.001,
      "eval_wer": 0.5423803249890207,
      "step": 2000
    },
    {
      "epoch": 25.61,
      "learning_rate": 5.9325e-05,
      "loss": 1.2567,
      "step": 2100
    },
    {
      "epoch": 29.27,
      "learning_rate": 5.6408333333333327e-05,
      "loss": 1.1766,
      "step": 2400
    },
    {
      "epoch": 30.49,
      "eval_loss": 0.4184603989124298,
      "eval_runtime": 37.222,
      "eval_samples_per_second": 32.051,
      "eval_steps_per_second": 1.021,
      "eval_wer": 0.4743083003952569,
      "step": 2500
    },
    {
      "epoch": 32.93,
      "learning_rate": 5.349166666666666e-05,
      "loss": 1.0943,
      "step": 2700
    },
    {
      "epoch": 36.59,
      "learning_rate": 5.0574999999999996e-05,
      "loss": 1.0017,
      "step": 3000
    },
    {
      "epoch": 36.59,
      "eval_loss": 0.3302731513977051,
      "eval_runtime": 36.8764,
      "eval_samples_per_second": 32.351,
      "eval_steps_per_second": 1.03,
      "eval_wer": 0.35778070560679254,
      "step": 3000
    },
    {
      "epoch": 40.24,
      "learning_rate": 4.7658333333333324e-05,
      "loss": 0.9358,
      "step": 3300
    },
    {
      "epoch": 42.68,
      "eval_loss": 0.30026641488075256,
      "eval_runtime": 37.9381,
      "eval_samples_per_second": 31.446,
      "eval_steps_per_second": 1.002,
      "eval_wer": 0.3050797833406529,
      "step": 3500
    },
    {
      "epoch": 43.9,
      "learning_rate": 4.4741666666666665e-05,
      "loss": 0.8798,
      "step": 3600
    },
    {
      "epoch": 47.56,
      "learning_rate": 4.183472222222222e-05,
      "loss": 0.8358,
      "step": 3900
    },
    {
      "epoch": 48.78,
      "eval_loss": 0.30453845858573914,
      "eval_runtime": 36.3987,
      "eval_samples_per_second": 32.776,
      "eval_steps_per_second": 1.044,
      "eval_wer": 0.2883911579563753,
      "step": 4000
    },
    {
      "epoch": 51.22,
      "learning_rate": 3.8918055555555554e-05,
      "loss": 0.8059,
      "step": 4200
    },
    {
      "epoch": 54.88,
      "learning_rate": 3.600138888888889e-05,
      "loss": 0.7647,
      "step": 4500
    },
    {
      "epoch": 54.88,
      "eval_loss": 0.2865545451641083,
      "eval_runtime": 36.6315,
      "eval_samples_per_second": 32.568,
      "eval_steps_per_second": 1.037,
      "eval_wer": 0.2677499634021373,
      "step": 4500
    },
    {
      "epoch": 58.54,
      "learning_rate": 3.3084722222222216e-05,
      "loss": 0.7482,
      "step": 4800
    },
    {
      "epoch": 60.98,
      "eval_loss": 0.2829342782497406,
      "eval_runtime": 36.4673,
      "eval_samples_per_second": 32.714,
      "eval_steps_per_second": 1.042,
      "eval_wer": 0.2585273020055629,
      "step": 5000
    },
    {
      "epoch": 62.2,
      "learning_rate": 3.016805555555555e-05,
      "loss": 0.7223,
      "step": 5100
    },
    {
      "epoch": 65.85,
      "learning_rate": 2.725138888888889e-05,
      "loss": 0.6943,
      "step": 5400
    },
    {
      "epoch": 67.07,
      "eval_loss": 0.2782347798347473,
      "eval_runtime": 37.212,
      "eval_samples_per_second": 32.06,
      "eval_steps_per_second": 1.021,
      "eval_wer": 0.24784072610159566,
      "step": 5500
    },
    {
      "epoch": 69.51,
      "learning_rate": 2.433472222222222e-05,
      "loss": 0.6758,
      "step": 5700
    },
    {
      "epoch": 73.17,
      "learning_rate": 2.1418055555555552e-05,
      "loss": 0.6586,
      "step": 6000
    },
    {
      "epoch": 73.17,
      "eval_loss": 0.2911244034767151,
      "eval_runtime": 36.7087,
      "eval_samples_per_second": 32.499,
      "eval_steps_per_second": 1.035,
      "eval_wer": 0.25369638413116674,
      "step": 6000
    },
    {
      "epoch": 76.83,
      "learning_rate": 1.850138888888889e-05,
      "loss": 0.6425,
      "step": 6300
    },
    {
      "epoch": 79.27,
      "eval_loss": 0.2817089259624481,
      "eval_runtime": 36.6478,
      "eval_samples_per_second": 32.553,
      "eval_steps_per_second": 1.037,
      "eval_wer": 0.24623042014346364,
      "step": 6500
    },
    {
      "epoch": 80.49,
      "learning_rate": 1.558472222222222e-05,
      "loss": 0.6313,
      "step": 6600
    },
    {
      "epoch": 84.15,
      "learning_rate": 1.2677777777777776e-05,
      "loss": 0.6067,
      "step": 6900
    },
    {
      "epoch": 85.37,
      "eval_loss": 0.29103317856788635,
      "eval_runtime": 37.7988,
      "eval_samples_per_second": 31.562,
      "eval_steps_per_second": 1.005,
      "eval_wer": 0.24359537403015663,
      "step": 7000
    },
    {
      "epoch": 87.8,
      "learning_rate": 9.761111111111111e-06,
      "loss": 0.6064,
      "step": 7200
    },
    {
      "epoch": 91.46,
      "learning_rate": 6.844444444444444e-06,
      "loss": 0.5974,
      "step": 7500
    },
    {
      "epoch": 91.46,
      "eval_loss": 0.28752732276916504,
      "eval_runtime": 38.6504,
      "eval_samples_per_second": 30.866,
      "eval_steps_per_second": 0.983,
      "eval_wer": 0.24300980822719953,
      "step": 7500
    },
    {
      "epoch": 95.12,
      "learning_rate": 3.927777777777777e-06,
      "loss": 0.5812,
      "step": 7800
    },
    {
      "epoch": 97.56,
      "eval_loss": 0.2852196991443634,
      "eval_runtime": 38.817,
      "eval_samples_per_second": 30.734,
      "eval_steps_per_second": 0.979,
      "eval_wer": 0.23964280486019615,
      "step": 8000
    },
    {
      "epoch": 98.78,
      "learning_rate": 1.011111111111111e-06,
      "loss": 0.5804,
      "step": 8100
    },
    {
      "epoch": 100.0,
      "step": 8200,
      "total_flos": 2.6094292924080824e+19,
      "train_loss": 1.2572689837944218,
      "train_runtime": 11083.5384,
      "train_samples_per_second": 23.512,
      "train_steps_per_second": 0.74
    }
  ],
  "max_steps": 8200,
  "num_train_epochs": 100,
  "total_flos": 2.6094292924080824e+19,
  "trial_name": null,
  "trial_params": null
}