|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.9984, |
|
"global_step": 6240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.4537074565887451, |
|
"eval_mse": 0.45370742678642273, |
|
"eval_runtime": 6.2981, |
|
"eval_samples_per_second": 158.779, |
|
"eval_steps_per_second": 19.847, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3457813907586236e-05, |
|
"loss": 0.6153, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.19009532034397125, |
|
"eval_mse": 0.19009532034397125, |
|
"eval_runtime": 6.3224, |
|
"eval_samples_per_second": 158.167, |
|
"eval_steps_per_second": 19.771, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.2025919258594513, |
|
"eval_mse": 0.2025919258594513, |
|
"eval_runtime": 6.3503, |
|
"eval_samples_per_second": 157.472, |
|
"eval_steps_per_second": 19.684, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.2285530466158863e-05, |
|
"loss": 0.1584, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.2967708110809326, |
|
"eval_mse": 0.2967708110809326, |
|
"eval_runtime": 6.3822, |
|
"eval_samples_per_second": 156.686, |
|
"eval_steps_per_second": 19.586, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.1113247024731491e-05, |
|
"loss": 0.0958, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.2692314684391022, |
|
"eval_mse": 0.2692314386367798, |
|
"eval_runtime": 6.3165, |
|
"eval_samples_per_second": 158.314, |
|
"eval_steps_per_second": 19.789, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.2646154761314392, |
|
"eval_mse": 0.2646154761314392, |
|
"eval_runtime": 6.321, |
|
"eval_samples_per_second": 158.204, |
|
"eval_steps_per_second": 19.775, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 9.940963583304119e-06, |
|
"loss": 0.0605, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.3655566871166229, |
|
"eval_mse": 0.3655566871166229, |
|
"eval_runtime": 6.4015, |
|
"eval_samples_per_second": 156.213, |
|
"eval_steps_per_second": 19.527, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.3432270586490631, |
|
"eval_mse": 0.3432270586490631, |
|
"eval_runtime": 6.4193, |
|
"eval_samples_per_second": 155.78, |
|
"eval_steps_per_second": 19.473, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 8.768680141876746e-06, |
|
"loss": 0.0448, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.29013848304748535, |
|
"eval_mse": 0.29013848304748535, |
|
"eval_runtime": 6.369, |
|
"eval_samples_per_second": 157.01, |
|
"eval_steps_per_second": 19.626, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 7.596396700449374e-06, |
|
"loss": 0.0328, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.23616179823875427, |
|
"eval_mse": 0.23616181313991547, |
|
"eval_runtime": 6.3278, |
|
"eval_samples_per_second": 158.032, |
|
"eval_steps_per_second": 19.754, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.23755905032157898, |
|
"eval_mse": 0.23755905032157898, |
|
"eval_runtime": 6.3847, |
|
"eval_samples_per_second": 156.626, |
|
"eval_steps_per_second": 19.578, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 6.424113259022001e-06, |
|
"loss": 0.0286, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.23381923139095306, |
|
"eval_mse": 0.23381923139095306, |
|
"eval_runtime": 6.3781, |
|
"eval_samples_per_second": 156.787, |
|
"eval_steps_per_second": 19.598, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 5.2518298175946285e-06, |
|
"loss": 0.0243, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.2918383777141571, |
|
"eval_mse": 0.2918383777141571, |
|
"eval_runtime": 6.3488, |
|
"eval_samples_per_second": 157.51, |
|
"eval_steps_per_second": 19.689, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.26574844121932983, |
|
"eval_mse": 0.26574844121932983, |
|
"eval_runtime": 6.3063, |
|
"eval_samples_per_second": 158.572, |
|
"eval_steps_per_second": 19.822, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 4.079546376167256e-06, |
|
"loss": 0.021, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.2649979293346405, |
|
"eval_mse": 0.2649979293346405, |
|
"eval_runtime": 6.3763, |
|
"eval_samples_per_second": 156.832, |
|
"eval_steps_per_second": 19.604, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.2675020396709442, |
|
"eval_mse": 0.2675020396709442, |
|
"eval_runtime": 6.2951, |
|
"eval_samples_per_second": 158.855, |
|
"eval_steps_per_second": 19.857, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 2.9072629347398834e-06, |
|
"loss": 0.019, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.2602880895137787, |
|
"eval_mse": 0.2602880895137787, |
|
"eval_runtime": 6.3892, |
|
"eval_samples_per_second": 156.515, |
|
"eval_steps_per_second": 19.564, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 1.734979493312511e-06, |
|
"loss": 0.0165, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.2324579656124115, |
|
"eval_mse": 0.2324579656124115, |
|
"eval_runtime": 6.3476, |
|
"eval_samples_per_second": 157.541, |
|
"eval_steps_per_second": 19.693, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.24971328675746918, |
|
"eval_mse": 0.24971328675746918, |
|
"eval_runtime": 6.3218, |
|
"eval_samples_per_second": 158.182, |
|
"eval_steps_per_second": 19.773, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 5.626960518851388e-07, |
|
"loss": 0.015, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.24272866547107697, |
|
"eval_mse": 0.24272866547107697, |
|
"eval_runtime": 6.3685, |
|
"eval_samples_per_second": 157.022, |
|
"eval_steps_per_second": 19.628, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 6240, |
|
"total_flos": 6577309062776832.0, |
|
"train_loss": 0.09126096031604669, |
|
"train_runtime": 4399.0118, |
|
"train_samples_per_second": 22.732, |
|
"train_steps_per_second": 1.419 |
|
} |
|
], |
|
"max_steps": 6240, |
|
"num_train_epochs": 20, |
|
"total_flos": 6577309062776832.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|