|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 8140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 5.9123, |
|
"eval_samples_per_second": 84.4, |
|
"eval_steps_per_second": 10.656, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.877886977886978e-05, |
|
"loss": 0.3917, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_runtime": 5.883, |
|
"eval_samples_per_second": 84.821, |
|
"eval_steps_per_second": 10.709, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.7552825552825554e-05, |
|
"loss": 0.191, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_runtime": 5.8655, |
|
"eval_samples_per_second": 85.073, |
|
"eval_steps_per_second": 10.741, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.6324324324324326e-05, |
|
"loss": 0.1213, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_runtime": 5.8538, |
|
"eval_samples_per_second": 85.244, |
|
"eval_steps_per_second": 10.762, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.5095823095823097e-05, |
|
"loss": 0.0805, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_runtime": 5.8461, |
|
"eval_samples_per_second": 85.356, |
|
"eval_steps_per_second": 10.776, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_runtime": 5.8536, |
|
"eval_samples_per_second": 85.246, |
|
"eval_steps_per_second": 10.763, |
|
"step": 2442 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 1.3867321867321867e-05, |
|
"loss": 0.0494, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_runtime": 5.8462, |
|
"eval_samples_per_second": 85.354, |
|
"eval_steps_per_second": 10.776, |
|
"step": 2849 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 1.2641277641277642e-05, |
|
"loss": 0.0343, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_runtime": 5.8523, |
|
"eval_samples_per_second": 85.265, |
|
"eval_steps_per_second": 10.765, |
|
"step": 3256 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 1.1412776412776414e-05, |
|
"loss": 0.0308, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_runtime": 5.8555, |
|
"eval_samples_per_second": 85.22, |
|
"eval_steps_per_second": 10.759, |
|
"step": 3663 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 1.0184275184275186e-05, |
|
"loss": 0.0262, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_runtime": 5.8564, |
|
"eval_samples_per_second": 85.207, |
|
"eval_steps_per_second": 10.758, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_runtime": 5.8649, |
|
"eval_samples_per_second": 85.082, |
|
"eval_steps_per_second": 10.742, |
|
"step": 4477 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 8.955773955773957e-06, |
|
"loss": 0.0249, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_runtime": 5.8497, |
|
"eval_samples_per_second": 85.303, |
|
"eval_steps_per_second": 10.77, |
|
"step": 4884 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 7.727272727272727e-06, |
|
"loss": 0.0122, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_runtime": 5.8618, |
|
"eval_samples_per_second": 85.127, |
|
"eval_steps_per_second": 10.748, |
|
"step": 5291 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 6.501228501228501e-06, |
|
"loss": 0.0137, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_runtime": 5.8612, |
|
"eval_samples_per_second": 85.136, |
|
"eval_steps_per_second": 10.749, |
|
"step": 5698 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 5.272727272727273e-06, |
|
"loss": 0.0071, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_runtime": 5.8722, |
|
"eval_samples_per_second": 84.977, |
|
"eval_steps_per_second": 10.729, |
|
"step": 6105 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 4.0442260442260445e-06, |
|
"loss": 0.0094, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_runtime": 5.8761, |
|
"eval_samples_per_second": 84.92, |
|
"eval_steps_per_second": 10.721, |
|
"step": 6512 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_runtime": 5.8601, |
|
"eval_samples_per_second": 85.153, |
|
"eval_steps_per_second": 10.751, |
|
"step": 6919 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 2.8157248157248157e-06, |
|
"loss": 0.0094, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_runtime": 5.8528, |
|
"eval_samples_per_second": 85.258, |
|
"eval_steps_per_second": 10.764, |
|
"step": 7326 |
|
}, |
|
{ |
|
"epoch": 18.43, |
|
"learning_rate": 1.5872235872235874e-06, |
|
"loss": 0.0072, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_runtime": 5.8548, |
|
"eval_samples_per_second": 85.23, |
|
"eval_steps_per_second": 10.76, |
|
"step": 7733 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 3.587223587223587e-07, |
|
"loss": 0.0065, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_runtime": 5.8599, |
|
"eval_samples_per_second": 85.155, |
|
"eval_steps_per_second": 10.751, |
|
"step": 8140 |
|
} |
|
], |
|
"max_steps": 8140, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.403128959729664e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|