|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 25.0, |
|
"global_step": 2350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 3.4338, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.4137182235717773, |
|
"eval_runtime": 36.2713, |
|
"eval_samples_per_second": 5.514, |
|
"eval_steps_per_second": 0.689, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.9565, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.173758029937744, |
|
"eval_runtime": 35.902, |
|
"eval_samples_per_second": 5.571, |
|
"eval_steps_per_second": 0.696, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.7101, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.012174606323242, |
|
"eval_runtime": 35.6264, |
|
"eval_samples_per_second": 5.614, |
|
"eval_steps_per_second": 0.702, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 2.7515, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.964645504951477, |
|
"eval_runtime": 35.4193, |
|
"eval_samples_per_second": 5.647, |
|
"eval_steps_per_second": 0.706, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 2.724, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.1284220218658447, |
|
"eval_runtime": 34.8555, |
|
"eval_samples_per_second": 5.738, |
|
"eval_steps_per_second": 0.717, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 2.6193, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.9379758834838867, |
|
"eval_runtime": 36.3088, |
|
"eval_samples_per_second": 5.508, |
|
"eval_steps_per_second": 0.689, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.5032, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.9285995960235596, |
|
"eval_runtime": 35.9858, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 0.695, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 2.5342, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.9365949630737305, |
|
"eval_runtime": 35.1567, |
|
"eval_samples_per_second": 5.689, |
|
"eval_steps_per_second": 0.711, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4e-05, |
|
"loss": 2.5519, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.9736474752426147, |
|
"eval_runtime": 35.9727, |
|
"eval_samples_per_second": 5.56, |
|
"eval_steps_per_second": 0.695, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.4988, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.881581425666809, |
|
"eval_runtime": 35.6769, |
|
"eval_samples_per_second": 5.606, |
|
"eval_steps_per_second": 0.701, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 2.5101, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.8453679084777832, |
|
"eval_runtime": 33.4013, |
|
"eval_samples_per_second": 5.988, |
|
"eval_steps_per_second": 0.748, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.4441, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.8143038749694824, |
|
"eval_runtime": 34.7625, |
|
"eval_samples_per_second": 5.753, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 2.3857, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.7919152975082397, |
|
"eval_runtime": 33.975, |
|
"eval_samples_per_second": 5.887, |
|
"eval_steps_per_second": 0.736, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 2.2877, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.7400457859039307, |
|
"eval_runtime": 34.7007, |
|
"eval_samples_per_second": 5.764, |
|
"eval_steps_per_second": 0.72, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.3013, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.7408628463745117, |
|
"eval_runtime": 34.5398, |
|
"eval_samples_per_second": 5.79, |
|
"eval_steps_per_second": 0.724, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.3134, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.7698218822479248, |
|
"eval_runtime": 34.7625, |
|
"eval_samples_per_second": 5.753, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.5e-05, |
|
"loss": 2.3423, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.7581219673156738, |
|
"eval_runtime": 35.8583, |
|
"eval_samples_per_second": 5.578, |
|
"eval_steps_per_second": 0.697, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3536, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.7658276557922363, |
|
"eval_runtime": 35.6301, |
|
"eval_samples_per_second": 5.613, |
|
"eval_steps_per_second": 0.702, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 5e-06, |
|
"loss": 2.2957, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.7328851222991943, |
|
"eval_runtime": 36.2644, |
|
"eval_samples_per_second": 5.515, |
|
"eval_steps_per_second": 0.689, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.274, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.7334604263305664, |
|
"eval_runtime": 34.2732, |
|
"eval_samples_per_second": 5.835, |
|
"eval_steps_per_second": 0.729, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 2.2906, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 1.734320878982544, |
|
"eval_runtime": 34.5789, |
|
"eval_samples_per_second": 5.784, |
|
"eval_steps_per_second": 0.723, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 1.2e-05, |
|
"loss": 2.2492, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 1.7080261707305908, |
|
"eval_runtime": 35.1218, |
|
"eval_samples_per_second": 5.694, |
|
"eval_steps_per_second": 0.712, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.2516, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.718016505241394, |
|
"eval_runtime": 34.8144, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 0.718, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.2574, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.7081444263458252, |
|
"eval_runtime": 34.6174, |
|
"eval_samples_per_second": 5.777, |
|
"eval_steps_per_second": 0.722, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.2508, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 1.7065184116363525, |
|
"eval_runtime": 34.9055, |
|
"eval_samples_per_second": 5.73, |
|
"eval_steps_per_second": 0.716, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 2350, |
|
"total_flos": 8.960483088e+18, |
|
"train_loss": 0.45198433734001, |
|
"train_runtime": 1195.2772, |
|
"train_samples_per_second": 15.687, |
|
"train_steps_per_second": 1.966 |
|
} |
|
], |
|
"max_steps": 2350, |
|
"num_train_epochs": 25, |
|
"total_flos": 8.960483088e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|