|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 7660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.9005221932114882e-05, |
|
"loss": 2.5049, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.3055639266967773, |
|
"eval_runtime": 6.5178, |
|
"eval_samples_per_second": 940.197, |
|
"eval_steps_per_second": 14.729, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8005221932114885e-05, |
|
"loss": 2.3896, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.246039390563965, |
|
"eval_runtime": 6.5445, |
|
"eval_samples_per_second": 936.352, |
|
"eval_steps_per_second": 14.669, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7005221932114885e-05, |
|
"loss": 2.3458, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.2351295948028564, |
|
"eval_runtime": 6.5331, |
|
"eval_samples_per_second": 937.992, |
|
"eval_steps_per_second": 14.694, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.6005221932114884e-05, |
|
"loss": 2.3097, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.1917026042938232, |
|
"eval_runtime": 6.5235, |
|
"eval_samples_per_second": 939.379, |
|
"eval_steps_per_second": 14.716, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.5005221932114883e-05, |
|
"loss": 2.2839, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.193546772003174, |
|
"eval_runtime": 6.5261, |
|
"eval_samples_per_second": 938.992, |
|
"eval_steps_per_second": 14.71, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.4005221932114883e-05, |
|
"loss": 2.2611, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.174062490463257, |
|
"eval_runtime": 6.5308, |
|
"eval_samples_per_second": 938.316, |
|
"eval_steps_per_second": 14.699, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.3005221932114884e-05, |
|
"loss": 2.2397, |
|
"step": 2681 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.151566743850708, |
|
"eval_runtime": 6.525, |
|
"eval_samples_per_second": 939.154, |
|
"eval_steps_per_second": 14.713, |
|
"step": 2681 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.2005221932114883e-05, |
|
"loss": 2.2234, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.14640474319458, |
|
"eval_runtime": 6.531, |
|
"eval_samples_per_second": 938.292, |
|
"eval_steps_per_second": 14.699, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.1005221932114883e-05, |
|
"loss": 2.2121, |
|
"step": 3447 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.124241590499878, |
|
"eval_runtime": 6.5412, |
|
"eval_samples_per_second": 936.826, |
|
"eval_steps_per_second": 14.676, |
|
"step": 3447 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.0005221932114884e-05, |
|
"loss": 2.2041, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.1360511779785156, |
|
"eval_runtime": 6.5352, |
|
"eval_samples_per_second": 937.687, |
|
"eval_steps_per_second": 14.69, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.005221932114883e-06, |
|
"loss": 2.1883, |
|
"step": 4213 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.1251063346862793, |
|
"eval_runtime": 6.5334, |
|
"eval_samples_per_second": 937.956, |
|
"eval_steps_per_second": 14.694, |
|
"step": 4213 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 8.005221932114883e-06, |
|
"loss": 2.185, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.1296956539154053, |
|
"eval_runtime": 6.5234, |
|
"eval_samples_per_second": 939.386, |
|
"eval_steps_per_second": 14.716, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 7.005221932114883e-06, |
|
"loss": 2.1712, |
|
"step": 4979 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.1061811447143555, |
|
"eval_runtime": 6.5182, |
|
"eval_samples_per_second": 940.138, |
|
"eval_steps_per_second": 14.728, |
|
"step": 4979 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 6.005221932114883e-06, |
|
"loss": 2.1648, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.1048877239227295, |
|
"eval_runtime": 6.5157, |
|
"eval_samples_per_second": 940.496, |
|
"eval_steps_per_second": 14.734, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 5.005221932114883e-06, |
|
"loss": 2.1587, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.106553792953491, |
|
"eval_runtime": 6.52, |
|
"eval_samples_per_second": 939.88, |
|
"eval_steps_per_second": 14.724, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.005221932114883e-06, |
|
"loss": 2.1532, |
|
"step": 6128 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.0981085300445557, |
|
"eval_runtime": 6.5377, |
|
"eval_samples_per_second": 937.338, |
|
"eval_steps_per_second": 14.684, |
|
"step": 6128 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.005221932114883e-06, |
|
"loss": 2.1472, |
|
"step": 6511 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 2.0925848484039307, |
|
"eval_runtime": 6.5057, |
|
"eval_samples_per_second": 941.95, |
|
"eval_steps_per_second": 14.756, |
|
"step": 6511 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2.005221932114883e-06, |
|
"loss": 2.1462, |
|
"step": 6894 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 2.083235025405884, |
|
"eval_runtime": 6.5322, |
|
"eval_samples_per_second": 938.118, |
|
"eval_steps_per_second": 14.696, |
|
"step": 6894 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 1.0052219321148825e-06, |
|
"loss": 2.1437, |
|
"step": 7277 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 2.093729257583618, |
|
"eval_runtime": 6.5339, |
|
"eval_samples_per_second": 937.883, |
|
"eval_steps_per_second": 14.693, |
|
"step": 7277 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 5.2219321148825064e-09, |
|
"loss": 2.1386, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 2.0927391052246094, |
|
"eval_runtime": 6.5139, |
|
"eval_samples_per_second": 940.762, |
|
"eval_steps_per_second": 14.738, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 7660, |
|
"total_flos": 3.24907393591296e+16, |
|
"train_loss": 2.228582644151334, |
|
"train_runtime": 1800.6594, |
|
"train_samples_per_second": 272.234, |
|
"train_steps_per_second": 4.254 |
|
} |
|
], |
|
"max_steps": 7660, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.24907393591296e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|