{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.962406015037594,
  "global_step": 160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3,
      "learning_rate": 6.25e-05,
      "loss": 10.8217,
      "step": 5
    },
    {
      "epoch": 0.6,
      "learning_rate": 9.995728791936504e-05,
      "loss": 10.8213,
      "step": 10
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.947761466636014e-05,
      "loss": 10.8201,
      "step": 15
    },
    {
      "epoch": 0.96,
      "eval_loss": 10.819129943847656,
      "eval_runtime": 6.4186,
      "eval_samples_per_second": 69.174,
      "eval_steps_per_second": 17.294,
      "step": 16
    },
    {
      "epoch": 1.24,
      "learning_rate": 9.847001329696653e-05,
      "loss": 12.1716,
      "step": 20
    },
    {
      "epoch": 1.54,
      "learning_rate": 9.694523495787149e-05,
      "loss": 10.8181,
      "step": 25
    },
    {
      "epoch": 1.84,
      "learning_rate": 9.491954909459895e-05,
      "loss": 10.8167,
      "step": 30
    },
    {
      "epoch": 1.96,
      "eval_loss": 10.81454086303711,
      "eval_runtime": 6.3633,
      "eval_samples_per_second": 69.775,
      "eval_steps_per_second": 17.444,
      "step": 32
    },
    {
      "epoch": 2.18,
      "learning_rate": 9.241456985587868e-05,
      "loss": 12.1668,
      "step": 35
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.945702546981969e-05,
      "loss": 10.8133,
      "step": 40
    },
    {
      "epoch": 2.78,
      "learning_rate": 8.60784730526531e-05,
      "loss": 10.8117,
      "step": 45
    },
    {
      "epoch": 2.96,
      "eval_loss": 10.809527397155762,
      "eval_runtime": 6.247,
      "eval_samples_per_second": 71.074,
      "eval_steps_per_second": 17.769,
      "step": 48
    },
    {
      "epoch": 3.12,
      "learning_rate": 8.231496189304704e-05,
      "loss": 12.1615,
      "step": 50
    },
    {
      "epoch": 3.42,
      "learning_rate": 7.820664880476256e-05,
      "loss": 10.8084,
      "step": 55
    },
    {
      "epoch": 3.72,
      "learning_rate": 7.379736965185368e-05,
      "loss": 10.8058,
      "step": 60
    },
    {
      "epoch": 3.96,
      "eval_loss": 10.802533149719238,
      "eval_runtime": 5.8936,
      "eval_samples_per_second": 75.335,
      "eval_steps_per_second": 18.834,
      "step": 64
    },
    {
      "epoch": 4.06,
      "learning_rate": 6.91341716182545e-05,
      "loss": 12.1538,
      "step": 65
    },
    {
      "epoch": 4.36,
      "learning_rate": 6.426681121245527e-05,
      "loss": 10.802,
      "step": 70
    },
    {
      "epoch": 4.66,
      "learning_rate": 5.924722336357793e-05,
      "loss": 10.8007,
      "step": 75
    },
    {
      "epoch": 4.96,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 10.7997,
      "step": 80
    },
    {
      "epoch": 4.96,
      "eval_loss": 10.798870086669922,
      "eval_runtime": 6.2383,
      "eval_samples_per_second": 71.174,
      "eval_steps_per_second": 17.793,
      "step": 80
    },
    {
      "epoch": 5.3,
      "learning_rate": 4.8966654938622295e-05,
      "loss": 12.148,
      "step": 85
    },
    {
      "epoch": 5.6,
      "learning_rate": 4.381536843653262e-05,
      "loss": 10.7968,
      "step": 90
    },
    {
      "epoch": 5.9,
      "learning_rate": 3.87300721992097e-05,
      "loss": 10.7959,
      "step": 95
    },
    {
      "epoch": 5.96,
      "eval_loss": 10.794721603393555,
      "eval_runtime": 6.3176,
      "eval_samples_per_second": 70.28,
      "eval_steps_per_second": 17.57,
      "step": 96
    },
    {
      "epoch": 6.24,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 12.1442,
      "step": 100
    },
    {
      "epoch": 6.54,
      "learning_rate": 2.8973208692864624e-05,
      "loss": 10.7943,
      "step": 105
    },
    {
      "epoch": 6.84,
      "learning_rate": 2.4405747545519963e-05,
      "loss": 10.7934,
      "step": 110
    },
    {
      "epoch": 6.96,
      "eval_loss": 10.792530059814453,
      "eval_runtime": 6.3054,
      "eval_samples_per_second": 70.416,
      "eval_steps_per_second": 17.604,
      "step": 112
    },
    {
      "epoch": 7.18,
      "learning_rate": 2.0111378089837956e-05,
      "loss": 12.1419,
      "step": 115
    },
    {
      "epoch": 7.48,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 10.7926,
      "step": 120
    },
    {
      "epoch": 7.78,
      "learning_rate": 1.2521795812943704e-05,
      "loss": 10.7924,
      "step": 125
    },
    {
      "epoch": 7.96,
      "eval_loss": 10.791926383972168,
      "eval_runtime": 5.785,
      "eval_samples_per_second": 76.75,
      "eval_steps_per_second": 19.187,
      "step": 128
    },
    {
      "epoch": 8.12,
      "learning_rate": 9.307564136490254e-06,
      "loss": 12.1411,
      "step": 130
    },
    {
      "epoch": 8.42,
      "learning_rate": 6.527522369181655e-06,
      "loss": 10.7921,
      "step": 135
    },
    {
      "epoch": 8.72,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 10.7921,
      "step": 140
    },
    {
      "epoch": 8.96,
      "eval_loss": 10.79179859161377,
      "eval_runtime": 6.2936,
      "eval_samples_per_second": 70.548,
      "eval_steps_per_second": 17.637,
      "step": 144
    },
    {
      "epoch": 9.06,
      "learning_rate": 2.3837118562592797e-06,
      "loss": 12.1411,
      "step": 145
    },
    {
      "epoch": 9.36,
      "learning_rate": 1.064157733632276e-06,
      "loss": 10.792,
      "step": 150
    },
    {
      "epoch": 9.66,
      "learning_rate": 2.667509943378721e-07,
      "loss": 10.792,
      "step": 155
    },
    {
      "epoch": 9.96,
      "learning_rate": 0.0,
      "loss": 10.792,
      "step": 160
    },
    {
      "epoch": 9.96,
      "eval_loss": 10.791767120361328,
      "eval_runtime": 5.8131,
      "eval_samples_per_second": 76.379,
      "eval_steps_per_second": 19.095,
      "step": 160
    },
    {
      "epoch": 9.96,
      "step": 160,
      "total_flos": 39596851200.0,
      "train_loss": 11.18234748840332,
      "train_runtime": 256.0921,
      "train_samples_per_second": 332.537,
      "train_steps_per_second": 0.625
    }
  ],
  "max_steps": 160,
  "num_train_epochs": 10,
  "total_flos": 39596851200.0,
  "trial_name": null,
  "trial_params": null
}