|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.925743170056565, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.759268175252769e-05, |
|
"loss": 9.8702, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.518536350505537e-05, |
|
"loss": 5.0407, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.277804525758305e-05, |
|
"loss": 3.9664, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.037072701011074e-05, |
|
"loss": 3.6556, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.796340876263843e-05, |
|
"loss": 3.4608, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.555609051516611e-05, |
|
"loss": 3.3129, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.31487722676938e-05, |
|
"loss": 3.2072, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.074145402022148e-05, |
|
"loss": 3.1383, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.833413577274916e-05, |
|
"loss": 3.0606, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.592681752527685e-05, |
|
"loss": 2.9994, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.351949927780452e-05, |
|
"loss": 2.9714, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.111218103033221e-05, |
|
"loss": 2.9432, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.87048627828599e-05, |
|
"loss": 2.8978, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.629754453538758e-05, |
|
"loss": 2.8998, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.389022628791527e-05, |
|
"loss": 2.8704, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.148290804044296e-05, |
|
"loss": 2.8642, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.907558979297063e-05, |
|
"loss": 2.832, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5.666827154549832e-05, |
|
"loss": 2.8027, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.426095329802601e-05, |
|
"loss": 2.7968, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.185363505055368e-05, |
|
"loss": 2.7954, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.46354079246521, |
|
"eval_runtime": 2.8271, |
|
"eval_samples_per_second": 353.714, |
|
"eval_steps_per_second": 44.214, |
|
"step": 2077 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.9446316803081375e-05, |
|
"loss": 2.7673, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.7038998555609055e-05, |
|
"loss": 2.7401, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.4631680308136736e-05, |
|
"loss": 2.7456, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.222436206066442e-05, |
|
"loss": 2.742, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.98170438131921e-05, |
|
"loss": 2.7218, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.740972556571979e-05, |
|
"loss": 2.7248, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.500240731824748e-05, |
|
"loss": 2.7145, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.259508907077516e-05, |
|
"loss": 2.7046, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.0187770823302842e-05, |
|
"loss": 2.7095, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.7780452575830522e-05, |
|
"loss": 2.7005, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.537313432835821e-05, |
|
"loss": 2.6941, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.2965816080885893e-05, |
|
"loss": 2.6727, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.0558497833413577e-05, |
|
"loss": 2.6959, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.8151179585941264e-05, |
|
"loss": 2.6839, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.5743861338468945e-05, |
|
"loss": 2.6781, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.333654309099663e-05, |
|
"loss": 2.6732, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.0929224843524314e-05, |
|
"loss": 2.6776, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 8.521906596051998e-06, |
|
"loss": 2.673, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.114588348579683e-06, |
|
"loss": 2.6832, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.7072701011073664e-06, |
|
"loss": 2.6659, |
|
"step": 4000 |
|
} |
|
], |
|
"max_steps": 4154, |
|
"num_train_epochs": 2, |
|
"total_flos": 2.1297995361473126e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|