|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.09090909090909, |
|
"eval_steps": 500, |
|
"global_step": 31, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 2.495208501815796, |
|
"eval_runtime": 2.7046, |
|
"eval_samples_per_second": 86.518, |
|
"eval_steps_per_second": 2.958, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002999814948722491, |
|
"loss": 2.5615, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.527021646499634, |
|
"eval_runtime": 2.6357, |
|
"eval_samples_per_second": 88.78, |
|
"eval_steps_per_second": 3.035, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.5362284183502197, |
|
"eval_runtime": 2.6589, |
|
"eval_samples_per_second": 88.005, |
|
"eval_steps_per_second": 3.009, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.5341787338256836, |
|
"eval_runtime": 2.6589, |
|
"eval_samples_per_second": 88.007, |
|
"eval_steps_per_second": 3.009, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_loss": 2.2734625339508057, |
|
"eval_runtime": 2.6648, |
|
"eval_samples_per_second": 87.812, |
|
"eval_steps_per_second": 3.002, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_loss": 2.3209266662597656, |
|
"eval_runtime": 2.6531, |
|
"eval_samples_per_second": 88.198, |
|
"eval_steps_per_second": 3.015, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.1017019748687744, |
|
"eval_runtime": 2.6605, |
|
"eval_samples_per_second": 87.954, |
|
"eval_steps_per_second": 3.007, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00029953760005996916, |
|
"loss": 2.363, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"eval_loss": 2.012136697769165, |
|
"eval_runtime": 2.6573, |
|
"eval_samples_per_second": 88.061, |
|
"eval_steps_per_second": 3.011, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"eval_loss": 2.0751442909240723, |
|
"eval_runtime": 2.6638, |
|
"eval_samples_per_second": 87.844, |
|
"eval_steps_per_second": 3.003, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 1.964595079421997, |
|
"eval_runtime": 2.7098, |
|
"eval_samples_per_second": 86.353, |
|
"eval_steps_per_second": 2.952, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 1.8911688327789307, |
|
"eval_runtime": 2.6643, |
|
"eval_samples_per_second": 87.829, |
|
"eval_steps_per_second": 3.003, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"eval_loss": 1.809972882270813, |
|
"eval_runtime": 2.6547, |
|
"eval_samples_per_second": 88.146, |
|
"eval_steps_per_second": 3.014, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"eval_loss": 1.8143646717071533, |
|
"eval_runtime": 2.6683, |
|
"eval_samples_per_second": 87.697, |
|
"eval_steps_per_second": 2.998, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"eval_loss": 1.7983335256576538, |
|
"eval_runtime": 2.6503, |
|
"eval_samples_per_second": 88.291, |
|
"eval_steps_per_second": 3.018, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"eval_loss": 1.7633870840072632, |
|
"eval_runtime": 2.6612, |
|
"eval_samples_per_second": 87.931, |
|
"eval_steps_per_second": 3.006, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 0.00029815325108927063, |
|
"loss": 1.9009, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"eval_loss": 1.762792706489563, |
|
"eval_runtime": 2.6498, |
|
"eval_samples_per_second": 88.31, |
|
"eval_steps_per_second": 3.019, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"eval_loss": 1.7354298830032349, |
|
"eval_runtime": 2.6595, |
|
"eval_samples_per_second": 87.986, |
|
"eval_steps_per_second": 3.008, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"eval_loss": 1.7343316078186035, |
|
"eval_runtime": 2.6543, |
|
"eval_samples_per_second": 88.159, |
|
"eval_steps_per_second": 3.014, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"eval_loss": 1.7231522798538208, |
|
"eval_runtime": 2.6679, |
|
"eval_samples_per_second": 87.709, |
|
"eval_steps_per_second": 2.999, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"eval_loss": 1.6737045049667358, |
|
"eval_runtime": 2.6731, |
|
"eval_samples_per_second": 87.538, |
|
"eval_steps_per_second": 2.993, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"eval_loss": 1.6417571306228638, |
|
"eval_runtime": 2.6611, |
|
"eval_samples_per_second": 87.935, |
|
"eval_steps_per_second": 3.006, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"eval_loss": 1.663546085357666, |
|
"eval_runtime": 2.7084, |
|
"eval_samples_per_second": 86.399, |
|
"eval_steps_per_second": 2.954, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 22.06, |
|
"eval_loss": 1.6280120611190796, |
|
"eval_runtime": 2.6541, |
|
"eval_samples_per_second": 88.166, |
|
"eval_steps_per_second": 3.014, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"learning_rate": 0.0002958554880596515, |
|
"loss": 1.7031, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"eval_loss": 1.6042001247406006, |
|
"eval_runtime": 2.6431, |
|
"eval_samples_per_second": 88.533, |
|
"eval_steps_per_second": 3.027, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"eval_loss": 1.6120343208312988, |
|
"eval_runtime": 2.6568, |
|
"eval_samples_per_second": 88.076, |
|
"eval_steps_per_second": 3.011, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 25.09, |
|
"eval_loss": 1.579213261604309, |
|
"eval_runtime": 2.6609, |
|
"eval_samples_per_second": 87.94, |
|
"eval_steps_per_second": 3.007, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 25.09, |
|
"eval_loss": 1.6127510070800781, |
|
"eval_runtime": 2.6566, |
|
"eval_samples_per_second": 88.082, |
|
"eval_steps_per_second": 3.011, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"eval_loss": 1.5467751026153564, |
|
"eval_runtime": 2.655, |
|
"eval_samples_per_second": 88.136, |
|
"eval_steps_per_second": 3.013, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"eval_loss": 1.530348539352417, |
|
"eval_runtime": 2.6531, |
|
"eval_samples_per_second": 88.197, |
|
"eval_steps_per_second": 3.015, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"eval_loss": 1.5159918069839478, |
|
"eval_runtime": 2.6518, |
|
"eval_samples_per_second": 88.241, |
|
"eval_steps_per_second": 3.017, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 30.06, |
|
"eval_loss": 1.5194865465164185, |
|
"eval_runtime": 2.6595, |
|
"eval_samples_per_second": 87.987, |
|
"eval_steps_per_second": 3.008, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"learning_rate": 0.00029265847744427303, |
|
"loss": 1.5968, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"eval_loss": 1.5098381042480469, |
|
"eval_runtime": 2.6396, |
|
"eval_samples_per_second": 88.651, |
|
"eval_steps_per_second": 3.031, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"eval_loss": 1.4774686098098755, |
|
"eval_runtime": 2.6606, |
|
"eval_samples_per_second": 87.951, |
|
"eval_steps_per_second": 3.007, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 33.09, |
|
"eval_loss": 1.4770317077636719, |
|
"eval_runtime": 2.6523, |
|
"eval_samples_per_second": 88.225, |
|
"eval_steps_per_second": 3.016, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 33.09, |
|
"eval_loss": 1.4588351249694824, |
|
"eval_runtime": 2.6529, |
|
"eval_samples_per_second": 88.205, |
|
"eval_steps_per_second": 3.016, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"eval_loss": 1.4474384784698486, |
|
"eval_runtime": 2.6678, |
|
"eval_samples_per_second": 87.711, |
|
"eval_steps_per_second": 2.999, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 36.12, |
|
"eval_loss": 1.424033761024475, |
|
"eval_runtime": 2.6514, |
|
"eval_samples_per_second": 88.254, |
|
"eval_steps_per_second": 3.017, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 36.12, |
|
"eval_loss": 1.4164339303970337, |
|
"eval_runtime": 2.6554, |
|
"eval_samples_per_second": 88.121, |
|
"eval_steps_per_second": 3.013, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"eval_loss": 1.4059854745864868, |
|
"eval_runtime": 2.6536, |
|
"eval_samples_per_second": 88.181, |
|
"eval_steps_per_second": 3.015, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"learning_rate": 0.000288581929876693, |
|
"loss": 1.4776, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"eval_loss": 1.3752561807632446, |
|
"eval_runtime": 2.6459, |
|
"eval_samples_per_second": 88.439, |
|
"eval_steps_per_second": 3.024, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"eval_loss": 1.385780930519104, |
|
"eval_runtime": 2.667, |
|
"eval_samples_per_second": 87.738, |
|
"eval_steps_per_second": 3.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 41.09, |
|
"eval_loss": 1.3821604251861572, |
|
"eval_runtime": 2.6548, |
|
"eval_samples_per_second": 88.141, |
|
"eval_steps_per_second": 3.013, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 41.09, |
|
"eval_loss": 1.3268494606018066, |
|
"eval_runtime": 2.6901, |
|
"eval_samples_per_second": 86.986, |
|
"eval_steps_per_second": 2.974, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 43.03, |
|
"eval_loss": 1.3443068265914917, |
|
"eval_runtime": 2.6512, |
|
"eval_samples_per_second": 88.263, |
|
"eval_steps_per_second": 3.018, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"eval_loss": 1.3258930444717407, |
|
"eval_runtime": 2.6544, |
|
"eval_samples_per_second": 88.156, |
|
"eval_steps_per_second": 3.014, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 44.12, |
|
"eval_loss": 1.311697006225586, |
|
"eval_runtime": 2.6815, |
|
"eval_samples_per_second": 87.264, |
|
"eval_steps_per_second": 2.983, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 46.06, |
|
"eval_loss": 1.3104833364486694, |
|
"eval_runtime": 2.6829, |
|
"eval_samples_per_second": 87.218, |
|
"eval_steps_per_second": 2.982, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 47.15, |
|
"learning_rate": 0.00028365097862825513, |
|
"loss": 1.3585, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 47.15, |
|
"eval_loss": 1.2553305625915527, |
|
"eval_runtime": 2.8251, |
|
"eval_samples_per_second": 82.83, |
|
"eval_steps_per_second": 2.832, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 47.15, |
|
"eval_loss": 1.275472640991211, |
|
"eval_runtime": 2.6596, |
|
"eval_samples_per_second": 87.983, |
|
"eval_steps_per_second": 3.008, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 49.09, |
|
"eval_loss": 1.2036432027816772, |
|
"eval_runtime": 2.6726, |
|
"eval_samples_per_second": 87.554, |
|
"eval_steps_per_second": 2.993, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 49.09, |
|
"step": 31, |
|
"total_flos": 8700902454067200.0, |
|
"train_loss": 1.7344687215743526, |
|
"train_runtime": 3811.391, |
|
"train_samples_per_second": 27.51, |
|
"train_steps_per_second": 0.052 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 200, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 8700902454067200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|