|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0129032258064514, |
|
"eval_steps": 10, |
|
"global_step": 116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025806451612903226, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.89, |
|
"eval_samples_per_second": 73.037, |
|
"eval_steps_per_second": 10.113, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07741935483870968, |
|
"grad_norm": NaN, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.15483870967741936, |
|
"grad_norm": NaN, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.23225806451612904, |
|
"grad_norm": NaN, |
|
"learning_rate": 9e-05, |
|
"loss": 0.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4804, |
|
"eval_samples_per_second": 135.309, |
|
"eval_steps_per_second": 18.735, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3096774193548387, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.991218658821608e-05, |
|
"loss": 0.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.3870967741935484, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.94520093661082e-05, |
|
"loss": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.4645161290322581, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.860114570402054e-05, |
|
"loss": 0.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.5161290322580645, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4793, |
|
"eval_samples_per_second": 135.617, |
|
"eval_steps_per_second": 18.778, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5419354838709678, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.736631769270957e-05, |
|
"loss": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.6193548387096774, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.575728086215092e-05, |
|
"loss": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.6967741935483871, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.378674710978185e-05, |
|
"loss": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.7741935483870968, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.14702842725101e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7741935483870968, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4793, |
|
"eval_samples_per_second": 135.604, |
|
"eval_steps_per_second": 18.776, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8516129032258064, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.882619313590212e-05, |
|
"loss": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.9290322580645162, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.587536285221656e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.0129032258064516, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.264110590952609e-05, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.038709677419355, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4747, |
|
"eval_samples_per_second": 136.925, |
|
"eval_steps_per_second": 18.959, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0903225806451613, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.91489739557236e-05, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.167741935483871, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.542655593246103e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.2451612903225806, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.150326011382604e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.2967741935483872, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4769, |
|
"eval_samples_per_second": 136.301, |
|
"eval_steps_per_second": 18.872, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3225806451612903, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.741008177171995e-05, |
|
"loss": 0.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.317935830345338e-05, |
|
"loss": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.4774193548387098, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.884451375612865e-05, |
|
"loss": 0.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.5548387096774192, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.4439794766146746e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.5548387096774192, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4836, |
|
"eval_samples_per_second": 134.417, |
|
"eval_steps_per_second": 18.612, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.632258064516129, |
|
"grad_norm": NaN, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.7096774193548387, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.5560205233853266e-05, |
|
"loss": 0.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.7870967741935484, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.115548624387137e-05, |
|
"loss": 0.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.8129032258064517, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4769, |
|
"eval_samples_per_second": 136.31, |
|
"eval_steps_per_second": 18.874, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.864516129032258, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.682064169654663e-05, |
|
"loss": 0.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.9419354838709677, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.258991822828007e-05, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.0258064516129033, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.8496739886173995e-05, |
|
"loss": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.07741935483871, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4796, |
|
"eval_samples_per_second": 135.533, |
|
"eval_steps_per_second": 18.766, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.1032258064516127, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.4573444067538986e-05, |
|
"loss": 0.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.1806451612903226, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.0851026044276406e-05, |
|
"loss": 0.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.258064516129032, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.7358894090473925e-05, |
|
"loss": 0.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 2.335483870967742, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.4124637147783432e-05, |
|
"loss": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.335483870967742, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4778, |
|
"eval_samples_per_second": 136.038, |
|
"eval_steps_per_second": 18.836, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.412903225806452, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.1173806864097886e-05, |
|
"loss": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 2.490322580645161, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.529715727489912e-06, |
|
"loss": 0.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.567741935483871, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.213252890218163e-06, |
|
"loss": 0.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.5935483870967744, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4814, |
|
"eval_samples_per_second": 135.034, |
|
"eval_steps_per_second": 18.697, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.6451612903225805, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.242719137849077e-06, |
|
"loss": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.7225806451612904, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.6336823072904304e-06, |
|
"loss": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.3988542959794627e-06, |
|
"loss": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.8516129032258064, |
|
"eval_loss": NaN, |
|
"eval_runtime": 0.4762, |
|
"eval_samples_per_second": 136.507, |
|
"eval_steps_per_second": 18.901, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.8774193548387097, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.479906338917984e-07, |
|
"loss": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 2.9548387096774196, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.781341178393244e-08, |
|
"loss": 0.0, |
|
"step": 114 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 447372853248.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|