gavrilstep's picture
Training in progress, step 30, checkpoint
b9094c2 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0017792011386887287,
"eval_steps": 5,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 5.930670462295763e-05,
"eval_loss": 12.147173881530762,
"eval_runtime": 290.7949,
"eval_samples_per_second": 24.416,
"eval_steps_per_second": 12.208,
"step": 1
},
{
"epoch": 0.00017792011386887289,
"grad_norm": 11.14660930633545,
"learning_rate": 6e-05,
"loss": 12.1674,
"step": 3
},
{
"epoch": 0.0002965335231147881,
"eval_loss": 11.853402137756348,
"eval_runtime": 294.1314,
"eval_samples_per_second": 24.139,
"eval_steps_per_second": 12.069,
"step": 5
},
{
"epoch": 0.00035584022773774577,
"grad_norm": 10.97592830657959,
"learning_rate": 0.00012,
"loss": 11.9565,
"step": 6
},
{
"epoch": 0.0005337603416066186,
"grad_norm": 8.313521385192871,
"learning_rate": 0.00018,
"loss": 11.4876,
"step": 9
},
{
"epoch": 0.0005930670462295762,
"eval_loss": 10.954883575439453,
"eval_runtime": 291.2838,
"eval_samples_per_second": 24.375,
"eval_steps_per_second": 12.187,
"step": 10
},
{
"epoch": 0.0007116804554754915,
"grad_norm": 7.10131311416626,
"learning_rate": 0.00019510565162951537,
"loss": 10.9684,
"step": 12
},
{
"epoch": 0.0008896005693443643,
"grad_norm": 5.987206935882568,
"learning_rate": 0.00017071067811865476,
"loss": 10.4647,
"step": 15
},
{
"epoch": 0.0008896005693443643,
"eval_loss": 10.177637100219727,
"eval_runtime": 294.0051,
"eval_samples_per_second": 24.149,
"eval_steps_per_second": 12.075,
"step": 15
},
{
"epoch": 0.0010675206832132373,
"grad_norm": 5.192551612854004,
"learning_rate": 0.00013090169943749476,
"loss": 10.0547,
"step": 18
},
{
"epoch": 0.0011861340924591525,
"eval_loss": 9.714789390563965,
"eval_runtime": 292.8811,
"eval_samples_per_second": 24.242,
"eval_steps_per_second": 12.121,
"step": 20
},
{
"epoch": 0.0012454407970821102,
"grad_norm": 4.78351354598999,
"learning_rate": 8.435655349597689e-05,
"loss": 9.7974,
"step": 21
},
{
"epoch": 0.001423360910950983,
"grad_norm": 4.210782527923584,
"learning_rate": 4.12214747707527e-05,
"loss": 9.623,
"step": 24
},
{
"epoch": 0.0014826676155739406,
"eval_loss": 9.522984504699707,
"eval_runtime": 293.8342,
"eval_samples_per_second": 24.163,
"eval_steps_per_second": 12.082,
"step": 25
},
{
"epoch": 0.0016012810248198558,
"grad_norm": 3.7491424083709717,
"learning_rate": 1.0899347581163221e-05,
"loss": 9.5269,
"step": 27
},
{
"epoch": 0.0017792011386887287,
"grad_norm": 3.693744421005249,
"learning_rate": 0.0,
"loss": 9.4953,
"step": 30
},
{
"epoch": 0.0017792011386887287,
"eval_loss": 9.486776351928711,
"eval_runtime": 293.7624,
"eval_samples_per_second": 24.169,
"eval_steps_per_second": 12.085,
"step": 30
}
],
"logging_steps": 3,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7771550121984.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}