|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 8920, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.2753658294677734, |
|
"learning_rate": 4e-05, |
|
"loss": 1.233, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.6087062954902649, |
|
"eval_rouge1": 0.4912, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.4859, |
|
"eval_rougeLsum": 0.4881, |
|
"eval_runtime": 112.5011, |
|
"eval_samples_per_second": 6.64, |
|
"eval_steps_per_second": 0.213, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.76861834526062, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7935, |
|
"step": 3568 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.5581778883934021, |
|
"eval_rouge1": 0.4176, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.4157, |
|
"eval_rougeLsum": 0.4171, |
|
"eval_runtime": 112.1842, |
|
"eval_samples_per_second": 6.659, |
|
"eval_steps_per_second": 0.214, |
|
"step": 3568 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.8127230405807495, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7385, |
|
"step": 5352 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.5451030731201172, |
|
"eval_rouge1": 0.4227, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.4192, |
|
"eval_rougeLsum": 0.4214, |
|
"eval_runtime": 112.2353, |
|
"eval_samples_per_second": 6.656, |
|
"eval_steps_per_second": 0.214, |
|
"step": 5352 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.9295331239700317, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7114, |
|
"step": 7136 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.5406314730644226, |
|
"eval_rouge1": 0.4115, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.4115, |
|
"eval_rougeLsum": 0.4106, |
|
"eval_runtime": 112.3402, |
|
"eval_samples_per_second": 6.649, |
|
"eval_steps_per_second": 0.214, |
|
"step": 7136 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.6341142654418945, |
|
"learning_rate": 0.0, |
|
"loss": 0.6996, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.5392429828643799, |
|
"eval_rouge1": 0.4255, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.4247, |
|
"eval_rougeLsum": 0.4257, |
|
"eval_runtime": 113.1717, |
|
"eval_samples_per_second": 6.601, |
|
"eval_steps_per_second": 0.212, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 8920, |
|
"total_flos": 7.526178534957466e+16, |
|
"train_loss": 0.8352148081689672, |
|
"train_runtime": 5166.7453, |
|
"train_samples_per_second": 13.806, |
|
"train_steps_per_second": 1.726 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8920, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 7.526178534957466e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|