summarization-lora-4 / trainer_state.json
apwic's picture
End of training
c183960 verified
raw
history blame
3.15 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 8920,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.2753658294677734,
"learning_rate": 4e-05,
"loss": 1.233,
"step": 1784
},
{
"epoch": 1.0,
"eval_gen_len": 1.0,
"eval_loss": 0.6087062954902649,
"eval_rouge1": 0.4912,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4859,
"eval_rougeLsum": 0.4881,
"eval_runtime": 112.5011,
"eval_samples_per_second": 6.64,
"eval_steps_per_second": 0.213,
"step": 1784
},
{
"epoch": 2.0,
"grad_norm": 2.76861834526062,
"learning_rate": 3e-05,
"loss": 0.7935,
"step": 3568
},
{
"epoch": 2.0,
"eval_gen_len": 1.0,
"eval_loss": 0.5581778883934021,
"eval_rouge1": 0.4176,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4157,
"eval_rougeLsum": 0.4171,
"eval_runtime": 112.1842,
"eval_samples_per_second": 6.659,
"eval_steps_per_second": 0.214,
"step": 3568
},
{
"epoch": 3.0,
"grad_norm": 1.8127230405807495,
"learning_rate": 2e-05,
"loss": 0.7385,
"step": 5352
},
{
"epoch": 3.0,
"eval_gen_len": 1.0,
"eval_loss": 0.5451030731201172,
"eval_rouge1": 0.4227,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4192,
"eval_rougeLsum": 0.4214,
"eval_runtime": 112.2353,
"eval_samples_per_second": 6.656,
"eval_steps_per_second": 0.214,
"step": 5352
},
{
"epoch": 4.0,
"grad_norm": 1.9295331239700317,
"learning_rate": 1e-05,
"loss": 0.7114,
"step": 7136
},
{
"epoch": 4.0,
"eval_gen_len": 1.0,
"eval_loss": 0.5406314730644226,
"eval_rouge1": 0.4115,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4115,
"eval_rougeLsum": 0.4106,
"eval_runtime": 112.3402,
"eval_samples_per_second": 6.649,
"eval_steps_per_second": 0.214,
"step": 7136
},
{
"epoch": 5.0,
"grad_norm": 1.6341142654418945,
"learning_rate": 0.0,
"loss": 0.6996,
"step": 8920
},
{
"epoch": 5.0,
"eval_gen_len": 1.0,
"eval_loss": 0.5392429828643799,
"eval_rouge1": 0.4255,
"eval_rouge2": 0.0,
"eval_rougeL": 0.4247,
"eval_rougeLsum": 0.4257,
"eval_runtime": 113.1717,
"eval_samples_per_second": 6.601,
"eval_steps_per_second": 0.212,
"step": 8920
},
{
"epoch": 5.0,
"step": 8920,
"total_flos": 7.526178534957466e+16,
"train_loss": 0.8352148081689672,
"train_runtime": 5166.7453,
"train_samples_per_second": 13.806,
"train_steps_per_second": 1.726
}
],
"logging_steps": 500,
"max_steps": 8920,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 7.526178534957466e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}