|
{ |
|
"best_metric": 24.761, |
|
"best_model_checkpoint": "./output\\checkpoint-1052", |
|
"epoch": 20.0, |
|
"global_step": 10520, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.5e-06, |
|
"loss": 3.0166, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 15.76, |
|
"eval_rouge2": 6.32, |
|
"eval_rougeL": 21.606, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9e-06, |
|
"loss": 2.7368, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 22.06, |
|
"eval_rouge2": 8.84, |
|
"eval_rougeL": 24.761, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.5e-06, |
|
"loss": 2.5849, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 19.17, |
|
"eval_rouge2": 7.97, |
|
"eval_rougeL": 23.301, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.4639, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 16.53, |
|
"eval_rouge2": 6.92, |
|
"eval_rougeL": 22.252, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 2.3647, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 15.61, |
|
"eval_rouge2": 6.25, |
|
"eval_rougeL": 21.732, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7e-06, |
|
"loss": 2.2886, |
|
"step": 3156 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 16.78, |
|
"eval_rouge2": 7.3, |
|
"eval_rougeL": 23.088, |
|
"step": 3156 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 2.202, |
|
"step": 3682 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 18.84, |
|
"eval_rouge2": 6.67, |
|
"eval_rougeL": 22.378, |
|
"step": 3682 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6e-06, |
|
"loss": 2.1348, |
|
"step": 4208 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 14.52, |
|
"eval_rouge2": 6.22, |
|
"eval_rougeL": 22.215, |
|
"step": 4208 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 2.0736, |
|
"step": 4734 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 17.94, |
|
"eval_rouge2": 6.56, |
|
"eval_rougeL": 22.336, |
|
"step": 4734 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5e-06, |
|
"loss": 2.0202, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 16.91, |
|
"eval_rouge2": 6.38, |
|
"eval_rougeL": 21.603, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.5e-06, |
|
"loss": 1.9656, |
|
"step": 5786 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 17.7, |
|
"eval_rouge2": 6.6, |
|
"eval_rougeL": 22.292, |
|
"step": 5786 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.9173, |
|
"step": 6312 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 17.21, |
|
"eval_rouge2": 6.32, |
|
"eval_rougeL": 21.212, |
|
"step": 6312 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.5e-06, |
|
"loss": 1.8784, |
|
"step": 6838 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 18.93, |
|
"eval_rouge2": 6.95, |
|
"eval_rougeL": 22.939, |
|
"step": 6838 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3e-06, |
|
"loss": 1.8346, |
|
"step": 7364 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 19.19, |
|
"eval_rouge2": 6.42, |
|
"eval_rougeL": 21.364, |
|
"step": 7364 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.8006, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_gen_len": 18.08, |
|
"eval_rouge2": 5.86, |
|
"eval_rougeL": 21.227, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.7784, |
|
"step": 8416 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 17.06, |
|
"eval_rouge2": 6.49, |
|
"eval_rougeL": 21.942, |
|
"step": 8416 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.5e-06, |
|
"loss": 1.7526, |
|
"step": 8942 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_gen_len": 18.66, |
|
"eval_rouge2": 7.02, |
|
"eval_rougeL": 22.17, |
|
"step": 8942 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.7409, |
|
"step": 9468 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_gen_len": 17.86, |
|
"eval_rouge2": 6.65, |
|
"eval_rougeL": 21.959, |
|
"step": 9468 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 1.7238, |
|
"step": 9994 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_gen_len": 17.61, |
|
"eval_rouge2": 6.23, |
|
"eval_rougeL": 21.571, |
|
"step": 9994 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.7105, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 17.14, |
|
"eval_rouge2": 6.42, |
|
"eval_rougeL": 21.664, |
|
"step": 10520 |
|
} |
|
], |
|
"max_steps": 10520, |
|
"num_train_epochs": 20, |
|
"total_flos": 9621641311027200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|