LLM_Teached_Pegasus / trainer_state.json
GlycerinLOL's picture
End of training
9f7119a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8,
"learning_rate": 1.6000000000000003e-05,
"loss": 2.0887,
"step": 500
},
{
"epoch": 1.0,
"eval_f1": 0.9023,
"eval_gen_len": 31.24818181818182,
"eval_loss": 1.7361507415771484,
"eval_precision": 0.9035,
"eval_recall": 0.9015,
"eval_rouge1": 0.4326,
"eval_rouge2": 0.1871,
"eval_rougeL": 0.3375,
"eval_rougeLsum": 0.3373,
"eval_runtime": 386.0456,
"eval_samples_per_second": 2.849,
"eval_steps_per_second": 0.357,
"step": 625
},
{
"epoch": 1.6,
"learning_rate": 1.2e-05,
"loss": 1.8362,
"step": 1000
},
{
"epoch": 2.0,
"eval_f1": 0.905,
"eval_gen_len": 30.303636363636365,
"eval_loss": 1.6843606233596802,
"eval_precision": 0.9071,
"eval_recall": 0.9032,
"eval_rouge1": 0.4466,
"eval_rouge2": 0.1942,
"eval_rougeL": 0.3511,
"eval_rougeLsum": 0.3507,
"eval_runtime": 351.8932,
"eval_samples_per_second": 3.126,
"eval_steps_per_second": 0.392,
"step": 1250
},
{
"epoch": 2.4,
"learning_rate": 8.000000000000001e-06,
"loss": 1.7784,
"step": 1500
},
{
"epoch": 3.0,
"eval_f1": 0.9056,
"eval_gen_len": 30.79909090909091,
"eval_loss": 1.6666187047958374,
"eval_precision": 0.907,
"eval_recall": 0.9045,
"eval_rouge1": 0.451,
"eval_rouge2": 0.1992,
"eval_rougeL": 0.3554,
"eval_rougeLsum": 0.3551,
"eval_runtime": 352.5825,
"eval_samples_per_second": 3.12,
"eval_steps_per_second": 0.391,
"step": 1875
},
{
"epoch": 3.2,
"learning_rate": 4.000000000000001e-06,
"loss": 1.7543,
"step": 2000
},
{
"epoch": 4.0,
"learning_rate": 0.0,
"loss": 1.7261,
"step": 2500
},
{
"epoch": 4.0,
"eval_f1": 0.9064,
"eval_gen_len": 30.85090909090909,
"eval_loss": 1.6605653762817383,
"eval_precision": 0.9078,
"eval_recall": 0.9053,
"eval_rouge1": 0.4557,
"eval_rouge2": 0.2019,
"eval_rougeL": 0.3603,
"eval_rougeLsum": 0.3597,
"eval_runtime": 359.5712,
"eval_samples_per_second": 3.059,
"eval_steps_per_second": 0.384,
"step": 2500
},
{
"epoch": 4.0,
"step": 2500,
"total_flos": 1.9957763220524237e+17,
"train_loss": 1.8367231689453125,
"train_runtime": 11495.8338,
"train_samples_per_second": 6.959,
"train_steps_per_second": 0.217
}
],
"logging_steps": 500,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"total_flos": 1.9957763220524237e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}