flanT5-xl-3 / last-checkpoint /trainer_state.json
devvanshhh's picture
Training in progress, epoch 6, checkpoint
ae01208
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 2172,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_gen_len": 11.021739130434783,
"eval_loss": 5.008267879486084,
"eval_rouge1": 29.3556,
"eval_rouge2": 23.1164,
"eval_rougeL": 26.8076,
"eval_rougeLsum": 26.8263,
"eval_runtime": 173.025,
"eval_samples_per_second": 1.861,
"eval_steps_per_second": 0.237,
"step": 362
},
{
"epoch": 1.38,
"learning_rate": 3e-05,
"loss": 19.6995,
"step": 500
},
{
"epoch": 2.0,
"eval_gen_len": 10.335403726708075,
"eval_loss": 0.8298134207725525,
"eval_rouge1": 32.0346,
"eval_rouge2": 24.9074,
"eval_rougeL": 28.9355,
"eval_rougeLsum": 29.0842,
"eval_runtime": 150.6955,
"eval_samples_per_second": 2.137,
"eval_steps_per_second": 0.272,
"step": 724
},
{
"epoch": 2.76,
"learning_rate": 2.5192307692307694e-05,
"loss": 0.9076,
"step": 1000
},
{
"epoch": 3.0,
"eval_gen_len": 10.863354037267081,
"eval_loss": 0.7763211131095886,
"eval_rouge1": 31.1555,
"eval_rouge2": 23.6164,
"eval_rougeL": 27.9825,
"eval_rougeLsum": 28.1607,
"eval_runtime": 162.2266,
"eval_samples_per_second": 1.985,
"eval_steps_per_second": 0.253,
"step": 1086
},
{
"epoch": 4.0,
"eval_gen_len": 12.23913043478261,
"eval_loss": 0.7397241592407227,
"eval_rouge1": 28.3909,
"eval_rouge2": 20.1283,
"eval_rougeL": 24.9491,
"eval_rougeLsum": 25.0633,
"eval_runtime": 160.7559,
"eval_samples_per_second": 2.003,
"eval_steps_per_second": 0.255,
"step": 1448
},
{
"epoch": 4.14,
"learning_rate": 2.0384615384615387e-05,
"loss": 0.8095,
"step": 1500
},
{
"epoch": 5.0,
"eval_gen_len": 11.658385093167702,
"eval_loss": 0.7186636328697205,
"eval_rouge1": 30.3081,
"eval_rouge2": 22.0426,
"eval_rougeL": 26.8549,
"eval_rougeLsum": 27.0402,
"eval_runtime": 161.9975,
"eval_samples_per_second": 1.988,
"eval_steps_per_second": 0.253,
"step": 1810
},
{
"epoch": 5.52,
"learning_rate": 1.557692307692308e-05,
"loss": 0.7683,
"step": 2000
},
{
"epoch": 6.0,
"eval_gen_len": 11.70807453416149,
"eval_loss": 0.7056237459182739,
"eval_rouge1": 30.3901,
"eval_rouge2": 21.9556,
"eval_rougeL": 26.8053,
"eval_rougeLsum": 27.0162,
"eval_runtime": 161.96,
"eval_samples_per_second": 1.988,
"eval_steps_per_second": 0.253,
"step": 2172
}
],
"logging_steps": 500,
"max_steps": 3620,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.164912412852224e+16,
"trial_name": null,
"trial_params": null
}