beto2beto-mlsum / trainer_state.json
LeoCordoba's picture
commit files to HF hub
b52cb14
raw
history blame
5.08 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.999519461797213,
"global_step": 7280,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.48,
"learning_rate": 4.656593406593407e-05,
"loss": 1.5943,
"step": 500
},
{
"epoch": 0.96,
"learning_rate": 4.3131868131868134e-05,
"loss": 1.6732,
"step": 1000
},
{
"epoch": 1.0,
"eval_gen_len": 19.2321,
"eval_loss": 2.4691147804260254,
"eval_rouge1": 25.8418,
"eval_rouge2": 8.7011,
"eval_rougeL": 21.1569,
"eval_rougeLsum": 21.4706,
"eval_runtime": 1188.0405,
"eval_samples_per_second": 8.719,
"step": 1040
},
{
"epoch": 1.44,
"learning_rate": 3.96978021978022e-05,
"loss": 1.9331,
"step": 1500
},
{
"epoch": 1.92,
"learning_rate": 3.6263736263736266e-05,
"loss": 1.9543,
"step": 2000
},
{
"epoch": 2.0,
"eval_gen_len": 19.2853,
"eval_loss": 2.4091384410858154,
"eval_rouge1": 26.1697,
"eval_rouge2": 9.222,
"eval_rougeL": 21.5166,
"eval_rougeLsum": 21.8758,
"eval_runtime": 1168.6884,
"eval_samples_per_second": 8.863,
"step": 2080
},
{
"epoch": 2.4,
"learning_rate": 3.282967032967033e-05,
"loss": 1.8371,
"step": 2500
},
{
"epoch": 2.88,
"learning_rate": 2.9395604395604398e-05,
"loss": 1.8282,
"step": 3000
},
{
"epoch": 3.0,
"eval_gen_len": 19.2576,
"eval_loss": 2.42261004447937,
"eval_rouge1": 26.1165,
"eval_rouge2": 9.2438,
"eval_rougeL": 21.5311,
"eval_rougeLsum": 21.8577,
"eval_runtime": 1236.3561,
"eval_samples_per_second": 8.378,
"step": 3120
},
{
"epoch": 3.37,
"learning_rate": 2.5961538461538464e-05,
"loss": 1.7419,
"step": 3500
},
{
"epoch": 3.85,
"learning_rate": 2.252747252747253e-05,
"loss": 1.7287,
"step": 4000
},
{
"epoch": 4.0,
"eval_gen_len": 19.2905,
"eval_loss": 2.4440300464630127,
"eval_rouge1": 26.2529,
"eval_rouge2": 9.3031,
"eval_rougeL": 21.5955,
"eval_rougeLsum": 21.9465,
"eval_runtime": 1217.0172,
"eval_samples_per_second": 8.511,
"step": 4160
},
{
"epoch": 4.33,
"learning_rate": 1.9093406593406592e-05,
"loss": 1.6642,
"step": 4500
},
{
"epoch": 4.81,
"learning_rate": 1.565934065934066e-05,
"loss": 1.6481,
"step": 5000
},
{
"epoch": 5.0,
"eval_gen_len": 19.1729,
"eval_loss": 2.471247673034668,
"eval_rouge1": 26.1789,
"eval_rouge2": 9.2847,
"eval_rougeL": 21.5357,
"eval_rougeLsum": 21.8938,
"eval_runtime": 1247.3952,
"eval_samples_per_second": 8.304,
"step": 5200
},
{
"epoch": 5.29,
"learning_rate": 1.2225274725274726e-05,
"loss": 1.6071,
"step": 5500
},
{
"epoch": 5.77,
"learning_rate": 8.791208791208792e-06,
"loss": 1.5796,
"step": 6000
},
{
"epoch": 6.0,
"eval_gen_len": 19.2078,
"eval_loss": 2.485071897506714,
"eval_rouge1": 26.07,
"eval_rouge2": 9.1429,
"eval_rougeL": 21.414,
"eval_rougeLsum": 21.7483,
"eval_runtime": 1301.5565,
"eval_samples_per_second": 7.958,
"step": 6240
},
{
"epoch": 6.25,
"learning_rate": 5.357142857142857e-06,
"loss": 1.5558,
"step": 6500
},
{
"epoch": 6.73,
"learning_rate": 1.9230769230769234e-06,
"loss": 1.5319,
"step": 7000
},
{
"epoch": 7.0,
"eval_gen_len": 19.2394,
"eval_loss": 2.5021677017211914,
"eval_rouge1": 26.1256,
"eval_rouge2": 9.2552,
"eval_rougeL": 21.4899,
"eval_rougeLsum": 21.8194,
"eval_runtime": 1250.4536,
"eval_samples_per_second": 8.283,
"step": 7280
},
{
"epoch": 7.0,
"step": 7280,
"total_flos": 1.5986426003154386e+18,
"train_runtime": 137981.946,
"train_samples_per_second": 0.053
},
{
"epoch": 7.0,
"eval_gen_len": 19.2394,
"eval_loss": 2.5021677017211914,
"eval_rouge1": 26.1256,
"eval_rouge2": 9.2552,
"eval_rougeL": 21.4899,
"eval_rougeLsum": 21.8194,
"eval_runtime": 1276.236,
"eval_samples_per_second": 8.116,
"step": 7280
},
{
"epoch": 7.0,
"eval_gen_len": 19.2463,
"eval_loss": 2.57672381401062,
"eval_rouge1": 25.8639,
"eval_rouge2": 8.911,
"eval_rougeL": 21.2426,
"eval_rougeLsum": 21.5859,
"eval_runtime": 1713.2216,
"eval_samples_per_second": 8.125,
"step": 7280
}
],
"max_steps": 7280,
"num_train_epochs": 7,
"total_flos": 1.5986426003154386e+18,
"trial_name": null,
"trial_params": null
}