sci-five-radsum23 / trainer_state.json
chizhikchi's picture
initial commit
439800e
raw
history blame
9.6 kB
{
"best_metric": 27.4584,
"best_model_checkpoint": "vilmedic/SciFiverouge-1/checkpoint-66726",
"epoch": 35.99029126213592,
"global_step": 66726,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.0,
"learning_rate": 2.9538455297488754e-05,
"loss": 1.8057,
"step": 3707
},
{
"epoch": 2.0,
"eval_bleu": 83.8658,
"eval_f1_radgraph": 0.2199,
"eval_loss": 1.5417813062667847,
"eval_rouge1": 28.1224,
"eval_rouge2": 14.9915,
"eval_rougeL": 25.1977,
"eval_rougeLsum": 27.135,
"eval_runtime": 525.382,
"eval_samples_per_second": 14.11,
"eval_steps_per_second": 0.442,
"step": 3707
},
{
"epoch": 4.0,
"learning_rate": 2.8935795570433828e-05,
"loss": 1.5951,
"step": 7414
},
{
"epoch": 4.0,
"eval_bleu": 91.197,
"eval_f1_radgraph": 0.227,
"eval_loss": 1.4663870334625244,
"eval_rouge1": 28.885,
"eval_rouge2": 15.7599,
"eval_rougeL": 25.9785,
"eval_rougeLsum": 27.819,
"eval_runtime": 515.8882,
"eval_samples_per_second": 14.369,
"eval_steps_per_second": 0.45,
"step": 7414
},
{
"epoch": 6.0,
"learning_rate": 2.8333135843378902e-05,
"loss": 1.5199,
"step": 11121
},
{
"epoch": 6.0,
"eval_bleu": 87.253,
"eval_f1_radgraph": 0.2314,
"eval_loss": 1.4249236583709717,
"eval_rouge1": 29.0849,
"eval_rouge2": 16.0903,
"eval_rougeL": 26.1967,
"eval_rougeLsum": 28.0078,
"eval_runtime": 513.5301,
"eval_samples_per_second": 14.435,
"eval_steps_per_second": 0.452,
"step": 11121
},
{
"epoch": 8.0,
"learning_rate": 2.7730476116323972e-05,
"loss": 1.4705,
"step": 14828
},
{
"epoch": 8.0,
"eval_bleu": 85.3504,
"eval_f1_radgraph": 0.2342,
"eval_loss": 1.3992971181869507,
"eval_rouge1": 29.3725,
"eval_rouge2": 16.356,
"eval_rougeL": 26.4644,
"eval_rougeLsum": 28.2535,
"eval_runtime": 522.0475,
"eval_samples_per_second": 14.2,
"eval_steps_per_second": 0.444,
"step": 14828
},
{
"epoch": 10.0,
"learning_rate": 2.7127816389269046e-05,
"loss": 1.4326,
"step": 18535
},
{
"epoch": 10.0,
"eval_bleu": 90.5282,
"eval_f1_radgraph": 0.2341,
"eval_loss": 1.38131844997406,
"eval_rouge1": 29.5246,
"eval_rouge2": 16.41,
"eval_rougeL": 26.5568,
"eval_rougeLsum": 28.3933,
"eval_runtime": 515.287,
"eval_samples_per_second": 14.386,
"eval_steps_per_second": 0.45,
"step": 18535
},
{
"epoch": 12.0,
"learning_rate": 2.652515666221412e-05,
"loss": 1.4015,
"step": 22242
},
{
"epoch": 12.0,
"eval_bleu": 91.7941,
"eval_f1_radgraph": 0.2346,
"eval_loss": 1.366599440574646,
"eval_rouge1": 29.7344,
"eval_rouge2": 16.6795,
"eval_rougeL": 26.7433,
"eval_rougeLsum": 28.603,
"eval_runtime": 515.9852,
"eval_samples_per_second": 14.367,
"eval_steps_per_second": 0.45,
"step": 22242
},
{
"epoch": 14.0,
"learning_rate": 2.5922496935159194e-05,
"loss": 1.3756,
"step": 25949
},
{
"epoch": 14.0,
"eval_bleu": 92.9315,
"eval_f1_radgraph": 0.2377,
"eval_loss": 1.356188178062439,
"eval_rouge1": 29.8231,
"eval_rouge2": 16.7219,
"eval_rougeL": 26.8298,
"eval_rougeLsum": 28.6547,
"eval_runtime": 526.1612,
"eval_samples_per_second": 14.089,
"eval_steps_per_second": 0.441,
"step": 25949
},
{
"epoch": 16.0,
"learning_rate": 2.5319837208104264e-05,
"loss": 1.3534,
"step": 29656
},
{
"epoch": 16.0,
"eval_bleu": 90.1592,
"eval_f1_radgraph": 0.2402,
"eval_loss": 1.348175048828125,
"eval_rouge1": 29.9244,
"eval_rouge2": 16.8767,
"eval_rougeL": 26.9699,
"eval_rougeLsum": 28.7771,
"eval_runtime": 517.9401,
"eval_samples_per_second": 14.312,
"eval_steps_per_second": 0.448,
"step": 29656
},
{
"epoch": 18.0,
"learning_rate": 2.4717177481049338e-05,
"loss": 1.3335,
"step": 33363
},
{
"epoch": 18.0,
"eval_bleu": 93.8733,
"eval_f1_radgraph": 0.2412,
"eval_loss": 1.3401567935943604,
"eval_rouge1": 29.9866,
"eval_rouge2": 16.8753,
"eval_rougeL": 26.967,
"eval_rougeLsum": 28.8217,
"eval_runtime": 520.2162,
"eval_samples_per_second": 14.25,
"eval_steps_per_second": 0.446,
"step": 33363
},
{
"epoch": 19.99,
"learning_rate": 2.4114517753994412e-05,
"loss": 1.3157,
"step": 37070
},
{
"epoch": 19.99,
"eval_bleu": 90.6225,
"eval_f1_radgraph": 0.2409,
"eval_loss": 1.3334068059921265,
"eval_rouge1": 30.0245,
"eval_rouge2": 17.0323,
"eval_rougeL": 27.0771,
"eval_rougeLsum": 28.8866,
"eval_runtime": 523.6658,
"eval_samples_per_second": 14.156,
"eval_steps_per_second": 0.443,
"step": 37070
},
{
"epoch": 21.99,
"learning_rate": 2.3511858026939486e-05,
"loss": 1.2994,
"step": 40777
},
{
"epoch": 21.99,
"eval_bleu": 93.6108,
"eval_f1_radgraph": 0.2403,
"eval_loss": 1.3307315111160278,
"eval_rouge1": 29.9589,
"eval_rouge2": 16.9066,
"eval_rougeL": 26.9681,
"eval_rougeLsum": 28.7908,
"eval_runtime": 516.1874,
"eval_samples_per_second": 14.361,
"eval_steps_per_second": 0.449,
"step": 40777
},
{
"epoch": 23.99,
"learning_rate": 2.290919829988456e-05,
"loss": 1.2843,
"step": 44484
},
{
"epoch": 23.99,
"eval_bleu": 95.5973,
"eval_f1_radgraph": 0.241,
"eval_loss": 1.3280918598175049,
"eval_rouge1": 30.1835,
"eval_rouge2": 17.1623,
"eval_rougeL": 27.182,
"eval_rougeLsum": 29.0235,
"eval_runtime": 517.0183,
"eval_samples_per_second": 14.338,
"eval_steps_per_second": 0.449,
"step": 44484
},
{
"epoch": 25.99,
"learning_rate": 2.230653857282963e-05,
"loss": 1.2693,
"step": 48191
},
{
"epoch": 25.99,
"eval_bleu": 94.2149,
"eval_f1_radgraph": 0.2427,
"eval_loss": 1.3227483034133911,
"eval_rouge1": 30.2847,
"eval_rouge2": 17.2726,
"eval_rougeL": 27.2884,
"eval_rougeLsum": 29.1106,
"eval_runtime": 529.2786,
"eval_samples_per_second": 14.006,
"eval_steps_per_second": 0.438,
"step": 48191
},
{
"epoch": 27.99,
"learning_rate": 2.1703878845774707e-05,
"loss": 1.2564,
"step": 51898
},
{
"epoch": 27.99,
"eval_bleu": 94.0952,
"eval_f1_radgraph": 0.2446,
"eval_loss": 1.3205522298812866,
"eval_rouge1": 30.2975,
"eval_rouge2": 17.3352,
"eval_rougeL": 27.3306,
"eval_rougeLsum": 29.1631,
"eval_runtime": 522.5234,
"eval_samples_per_second": 14.187,
"eval_steps_per_second": 0.444,
"step": 51898
},
{
"epoch": 29.99,
"learning_rate": 2.1101219118719778e-05,
"loss": 1.2453,
"step": 55605
},
{
"epoch": 29.99,
"eval_bleu": 93.4725,
"eval_f1_radgraph": 0.2429,
"eval_loss": 1.3210633993148804,
"eval_rouge1": 30.2609,
"eval_rouge2": 17.3045,
"eval_rougeL": 27.2887,
"eval_rougeLsum": 29.0797,
"eval_runtime": 520.8063,
"eval_samples_per_second": 14.234,
"eval_steps_per_second": 0.445,
"step": 55605
},
{
"epoch": 31.99,
"learning_rate": 2.0498559391664852e-05,
"loss": 1.2332,
"step": 59312
},
{
"epoch": 31.99,
"eval_bleu": 90.7444,
"eval_f1_radgraph": 0.2423,
"eval_loss": 1.3173481225967407,
"eval_rouge1": 30.2939,
"eval_rouge2": 17.3007,
"eval_rougeL": 27.2658,
"eval_rougeLsum": 29.1106,
"eval_runtime": 520.4251,
"eval_samples_per_second": 14.244,
"eval_steps_per_second": 0.446,
"step": 59312
},
{
"epoch": 33.99,
"learning_rate": 1.9895899664609926e-05,
"loss": 1.2229,
"step": 63019
},
{
"epoch": 33.99,
"eval_bleu": 90.9915,
"eval_f1_radgraph": 0.2444,
"eval_loss": 1.317107081413269,
"eval_rouge1": 30.3679,
"eval_rouge2": 17.3809,
"eval_rougeL": 27.3829,
"eval_rougeLsum": 29.1488,
"eval_runtime": 520.2024,
"eval_samples_per_second": 14.25,
"eval_steps_per_second": 0.446,
"step": 63019
},
{
"epoch": 35.99,
"learning_rate": 1.9293239937555e-05,
"loss": 1.2123,
"step": 66726
},
{
"epoch": 35.99,
"eval_bleu": 96.0056,
"eval_f1_radgraph": 0.2452,
"eval_loss": 1.3161959648132324,
"eval_rouge1": 30.438,
"eval_rouge2": 17.4316,
"eval_rougeL": 27.4584,
"eval_rougeLsum": 29.2665,
"eval_runtime": 518.103,
"eval_samples_per_second": 14.308,
"eval_steps_per_second": 0.448,
"step": 66726
}
],
"max_steps": 185400,
"num_train_epochs": 100,
"total_flos": 1.1641360206370867e+18,
"trial_name": null,
"trial_params": null
}