|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.776824034334766, |
|
"eval_steps": 500, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.6024, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 4.4454, |
|
"eval_bp": 0.6832, |
|
"eval_counts_1": 5645, |
|
"eval_counts_2": 1343, |
|
"eval_counts_3": 424, |
|
"eval_counts_4": 109, |
|
"eval_exact_match": 0.0005, |
|
"eval_f1": 0.2236, |
|
"eval_gen_len": 11.6338, |
|
"eval_loss": 2.468198776245117, |
|
"eval_precisions_1": 36.6844, |
|
"eval_precisions_2": 10.1866, |
|
"eval_precisions_3": 3.8616, |
|
"eval_precisions_4": 1.242, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2285, |
|
"eval_rouge2": 0.0824, |
|
"eval_rougeL": 0.2192, |
|
"eval_rougeLsum": 0.2188, |
|
"eval_runtime": 813.9917, |
|
"eval_samples_per_second": 2.708, |
|
"eval_steps_per_second": 0.677, |
|
"eval_sys_len": 15388, |
|
"eval_totals_1": 15388, |
|
"eval_totals_2": 13184, |
|
"eval_totals_3": 10980, |
|
"eval_totals_4": 8776, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.9671, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_bleu": 5.7163, |
|
"eval_bp": 0.7259, |
|
"eval_counts_1": 5988, |
|
"eval_counts_2": 1562, |
|
"eval_counts_3": 569, |
|
"eval_counts_4": 179, |
|
"eval_exact_match": 0.0018, |
|
"eval_f1": 0.2401, |
|
"eval_gen_len": 12.314, |
|
"eval_loss": 2.244511842727661, |
|
"eval_precisions_1": 37.2064, |
|
"eval_precisions_2": 11.2455, |
|
"eval_precisions_3": 4.8691, |
|
"eval_precisions_4": 1.8878, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2465, |
|
"eval_rouge2": 0.0971, |
|
"eval_rougeL": 0.2371, |
|
"eval_rougeLsum": 0.2371, |
|
"eval_runtime": 802.4783, |
|
"eval_samples_per_second": 2.746, |
|
"eval_steps_per_second": 0.687, |
|
"eval_sys_len": 16094, |
|
"eval_totals_1": 16094, |
|
"eval_totals_2": 13890, |
|
"eval_totals_3": 11686, |
|
"eval_totals_4": 9482, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.6324, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_bleu": 6.9028, |
|
"eval_bp": 0.7887, |
|
"eval_counts_1": 6539, |
|
"eval_counts_2": 1846, |
|
"eval_counts_3": 702, |
|
"eval_counts_4": 240, |
|
"eval_exact_match": 0.0027, |
|
"eval_f1": 0.2663, |
|
"eval_gen_len": 13.2319, |
|
"eval_loss": 2.122749090194702, |
|
"eval_precisions_1": 38.0772, |
|
"eval_precisions_2": 12.3322, |
|
"eval_precisions_3": 5.4994, |
|
"eval_precisions_4": 2.2725, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2729, |
|
"eval_rouge2": 0.1154, |
|
"eval_rougeL": 0.2601, |
|
"eval_rougeLsum": 0.2604, |
|
"eval_runtime": 822.9261, |
|
"eval_samples_per_second": 2.678, |
|
"eval_steps_per_second": 0.67, |
|
"eval_sys_len": 17173, |
|
"eval_totals_1": 17173, |
|
"eval_totals_2": 14969, |
|
"eval_totals_3": 12765, |
|
"eval_totals_4": 10561, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.5557, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_bleu": 7.3331, |
|
"eval_bp": 0.7179, |
|
"eval_counts_1": 6491, |
|
"eval_counts_2": 1923, |
|
"eval_counts_3": 752, |
|
"eval_counts_4": 275, |
|
"eval_exact_match": 0.0059, |
|
"eval_f1": 0.2729, |
|
"eval_gen_len": 12.0962, |
|
"eval_loss": 2.035691022872925, |
|
"eval_precisions_1": 40.6679, |
|
"eval_precisions_2": 13.9783, |
|
"eval_precisions_3": 6.5091, |
|
"eval_precisions_4": 2.9415, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2783, |
|
"eval_rouge2": 0.1214, |
|
"eval_rougeL": 0.2676, |
|
"eval_rougeLsum": 0.2678, |
|
"eval_runtime": 786.7967, |
|
"eval_samples_per_second": 2.801, |
|
"eval_steps_per_second": 0.7, |
|
"eval_sys_len": 15961, |
|
"eval_totals_1": 15961, |
|
"eval_totals_2": 13757, |
|
"eval_totals_3": 11553, |
|
"eval_totals_4": 9349, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3785, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 8.2007, |
|
"eval_bp": 0.7463, |
|
"eval_counts_1": 6808, |
|
"eval_counts_2": 2113, |
|
"eval_counts_3": 855, |
|
"eval_counts_4": 328, |
|
"eval_exact_match": 0.0064, |
|
"eval_f1": 0.2892, |
|
"eval_gen_len": 12.6819, |
|
"eval_loss": 1.9824347496032715, |
|
"eval_precisions_1": 41.4137, |
|
"eval_precisions_2": 14.8437, |
|
"eval_precisions_3": 7.1066, |
|
"eval_precisions_4": 3.3377, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2948, |
|
"eval_rouge2": 0.1326, |
|
"eval_rougeL": 0.2825, |
|
"eval_rougeLsum": 0.2825, |
|
"eval_runtime": 806.3535, |
|
"eval_samples_per_second": 2.733, |
|
"eval_steps_per_second": 0.683, |
|
"eval_sys_len": 16439, |
|
"eval_totals_1": 16439, |
|
"eval_totals_2": 14235, |
|
"eval_totals_3": 12031, |
|
"eval_totals_4": 9827, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3396, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 8.639, |
|
"eval_bp": 0.7702, |
|
"eval_counts_1": 7033, |
|
"eval_counts_2": 2194, |
|
"eval_counts_3": 886, |
|
"eval_counts_4": 364, |
|
"eval_exact_match": 0.0086, |
|
"eval_f1": 0.3, |
|
"eval_gen_len": 13.0254, |
|
"eval_loss": 1.9448895454406738, |
|
"eval_precisions_1": 41.7364, |
|
"eval_precisions_2": 14.9792, |
|
"eval_precisions_3": 7.1205, |
|
"eval_precisions_4": 3.555, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3044, |
|
"eval_rouge2": 0.1373, |
|
"eval_rougeL": 0.292, |
|
"eval_rougeLsum": 0.2922, |
|
"eval_runtime": 473.2306, |
|
"eval_samples_per_second": 4.657, |
|
"eval_steps_per_second": 1.164, |
|
"eval_sys_len": 16851, |
|
"eval_totals_1": 16851, |
|
"eval_totals_2": 14647, |
|
"eval_totals_3": 12443, |
|
"eval_totals_4": 10239, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.2557, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_bleu": 9.049, |
|
"eval_bp": 0.7515, |
|
"eval_counts_1": 7167, |
|
"eval_counts_2": 2285, |
|
"eval_counts_3": 939, |
|
"eval_counts_4": 389, |
|
"eval_exact_match": 0.0095, |
|
"eval_f1": 0.3119, |
|
"eval_gen_len": 12.7119, |
|
"eval_loss": 1.8937886953353882, |
|
"eval_precisions_1": 43.3602, |
|
"eval_precisions_2": 15.9511, |
|
"eval_precisions_3": 7.7469, |
|
"eval_precisions_4": 3.9226, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3166, |
|
"eval_rouge2": 0.1428, |
|
"eval_rougeL": 0.3043, |
|
"eval_rougeLsum": 0.3046, |
|
"eval_runtime": 453.3958, |
|
"eval_samples_per_second": 4.861, |
|
"eval_steps_per_second": 1.215, |
|
"eval_sys_len": 16529, |
|
"eval_totals_1": 16529, |
|
"eval_totals_2": 14325, |
|
"eval_totals_3": 12121, |
|
"eval_totals_4": 9917, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1168, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_bleu": 9.6447, |
|
"eval_bp": 0.7708, |
|
"eval_counts_1": 7347, |
|
"eval_counts_2": 2425, |
|
"eval_counts_3": 1021, |
|
"eval_counts_4": 425, |
|
"eval_exact_match": 0.0104, |
|
"eval_f1": 0.3211, |
|
"eval_gen_len": 12.9374, |
|
"eval_loss": 1.857459306716919, |
|
"eval_precisions_1": 43.5765, |
|
"eval_precisions_2": 16.5461, |
|
"eval_precisions_3": 8.1995, |
|
"eval_precisions_4": 4.1472, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3258, |
|
"eval_rouge2": 0.1505, |
|
"eval_rougeL": 0.3137, |
|
"eval_rougeLsum": 0.3142, |
|
"eval_runtime": 457.8255, |
|
"eval_samples_per_second": 4.814, |
|
"eval_steps_per_second": 1.204, |
|
"eval_sys_len": 16860, |
|
"eval_totals_1": 16860, |
|
"eval_totals_2": 14656, |
|
"eval_totals_3": 12452, |
|
"eval_totals_4": 10248, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1105, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_bleu": 9.9436, |
|
"eval_bp": 0.7807, |
|
"eval_counts_1": 7460, |
|
"eval_counts_2": 2461, |
|
"eval_counts_3": 1061, |
|
"eval_counts_4": 449, |
|
"eval_exact_match": 0.0095, |
|
"eval_f1": 0.3267, |
|
"eval_gen_len": 13.1828, |
|
"eval_loss": 1.8283559083938599, |
|
"eval_precisions_1": 43.7948, |
|
"eval_precisions_2": 16.5947, |
|
"eval_precisions_3": 8.4033, |
|
"eval_precisions_4": 4.3082, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3317, |
|
"eval_rouge2": 0.1521, |
|
"eval_rougeL": 0.3187, |
|
"eval_rougeLsum": 0.3191, |
|
"eval_runtime": 464.6, |
|
"eval_samples_per_second": 4.744, |
|
"eval_steps_per_second": 1.186, |
|
"eval_sys_len": 17034, |
|
"eval_totals_1": 17034, |
|
"eval_totals_2": 14830, |
|
"eval_totals_3": 12626, |
|
"eval_totals_4": 10422, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9913, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 10.3601, |
|
"eval_bp": 0.7791, |
|
"eval_counts_1": 7547, |
|
"eval_counts_2": 2537, |
|
"eval_counts_3": 1105, |
|
"eval_counts_4": 487, |
|
"eval_exact_match": 0.0113, |
|
"eval_f1": 0.3316, |
|
"eval_gen_len": 13.0358, |
|
"eval_loss": 1.8056522607803345, |
|
"eval_precisions_1": 44.3811, |
|
"eval_precisions_2": 17.1407, |
|
"eval_precisions_3": 8.7719, |
|
"eval_precisions_4": 4.6858, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.335, |
|
"eval_rouge2": 0.1566, |
|
"eval_rougeL": 0.323, |
|
"eval_rougeLsum": 0.3233, |
|
"eval_runtime": 492.674, |
|
"eval_samples_per_second": 4.474, |
|
"eval_steps_per_second": 1.118, |
|
"eval_sys_len": 17005, |
|
"eval_totals_1": 17005, |
|
"eval_totals_2": 14801, |
|
"eval_totals_3": 12597, |
|
"eval_totals_4": 10393, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9943, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 10.5378, |
|
"eval_bp": 0.7697, |
|
"eval_counts_1": 7629, |
|
"eval_counts_2": 2574, |
|
"eval_counts_3": 1131, |
|
"eval_counts_4": 496, |
|
"eval_exact_match": 0.0113, |
|
"eval_f1": 0.3385, |
|
"eval_gen_len": 13.0154, |
|
"eval_loss": 1.7973003387451172, |
|
"eval_precisions_1": 45.2975, |
|
"eval_precisions_2": 17.5844, |
|
"eval_precisions_3": 9.096, |
|
"eval_precisions_4": 4.8485, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.343, |
|
"eval_rouge2": 0.1594, |
|
"eval_rougeL": 0.3296, |
|
"eval_rougeLsum": 0.33, |
|
"eval_runtime": 454.7448, |
|
"eval_samples_per_second": 4.847, |
|
"eval_steps_per_second": 1.212, |
|
"eval_sys_len": 16842, |
|
"eval_totals_1": 16842, |
|
"eval_totals_2": 14638, |
|
"eval_totals_3": 12434, |
|
"eval_totals_4": 10230, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.941, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_bleu": 10.8273, |
|
"eval_bp": 0.7848, |
|
"eval_counts_1": 7681, |
|
"eval_counts_2": 2606, |
|
"eval_counts_3": 1164, |
|
"eval_counts_4": 528, |
|
"eval_exact_match": 0.0132, |
|
"eval_f1": 0.3385, |
|
"eval_gen_len": 13.1361, |
|
"eval_loss": 1.777303695678711, |
|
"eval_precisions_1": 44.905, |
|
"eval_precisions_2": 17.4888, |
|
"eval_precisions_3": 9.1675, |
|
"eval_precisions_4": 5.0319, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3421, |
|
"eval_rouge2": 0.1607, |
|
"eval_rougeL": 0.3295, |
|
"eval_rougeLsum": 0.3294, |
|
"eval_runtime": 458.5033, |
|
"eval_samples_per_second": 4.807, |
|
"eval_steps_per_second": 1.202, |
|
"eval_sys_len": 17105, |
|
"eval_totals_1": 17105, |
|
"eval_totals_2": 14901, |
|
"eval_totals_3": 12697, |
|
"eval_totals_4": 10493, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8453, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_bleu": 11.2687, |
|
"eval_bp": 0.7972, |
|
"eval_counts_1": 7817, |
|
"eval_counts_2": 2700, |
|
"eval_counts_3": 1224, |
|
"eval_counts_4": 560, |
|
"eval_exact_match": 0.0127, |
|
"eval_f1": 0.3447, |
|
"eval_gen_len": 13.5018, |
|
"eval_loss": 1.7595148086547852, |
|
"eval_precisions_1": 45.1224, |
|
"eval_precisions_2": 17.8571, |
|
"eval_precisions_3": 9.4766, |
|
"eval_precisions_4": 5.2278, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3492, |
|
"eval_rouge2": 0.1662, |
|
"eval_rougeL": 0.3367, |
|
"eval_rougeLsum": 0.3367, |
|
"eval_runtime": 465.5444, |
|
"eval_samples_per_second": 4.734, |
|
"eval_steps_per_second": 1.184, |
|
"eval_sys_len": 17324, |
|
"eval_totals_1": 17324, |
|
"eval_totals_2": 15120, |
|
"eval_totals_3": 12916, |
|
"eval_totals_4": 10712, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.85, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_bleu": 10.9825, |
|
"eval_bp": 0.8025, |
|
"eval_counts_1": 7792, |
|
"eval_counts_2": 2642, |
|
"eval_counts_3": 1182, |
|
"eval_counts_4": 537, |
|
"eval_exact_match": 0.0127, |
|
"eval_f1": 0.3416, |
|
"eval_gen_len": 13.5395, |
|
"eval_loss": 1.7414402961730957, |
|
"eval_precisions_1": 44.7379, |
|
"eval_precisions_2": 17.3667, |
|
"eval_precisions_3": 9.086, |
|
"eval_precisions_4": 4.9699, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3458, |
|
"eval_rouge2": 0.1632, |
|
"eval_rougeL": 0.3322, |
|
"eval_rougeLsum": 0.3322, |
|
"eval_runtime": 468.8552, |
|
"eval_samples_per_second": 4.701, |
|
"eval_steps_per_second": 1.175, |
|
"eval_sys_len": 17417, |
|
"eval_totals_1": 17417, |
|
"eval_totals_2": 15213, |
|
"eval_totals_3": 13009, |
|
"eval_totals_4": 10805, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7588, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 11.3189, |
|
"eval_bp": 0.7939, |
|
"eval_counts_1": 7827, |
|
"eval_counts_2": 2702, |
|
"eval_counts_3": 1223, |
|
"eval_counts_4": 569, |
|
"eval_exact_match": 0.015, |
|
"eval_f1": 0.3446, |
|
"eval_gen_len": 13.3026, |
|
"eval_loss": 1.7346255779266357, |
|
"eval_precisions_1": 45.3345, |
|
"eval_precisions_2": 17.9404, |
|
"eval_precisions_3": 9.5123, |
|
"eval_precisions_4": 5.3412, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3487, |
|
"eval_rouge2": 0.1661, |
|
"eval_rougeL": 0.3355, |
|
"eval_rougeLsum": 0.3354, |
|
"eval_runtime": 464.8491, |
|
"eval_samples_per_second": 4.741, |
|
"eval_steps_per_second": 1.185, |
|
"eval_sys_len": 17265, |
|
"eval_totals_1": 17265, |
|
"eval_totals_2": 15061, |
|
"eval_totals_3": 12857, |
|
"eval_totals_4": 10653, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7663, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_bleu": 11.5245, |
|
"eval_bp": 0.8032, |
|
"eval_counts_1": 7946, |
|
"eval_counts_2": 2757, |
|
"eval_counts_3": 1245, |
|
"eval_counts_4": 581, |
|
"eval_exact_match": 0.0154, |
|
"eval_f1": 0.3501, |
|
"eval_gen_len": 13.4515, |
|
"eval_loss": 1.7190728187561035, |
|
"eval_precisions_1": 45.5855, |
|
"eval_precisions_2": 18.106, |
|
"eval_precisions_3": 9.56, |
|
"eval_precisions_4": 5.3702, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3544, |
|
"eval_rouge2": 0.1695, |
|
"eval_rougeL": 0.3418, |
|
"eval_rougeLsum": 0.3416, |
|
"eval_runtime": 465.8123, |
|
"eval_samples_per_second": 4.732, |
|
"eval_steps_per_second": 1.183, |
|
"eval_sys_len": 17431, |
|
"eval_totals_1": 17431, |
|
"eval_totals_2": 15227, |
|
"eval_totals_3": 13023, |
|
"eval_totals_4": 10819, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7317, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_bleu": 12.0845, |
|
"eval_bp": 0.8212, |
|
"eval_counts_1": 8068, |
|
"eval_counts_2": 2844, |
|
"eval_counts_3": 1325, |
|
"eval_counts_4": 633, |
|
"eval_exact_match": 0.0163, |
|
"eval_f1": 0.3527, |
|
"eval_gen_len": 13.77, |
|
"eval_loss": 1.7133468389511108, |
|
"eval_precisions_1": 45.4484, |
|
"eval_precisions_2": 18.2917, |
|
"eval_precisions_3": 9.9296, |
|
"eval_precisions_4": 5.6822, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3575, |
|
"eval_rouge2": 0.1746, |
|
"eval_rougeL": 0.3445, |
|
"eval_rougeLsum": 0.3447, |
|
"eval_runtime": 458.8154, |
|
"eval_samples_per_second": 4.804, |
|
"eval_steps_per_second": 1.201, |
|
"eval_sys_len": 17752, |
|
"eval_totals_1": 17752, |
|
"eval_totals_2": 15548, |
|
"eval_totals_3": 13344, |
|
"eval_totals_4": 11140, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6421, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_bleu": 11.877, |
|
"eval_bp": 0.8091, |
|
"eval_counts_1": 8003, |
|
"eval_counts_2": 2823, |
|
"eval_counts_3": 1301, |
|
"eval_counts_4": 609, |
|
"eval_exact_match": 0.015, |
|
"eval_f1": 0.353, |
|
"eval_gen_len": 13.4669, |
|
"eval_loss": 1.719835877418518, |
|
"eval_precisions_1": 45.6401, |
|
"eval_precisions_2": 18.4137, |
|
"eval_precisions_3": 9.9109, |
|
"eval_precisions_4": 5.5754, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3576, |
|
"eval_rouge2": 0.1737, |
|
"eval_rougeL": 0.3447, |
|
"eval_rougeLsum": 0.3448, |
|
"eval_runtime": 467.8501, |
|
"eval_samples_per_second": 4.711, |
|
"eval_steps_per_second": 1.178, |
|
"eval_sys_len": 17535, |
|
"eval_totals_1": 17535, |
|
"eval_totals_2": 15331, |
|
"eval_totals_3": 13127, |
|
"eval_totals_4": 10923, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6543, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_bleu": 11.8679, |
|
"eval_bp": 0.824, |
|
"eval_counts_1": 8031, |
|
"eval_counts_2": 2817, |
|
"eval_counts_3": 1294, |
|
"eval_counts_4": 612, |
|
"eval_exact_match": 0.015, |
|
"eval_f1": 0.351, |
|
"eval_gen_len": 13.8648, |
|
"eval_loss": 1.715085506439209, |
|
"eval_precisions_1": 45.1104, |
|
"eval_precisions_2": 18.0588, |
|
"eval_precisions_3": 9.6603, |
|
"eval_precisions_4": 5.4687, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3567, |
|
"eval_rouge2": 0.1734, |
|
"eval_rougeL": 0.3435, |
|
"eval_rougeLsum": 0.3431, |
|
"eval_runtime": 748.2265, |
|
"eval_samples_per_second": 2.946, |
|
"eval_steps_per_second": 0.736, |
|
"eval_sys_len": 17803, |
|
"eval_totals_1": 17803, |
|
"eval_totals_2": 15599, |
|
"eval_totals_3": 13395, |
|
"eval_totals_4": 11191, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5702, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"eval_bleu": 12.1229, |
|
"eval_bp": 0.7945, |
|
"eval_counts_1": 7996, |
|
"eval_counts_2": 2850, |
|
"eval_counts_3": 1330, |
|
"eval_counts_4": 639, |
|
"eval_exact_match": 0.0168, |
|
"eval_f1": 0.3569, |
|
"eval_gen_len": 13.3367, |
|
"eval_loss": 1.7079344987869263, |
|
"eval_precisions_1": 46.2865, |
|
"eval_precisions_2": 18.9105, |
|
"eval_precisions_3": 10.3365, |
|
"eval_precisions_4": 5.9927, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3618, |
|
"eval_rouge2": 0.1769, |
|
"eval_rougeL": 0.3485, |
|
"eval_rougeLsum": 0.348, |
|
"eval_runtime": 880.8231, |
|
"eval_samples_per_second": 2.502, |
|
"eval_steps_per_second": 0.626, |
|
"eval_sys_len": 17275, |
|
"eval_totals_1": 17275, |
|
"eval_totals_2": 15071, |
|
"eval_totals_3": 12867, |
|
"eval_totals_4": 10663, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"step": 720, |
|
"total_flos": 2.52283256045568e+17, |
|
"train_loss": 2.1398978657192655, |
|
"train_runtime": 23260.8504, |
|
"train_samples_per_second": 8.008, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 720, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 2.52283256045568e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|