|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.789564097058193, |
|
"eval_steps": 500, |
|
"global_step": 1440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 3.9659, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 10.8438, |
|
"eval_bp": 0.7379, |
|
"eval_counts_1": 7244, |
|
"eval_counts_2": 2547, |
|
"eval_counts_3": 1183, |
|
"eval_counts_4": 565, |
|
"eval_exact_match": 0.0136, |
|
"eval_f1": 0.3139, |
|
"eval_gen_len": 11.7786, |
|
"eval_loss": 1.4144511222839355, |
|
"eval_precisions_1": 44.4526, |
|
"eval_precisions_2": 18.0741, |
|
"eval_precisions_3": 9.9512, |
|
"eval_precisions_4": 5.8344, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3213, |
|
"eval_rouge2": 0.1608, |
|
"eval_rougeL": 0.3091, |
|
"eval_rougeLsum": 0.309, |
|
"eval_runtime": 2106.9539, |
|
"eval_samples_per_second": 1.046, |
|
"eval_steps_per_second": 1.046, |
|
"eval_sys_len": 16296, |
|
"eval_totals_1": 16296, |
|
"eval_totals_2": 14092, |
|
"eval_totals_3": 11888, |
|
"eval_totals_4": 9684, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7081, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_bleu": 13.2044, |
|
"eval_bp": 0.7697, |
|
"eval_counts_1": 7865, |
|
"eval_counts_2": 3037, |
|
"eval_counts_3": 1498, |
|
"eval_counts_4": 759, |
|
"eval_exact_match": 0.0181, |
|
"eval_f1": 0.3481, |
|
"eval_gen_len": 12.225, |
|
"eval_loss": 1.263157844543457, |
|
"eval_precisions_1": 46.7015, |
|
"eval_precisions_2": 20.7488, |
|
"eval_precisions_3": 12.0486, |
|
"eval_precisions_4": 7.4201, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3577, |
|
"eval_rouge2": 0.189, |
|
"eval_rougeL": 0.3438, |
|
"eval_rougeLsum": 0.3439, |
|
"eval_runtime": 3942.8178, |
|
"eval_samples_per_second": 0.559, |
|
"eval_steps_per_second": 0.559, |
|
"eval_sys_len": 16841, |
|
"eval_totals_1": 16841, |
|
"eval_totals_2": 14637, |
|
"eval_totals_3": 12433, |
|
"eval_totals_4": 10229, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4856, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 15.6014, |
|
"eval_bp": 0.8142, |
|
"eval_counts_1": 8608, |
|
"eval_counts_2": 3519, |
|
"eval_counts_3": 1818, |
|
"eval_counts_4": 969, |
|
"eval_exact_match": 0.0268, |
|
"eval_f1": 0.3882, |
|
"eval_gen_len": 13.0027, |
|
"eval_loss": 1.1974213123321533, |
|
"eval_precisions_1": 48.8342, |
|
"eval_precisions_2": 22.8166, |
|
"eval_precisions_3": 13.7529, |
|
"eval_precisions_4": 8.7971, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3969, |
|
"eval_rouge2": 0.2181, |
|
"eval_rougeL": 0.381, |
|
"eval_rougeLsum": 0.3812, |
|
"eval_runtime": 4069.754, |
|
"eval_samples_per_second": 0.542, |
|
"eval_steps_per_second": 0.542, |
|
"eval_sys_len": 17627, |
|
"eval_totals_1": 17627, |
|
"eval_totals_2": 15423, |
|
"eval_totals_3": 13219, |
|
"eval_totals_4": 11015, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3277, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 16.4313, |
|
"eval_bp": 0.8052, |
|
"eval_counts_1": 9018, |
|
"eval_counts_2": 3702, |
|
"eval_counts_3": 1907, |
|
"eval_counts_4": 1029, |
|
"eval_exact_match": 0.0313, |
|
"eval_f1": 0.4156, |
|
"eval_gen_len": 12.8716, |
|
"eval_loss": 1.1393847465515137, |
|
"eval_precisions_1": 51.6347, |
|
"eval_precisions_2": 24.2579, |
|
"eval_precisions_3": 14.6052, |
|
"eval_precisions_4": 9.4812, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.424, |
|
"eval_rouge2": 0.2321, |
|
"eval_rougeL": 0.4087, |
|
"eval_rougeLsum": 0.4085, |
|
"eval_runtime": 4037.7601, |
|
"eval_samples_per_second": 0.546, |
|
"eval_steps_per_second": 0.546, |
|
"eval_sys_len": 17465, |
|
"eval_totals_1": 17465, |
|
"eval_totals_2": 15261, |
|
"eval_totals_3": 13057, |
|
"eval_totals_4": 10853, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2314, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_bleu": 17.0718, |
|
"eval_bp": 0.8235, |
|
"eval_counts_1": 9240, |
|
"eval_counts_2": 3869, |
|
"eval_counts_3": 1994, |
|
"eval_counts_4": 1076, |
|
"eval_exact_match": 0.0363, |
|
"eval_f1": 0.4256, |
|
"eval_gen_len": 13.2137, |
|
"eval_loss": 1.1193382740020752, |
|
"eval_precisions_1": 51.9276, |
|
"eval_precisions_2": 24.8172, |
|
"eval_precisions_3": 14.8962, |
|
"eval_precisions_4": 9.6226, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4336, |
|
"eval_rouge2": 0.2413, |
|
"eval_rougeL": 0.4183, |
|
"eval_rougeLsum": 0.418, |
|
"eval_runtime": 4116.6581, |
|
"eval_samples_per_second": 0.535, |
|
"eval_steps_per_second": 0.535, |
|
"eval_sys_len": 17794, |
|
"eval_totals_1": 17794, |
|
"eval_totals_2": 15590, |
|
"eval_totals_3": 13386, |
|
"eval_totals_4": 11182, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1264, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 17.4744, |
|
"eval_bp": 0.8072, |
|
"eval_counts_1": 9263, |
|
"eval_counts_2": 3908, |
|
"eval_counts_3": 2055, |
|
"eval_counts_4": 1127, |
|
"eval_exact_match": 0.0372, |
|
"eval_f1": 0.4309, |
|
"eval_gen_len": 13.034, |
|
"eval_loss": 1.1085509061813354, |
|
"eval_precisions_1": 52.9254, |
|
"eval_precisions_2": 25.5458, |
|
"eval_precisions_3": 15.6942, |
|
"eval_precisions_4": 10.3489, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4383, |
|
"eval_rouge2": 0.2452, |
|
"eval_rougeL": 0.4239, |
|
"eval_rougeLsum": 0.4237, |
|
"eval_runtime": 3709.3886, |
|
"eval_samples_per_second": 0.594, |
|
"eval_steps_per_second": 0.594, |
|
"eval_sys_len": 17502, |
|
"eval_totals_1": 17502, |
|
"eval_totals_2": 15298, |
|
"eval_totals_3": 13094, |
|
"eval_totals_4": 10890, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0469, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 18.0906, |
|
"eval_bp": 0.8363, |
|
"eval_counts_1": 9434, |
|
"eval_counts_2": 4034, |
|
"eval_counts_3": 2146, |
|
"eval_counts_4": 1189, |
|
"eval_exact_match": 0.039, |
|
"eval_f1": 0.4348, |
|
"eval_gen_len": 13.422, |
|
"eval_loss": 1.103752851486206, |
|
"eval_precisions_1": 52.3297, |
|
"eval_precisions_2": 25.4929, |
|
"eval_precisions_3": 15.7562, |
|
"eval_precisions_4": 10.4152, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4433, |
|
"eval_rouge2": 0.2505, |
|
"eval_rougeL": 0.4286, |
|
"eval_rougeLsum": 0.4282, |
|
"eval_runtime": 4081.2971, |
|
"eval_samples_per_second": 0.54, |
|
"eval_steps_per_second": 0.54, |
|
"eval_sys_len": 18028, |
|
"eval_totals_1": 18028, |
|
"eval_totals_2": 15824, |
|
"eval_totals_3": 13620, |
|
"eval_totals_4": 11416, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9874, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 19.1287, |
|
"eval_bp": 0.8539, |
|
"eval_counts_1": 9746, |
|
"eval_counts_2": 4265, |
|
"eval_counts_3": 2287, |
|
"eval_counts_4": 1285, |
|
"eval_exact_match": 0.0454, |
|
"eval_f1": 0.4498, |
|
"eval_gen_len": 13.6466, |
|
"eval_loss": 1.0989724397659302, |
|
"eval_precisions_1": 53.1088, |
|
"eval_precisions_2": 26.4136, |
|
"eval_precisions_3": 16.4025, |
|
"eval_precisions_4": 10.9464, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.457, |
|
"eval_rouge2": 0.2627, |
|
"eval_rougeL": 0.4417, |
|
"eval_rougeLsum": 0.4416, |
|
"eval_runtime": 2875.9709, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.766, |
|
"eval_sys_len": 18351, |
|
"eval_totals_1": 18351, |
|
"eval_totals_2": 16147, |
|
"eval_totals_3": 13943, |
|
"eval_totals_4": 11739, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9488, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_bleu": 18.2172, |
|
"eval_bp": 0.8255, |
|
"eval_counts_1": 9484, |
|
"eval_counts_2": 4062, |
|
"eval_counts_3": 2158, |
|
"eval_counts_4": 1197, |
|
"eval_exact_match": 0.0431, |
|
"eval_f1": 0.4399, |
|
"eval_gen_len": 13.2763, |
|
"eval_loss": 1.1175453662872314, |
|
"eval_precisions_1": 53.1883, |
|
"eval_precisions_2": 25.9935, |
|
"eval_precisions_3": 16.0769, |
|
"eval_precisions_4": 10.6694, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4482, |
|
"eval_rouge2": 0.2548, |
|
"eval_rougeL": 0.4338, |
|
"eval_rougeLsum": 0.4333, |
|
"eval_runtime": 4231.6184, |
|
"eval_samples_per_second": 0.521, |
|
"eval_steps_per_second": 0.521, |
|
"eval_sys_len": 17831, |
|
"eval_totals_1": 17831, |
|
"eval_totals_2": 15627, |
|
"eval_totals_3": 13423, |
|
"eval_totals_4": 11219, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8893, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_bleu": 19.064, |
|
"eval_bp": 0.8357, |
|
"eval_counts_1": 9650, |
|
"eval_counts_2": 4205, |
|
"eval_counts_3": 2289, |
|
"eval_counts_4": 1289, |
|
"eval_exact_match": 0.0463, |
|
"eval_f1": 0.4472, |
|
"eval_gen_len": 13.4251, |
|
"eval_loss": 1.1221915483474731, |
|
"eval_precisions_1": 53.5605, |
|
"eval_precisions_2": 26.592, |
|
"eval_precisions_3": 16.8198, |
|
"eval_precisions_4": 11.3021, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4543, |
|
"eval_rouge2": 0.262, |
|
"eval_rougeL": 0.4396, |
|
"eval_rougeLsum": 0.4394, |
|
"eval_runtime": 4369.7974, |
|
"eval_samples_per_second": 0.504, |
|
"eval_steps_per_second": 0.504, |
|
"eval_sys_len": 18017, |
|
"eval_totals_1": 18017, |
|
"eval_totals_2": 15813, |
|
"eval_totals_3": 13609, |
|
"eval_totals_4": 11405, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8362, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 19.052, |
|
"eval_bp": 0.8474, |
|
"eval_counts_1": 9706, |
|
"eval_counts_2": 4232, |
|
"eval_counts_3": 2279, |
|
"eval_counts_4": 1281, |
|
"eval_exact_match": 0.0472, |
|
"eval_f1": 0.4473, |
|
"eval_gen_len": 13.6021, |
|
"eval_loss": 1.1342219114303589, |
|
"eval_precisions_1": 53.2361, |
|
"eval_precisions_2": 26.4038, |
|
"eval_precisions_3": 16.4858, |
|
"eval_precisions_4": 11.0241, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4551, |
|
"eval_rouge2": 0.2632, |
|
"eval_rougeL": 0.4395, |
|
"eval_rougeLsum": 0.4393, |
|
"eval_runtime": 4741.4712, |
|
"eval_samples_per_second": 0.465, |
|
"eval_steps_per_second": 0.465, |
|
"eval_sys_len": 18232, |
|
"eval_totals_1": 18232, |
|
"eval_totals_2": 16028, |
|
"eval_totals_3": 13824, |
|
"eval_totals_4": 11620, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7835, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 19.169, |
|
"eval_bp": 0.8614, |
|
"eval_counts_1": 9802, |
|
"eval_counts_2": 4280, |
|
"eval_counts_3": 2292, |
|
"eval_counts_4": 1285, |
|
"eval_exact_match": 0.0472, |
|
"eval_f1": 0.4497, |
|
"eval_gen_len": 14.0168, |
|
"eval_loss": 1.1426819562911987, |
|
"eval_precisions_1": 53.0096, |
|
"eval_precisions_2": 26.2786, |
|
"eval_precisions_3": 16.2749, |
|
"eval_precisions_4": 10.8174, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.458, |
|
"eval_rouge2": 0.2634, |
|
"eval_rougeL": 0.4414, |
|
"eval_rougeLsum": 0.4412, |
|
"eval_runtime": 2858.9204, |
|
"eval_samples_per_second": 0.771, |
|
"eval_steps_per_second": 0.771, |
|
"eval_sys_len": 18491, |
|
"eval_totals_1": 18491, |
|
"eval_totals_2": 16287, |
|
"eval_totals_3": 14083, |
|
"eval_totals_4": 11879, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7441, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_bleu": 19.3443, |
|
"eval_bp": 0.8618, |
|
"eval_counts_1": 9816, |
|
"eval_counts_2": 4323, |
|
"eval_counts_3": 2334, |
|
"eval_counts_4": 1294, |
|
"eval_exact_match": 0.0463, |
|
"eval_f1": 0.4493, |
|
"eval_gen_len": 13.8348, |
|
"eval_loss": 1.1669002771377563, |
|
"eval_precisions_1": 53.0652, |
|
"eval_precisions_2": 26.5312, |
|
"eval_precisions_3": 16.5649, |
|
"eval_precisions_4": 10.8868, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4577, |
|
"eval_rouge2": 0.2659, |
|
"eval_rougeL": 0.4418, |
|
"eval_rougeLsum": 0.4417, |
|
"eval_runtime": 2130.8, |
|
"eval_samples_per_second": 1.034, |
|
"eval_steps_per_second": 1.034, |
|
"eval_sys_len": 18498, |
|
"eval_totals_1": 18498, |
|
"eval_totals_2": 16294, |
|
"eval_totals_3": 14090, |
|
"eval_totals_4": 11886, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7012, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 19.7341, |
|
"eval_bp": 0.8639, |
|
"eval_counts_1": 9856, |
|
"eval_counts_2": 4364, |
|
"eval_counts_3": 2375, |
|
"eval_counts_4": 1360, |
|
"eval_exact_match": 0.0476, |
|
"eval_f1": 0.4514, |
|
"eval_gen_len": 13.976, |
|
"eval_loss": 1.1739834547042847, |
|
"eval_precisions_1": 53.1693, |
|
"eval_precisions_2": 26.7189, |
|
"eval_precisions_3": 16.8094, |
|
"eval_precisions_4": 11.4046, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4591, |
|
"eval_rouge2": 0.2653, |
|
"eval_rougeL": 0.443, |
|
"eval_rougeLsum": 0.4428, |
|
"eval_runtime": 2149.1056, |
|
"eval_samples_per_second": 1.026, |
|
"eval_steps_per_second": 1.026, |
|
"eval_sys_len": 18537, |
|
"eval_totals_1": 18537, |
|
"eval_totals_2": 16333, |
|
"eval_totals_3": 14129, |
|
"eval_totals_4": 11925, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6597, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_bleu": 19.3289, |
|
"eval_bp": 0.8602, |
|
"eval_counts_1": 9780, |
|
"eval_counts_2": 4292, |
|
"eval_counts_3": 2336, |
|
"eval_counts_4": 1302, |
|
"eval_exact_match": 0.0485, |
|
"eval_f1": 0.4492, |
|
"eval_gen_len": 13.8802, |
|
"eval_loss": 1.1987030506134033, |
|
"eval_precisions_1": 52.9565, |
|
"eval_precisions_2": 26.3896, |
|
"eval_precisions_3": 16.6145, |
|
"eval_precisions_4": 10.9818, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.457, |
|
"eval_rouge2": 0.2633, |
|
"eval_rougeL": 0.4418, |
|
"eval_rougeLsum": 0.4416, |
|
"eval_runtime": 2149.2833, |
|
"eval_samples_per_second": 1.025, |
|
"eval_steps_per_second": 1.025, |
|
"eval_sys_len": 18468, |
|
"eval_totals_1": 18468, |
|
"eval_totals_2": 16264, |
|
"eval_totals_3": 14060, |
|
"eval_totals_4": 11856, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6236, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 19.8055, |
|
"eval_bp": 0.8734, |
|
"eval_counts_1": 9931, |
|
"eval_counts_2": 4388, |
|
"eval_counts_3": 2390, |
|
"eval_counts_4": 1359, |
|
"eval_exact_match": 0.0495, |
|
"eval_f1": 0.4538, |
|
"eval_gen_len": 14.044, |
|
"eval_loss": 1.2135030031204224, |
|
"eval_precisions_1": 53.0587, |
|
"eval_precisions_2": 26.573, |
|
"eval_precisions_3": 16.7028, |
|
"eval_precisions_4": 11.2268, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4618, |
|
"eval_rouge2": 0.2682, |
|
"eval_rougeL": 0.4452, |
|
"eval_rougeLsum": 0.445, |
|
"eval_runtime": 2168.1341, |
|
"eval_samples_per_second": 1.017, |
|
"eval_steps_per_second": 1.017, |
|
"eval_sys_len": 18717, |
|
"eval_totals_1": 18717, |
|
"eval_totals_2": 16513, |
|
"eval_totals_3": 14309, |
|
"eval_totals_4": 12105, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5933, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 19.5893, |
|
"eval_bp": 0.8654, |
|
"eval_counts_1": 9806, |
|
"eval_counts_2": 4316, |
|
"eval_counts_3": 2366, |
|
"eval_counts_4": 1348, |
|
"eval_exact_match": 0.049, |
|
"eval_f1": 0.4485, |
|
"eval_gen_len": 14.0622, |
|
"eval_loss": 1.2305341958999634, |
|
"eval_precisions_1": 52.817, |
|
"eval_precisions_2": 26.3782, |
|
"eval_precisions_3": 16.7114, |
|
"eval_precisions_4": 11.2766, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4571, |
|
"eval_rouge2": 0.2628, |
|
"eval_rougeL": 0.4407, |
|
"eval_rougeLsum": 0.4409, |
|
"eval_runtime": 2171.7325, |
|
"eval_samples_per_second": 1.015, |
|
"eval_steps_per_second": 1.015, |
|
"eval_sys_len": 18566, |
|
"eval_totals_1": 18566, |
|
"eval_totals_2": 16362, |
|
"eval_totals_3": 14158, |
|
"eval_totals_4": 11954, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5622, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_bleu": 19.4914, |
|
"eval_bp": 0.865, |
|
"eval_counts_1": 9787, |
|
"eval_counts_2": 4306, |
|
"eval_counts_3": 2346, |
|
"eval_counts_4": 1338, |
|
"eval_exact_match": 0.0476, |
|
"eval_f1": 0.447, |
|
"eval_gen_len": 13.7763, |
|
"eval_loss": 1.2796473503112793, |
|
"eval_precisions_1": 52.7345, |
|
"eval_precisions_2": 26.3283, |
|
"eval_precisions_3": 16.5783, |
|
"eval_precisions_4": 11.1995, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4549, |
|
"eval_rouge2": 0.2609, |
|
"eval_rougeL": 0.4383, |
|
"eval_rougeLsum": 0.4382, |
|
"eval_runtime": 2158.5699, |
|
"eval_samples_per_second": 1.021, |
|
"eval_steps_per_second": 1.021, |
|
"eval_sys_len": 18559, |
|
"eval_totals_1": 18559, |
|
"eval_totals_2": 16355, |
|
"eval_totals_3": 14151, |
|
"eval_totals_4": 11947, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5275, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 19.6947, |
|
"eval_bp": 0.8857, |
|
"eval_counts_1": 9918, |
|
"eval_counts_2": 4363, |
|
"eval_counts_3": 2374, |
|
"eval_counts_4": 1355, |
|
"eval_exact_match": 0.0508, |
|
"eval_f1": 0.4499, |
|
"eval_gen_len": 14.1647, |
|
"eval_loss": 1.2833356857299805, |
|
"eval_precisions_1": 52.3377, |
|
"eval_precisions_2": 26.054, |
|
"eval_precisions_3": 16.3251, |
|
"eval_precisions_4": 10.9823, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4573, |
|
"eval_rouge2": 0.2624, |
|
"eval_rougeL": 0.441, |
|
"eval_rougeLsum": 0.4408, |
|
"eval_runtime": 2190.1704, |
|
"eval_samples_per_second": 1.006, |
|
"eval_steps_per_second": 1.006, |
|
"eval_sys_len": 18950, |
|
"eval_totals_1": 18950, |
|
"eval_totals_2": 16746, |
|
"eval_totals_3": 14542, |
|
"eval_totals_4": 12338, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4986, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"eval_bleu": 19.4544, |
|
"eval_bp": 0.8847, |
|
"eval_counts_1": 9879, |
|
"eval_counts_2": 4315, |
|
"eval_counts_3": 2347, |
|
"eval_counts_4": 1324, |
|
"eval_exact_match": 0.0495, |
|
"eval_f1": 0.4478, |
|
"eval_gen_len": 14.2827, |
|
"eval_loss": 1.3059108257293701, |
|
"eval_precisions_1": 52.1842, |
|
"eval_precisions_2": 25.7966, |
|
"eval_precisions_3": 16.1606, |
|
"eval_precisions_4": 10.7476, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4564, |
|
"eval_rouge2": 0.2622, |
|
"eval_rougeL": 0.4407, |
|
"eval_rougeLsum": 0.4403, |
|
"eval_runtime": 3646.8693, |
|
"eval_samples_per_second": 0.604, |
|
"eval_steps_per_second": 0.604, |
|
"eval_sys_len": 18931, |
|
"eval_totals_1": 18931, |
|
"eval_totals_2": 16727, |
|
"eval_totals_3": 14523, |
|
"eval_totals_4": 12319, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"step": 1440, |
|
"total_flos": 1.102412878184448e+18, |
|
"train_loss": 1.0667428798145717, |
|
"train_runtime": 140813.6912, |
|
"train_samples_per_second": 1.323, |
|
"train_steps_per_second": 0.01 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1440, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.102412878184448e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|