|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.78531558608845, |
|
"eval_steps": 500, |
|
"global_step": 1440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 6.6905, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 3.7816, |
|
"eval_bp": 1.0, |
|
"eval_counts_1": 5515, |
|
"eval_counts_2": 1394, |
|
"eval_counts_3": 522, |
|
"eval_counts_4": 191, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.2106, |
|
"eval_gen_len": 11.2786, |
|
"eval_loss": 2.097219705581665, |
|
"eval_precisions_1": 19.5762, |
|
"eval_precisions_2": 5.3681, |
|
"eval_precisions_3": 2.1966, |
|
"eval_precisions_4": 0.8859, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1942, |
|
"eval_rouge2": 0.0761, |
|
"eval_rougeL": 0.1837, |
|
"eval_rougeLsum": 0.1841, |
|
"eval_runtime": 456.7865, |
|
"eval_samples_per_second": 4.825, |
|
"eval_steps_per_second": 1.206, |
|
"eval_sys_len": 28172, |
|
"eval_totals_1": 28172, |
|
"eval_totals_2": 25968, |
|
"eval_totals_3": 23764, |
|
"eval_totals_4": 21560, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.4978, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_bleu": 9.6021, |
|
"eval_bp": 0.7524, |
|
"eval_counts_1": 7079, |
|
"eval_counts_2": 2339, |
|
"eval_counts_3": 1027, |
|
"eval_counts_4": 446, |
|
"eval_exact_match": 0.01, |
|
"eval_f1": 0.3032, |
|
"eval_gen_len": 12.0159, |
|
"eval_loss": 1.6211049556732178, |
|
"eval_precisions_1": 42.7889, |
|
"eval_precisions_2": 16.311, |
|
"eval_precisions_3": 8.4624, |
|
"eval_precisions_4": 4.4905, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3097, |
|
"eval_rouge2": 0.1455, |
|
"eval_rougeL": 0.2971, |
|
"eval_rougeLsum": 0.2969, |
|
"eval_runtime": 435.2772, |
|
"eval_samples_per_second": 5.063, |
|
"eval_steps_per_second": 1.266, |
|
"eval_sys_len": 16544, |
|
"eval_totals_1": 16544, |
|
"eval_totals_2": 14340, |
|
"eval_totals_3": 12136, |
|
"eval_totals_4": 9932, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1021, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 11.162, |
|
"eval_bp": 0.7908, |
|
"eval_counts_1": 7507, |
|
"eval_counts_2": 2637, |
|
"eval_counts_3": 1222, |
|
"eval_counts_4": 575, |
|
"eval_exact_match": 0.0141, |
|
"eval_f1": 0.3228, |
|
"eval_gen_len": 12.6375, |
|
"eval_loss": 1.5342339277267456, |
|
"eval_precisions_1": 43.6175, |
|
"eval_precisions_2": 17.5718, |
|
"eval_precisions_3": 9.5446, |
|
"eval_precisions_4": 5.425, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3304, |
|
"eval_rouge2": 0.1642, |
|
"eval_rougeL": 0.3172, |
|
"eval_rougeLsum": 0.3171, |
|
"eval_runtime": 446.8682, |
|
"eval_samples_per_second": 4.932, |
|
"eval_steps_per_second": 1.233, |
|
"eval_sys_len": 17211, |
|
"eval_totals_1": 17211, |
|
"eval_totals_2": 15007, |
|
"eval_totals_3": 12803, |
|
"eval_totals_4": 10599, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9208, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 11.7136, |
|
"eval_bp": 0.7714, |
|
"eval_counts_1": 7599, |
|
"eval_counts_2": 2755, |
|
"eval_counts_3": 1296, |
|
"eval_counts_4": 620, |
|
"eval_exact_match": 0.015, |
|
"eval_f1": 0.33, |
|
"eval_gen_len": 12.3938, |
|
"eval_loss": 1.4861969947814941, |
|
"eval_precisions_1": 45.0418, |
|
"eval_precisions_2": 18.7837, |
|
"eval_precisions_3": 10.3988, |
|
"eval_precisions_4": 6.0435, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3377, |
|
"eval_rouge2": 0.1721, |
|
"eval_rougeL": 0.3232, |
|
"eval_rougeLsum": 0.3229, |
|
"eval_runtime": 440.9926, |
|
"eval_samples_per_second": 4.998, |
|
"eval_steps_per_second": 1.249, |
|
"eval_sys_len": 16871, |
|
"eval_totals_1": 16871, |
|
"eval_totals_2": 14667, |
|
"eval_totals_3": 12463, |
|
"eval_totals_4": 10259, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8135, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_bleu": 12.6402, |
|
"eval_bp": 0.7893, |
|
"eval_counts_1": 7831, |
|
"eval_counts_2": 2955, |
|
"eval_counts_3": 1424, |
|
"eval_counts_4": 694, |
|
"eval_exact_match": 0.0177, |
|
"eval_f1": 0.3417, |
|
"eval_gen_len": 12.6366, |
|
"eval_loss": 1.4626398086547852, |
|
"eval_precisions_1": 45.5715, |
|
"eval_precisions_2": 19.7263, |
|
"eval_precisions_3": 11.1459, |
|
"eval_precisions_4": 6.5645, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3497, |
|
"eval_rouge2": 0.1837, |
|
"eval_rougeL": 0.3358, |
|
"eval_rougeLsum": 0.3354, |
|
"eval_runtime": 448.9344, |
|
"eval_samples_per_second": 4.909, |
|
"eval_steps_per_second": 1.227, |
|
"eval_sys_len": 17184, |
|
"eval_totals_1": 17184, |
|
"eval_totals_2": 14980, |
|
"eval_totals_3": 12776, |
|
"eval_totals_4": 10572, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6907, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 13.0722, |
|
"eval_bp": 0.7735, |
|
"eval_counts_1": 7872, |
|
"eval_counts_2": 3023, |
|
"eval_counts_3": 1482, |
|
"eval_counts_4": 740, |
|
"eval_exact_match": 0.0177, |
|
"eval_f1": 0.3483, |
|
"eval_gen_len": 12.564, |
|
"eval_loss": 1.439197301864624, |
|
"eval_precisions_1": 46.5606, |
|
"eval_precisions_2": 20.5604, |
|
"eval_precisions_3": 11.8569, |
|
"eval_precisions_4": 7.188, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3566, |
|
"eval_rouge2": 0.1896, |
|
"eval_rougeL": 0.3432, |
|
"eval_rougeLsum": 0.343, |
|
"eval_runtime": 718.6776, |
|
"eval_samples_per_second": 3.067, |
|
"eval_steps_per_second": 0.767, |
|
"eval_sys_len": 16907, |
|
"eval_totals_1": 16907, |
|
"eval_totals_2": 14703, |
|
"eval_totals_3": 12499, |
|
"eval_totals_4": 10295, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6159, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_bleu": 13.5053, |
|
"eval_bp": 0.7797, |
|
"eval_counts_1": 7981, |
|
"eval_counts_2": 3128, |
|
"eval_counts_3": 1542, |
|
"eval_counts_4": 773, |
|
"eval_exact_match": 0.0191, |
|
"eval_f1": 0.3543, |
|
"eval_gen_len": 12.5749, |
|
"eval_loss": 1.4288065433502197, |
|
"eval_precisions_1": 46.9029, |
|
"eval_precisions_2": 21.118, |
|
"eval_precisions_3": 12.2303, |
|
"eval_precisions_4": 7.4298, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.363, |
|
"eval_rouge2": 0.1952, |
|
"eval_rougeL": 0.3504, |
|
"eval_rougeLsum": 0.3502, |
|
"eval_runtime": 709.1881, |
|
"eval_samples_per_second": 3.108, |
|
"eval_steps_per_second": 0.777, |
|
"eval_sys_len": 17016, |
|
"eval_totals_1": 17016, |
|
"eval_totals_2": 14812, |
|
"eval_totals_3": 12608, |
|
"eval_totals_4": 10404, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.556, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 13.2095, |
|
"eval_bp": 0.797, |
|
"eval_counts_1": 8014, |
|
"eval_counts_2": 3046, |
|
"eval_counts_3": 1496, |
|
"eval_counts_4": 748, |
|
"eval_exact_match": 0.0222, |
|
"eval_f1": 0.355, |
|
"eval_gen_len": 12.7641, |
|
"eval_loss": 1.4131838083267212, |
|
"eval_precisions_1": 46.2702, |
|
"eval_precisions_2": 20.1508, |
|
"eval_precisions_3": 11.5861, |
|
"eval_precisions_4": 6.9854, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3632, |
|
"eval_rouge2": 0.1903, |
|
"eval_rougeL": 0.3489, |
|
"eval_rougeLsum": 0.3491, |
|
"eval_runtime": 736.4055, |
|
"eval_samples_per_second": 2.993, |
|
"eval_steps_per_second": 0.748, |
|
"eval_sys_len": 17320, |
|
"eval_totals_1": 17320, |
|
"eval_totals_2": 15116, |
|
"eval_totals_3": 12912, |
|
"eval_totals_4": 10708, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4951, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 14.1831, |
|
"eval_bp": 0.789, |
|
"eval_counts_1": 8342, |
|
"eval_counts_2": 3271, |
|
"eval_counts_3": 1622, |
|
"eval_counts_4": 819, |
|
"eval_exact_match": 0.0218, |
|
"eval_f1": 0.3769, |
|
"eval_gen_len": 12.7654, |
|
"eval_loss": 1.3926042318344116, |
|
"eval_precisions_1": 48.5621, |
|
"eval_precisions_2": 21.8445, |
|
"eval_precisions_3": 12.7016, |
|
"eval_precisions_4": 7.7513, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3843, |
|
"eval_rouge2": 0.2059, |
|
"eval_rougeL": 0.3704, |
|
"eval_rougeLsum": 0.3704, |
|
"eval_runtime": 695.8554, |
|
"eval_samples_per_second": 3.167, |
|
"eval_steps_per_second": 0.792, |
|
"eval_sys_len": 17178, |
|
"eval_totals_1": 17178, |
|
"eval_totals_2": 14974, |
|
"eval_totals_3": 12770, |
|
"eval_totals_4": 10566, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4522, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_bleu": 15.0442, |
|
"eval_bp": 0.8187, |
|
"eval_counts_1": 8639, |
|
"eval_counts_2": 3449, |
|
"eval_counts_3": 1740, |
|
"eval_counts_4": 891, |
|
"eval_exact_match": 0.024, |
|
"eval_f1": 0.3895, |
|
"eval_gen_len": 13.1016, |
|
"eval_loss": 1.3769304752349854, |
|
"eval_precisions_1": 48.7859, |
|
"eval_precisions_2": 22.2459, |
|
"eval_precisions_3": 13.0827, |
|
"eval_precisions_4": 8.0299, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3972, |
|
"eval_rouge2": 0.2129, |
|
"eval_rougeL": 0.3821, |
|
"eval_rougeLsum": 0.3823, |
|
"eval_runtime": 733.5109, |
|
"eval_samples_per_second": 3.005, |
|
"eval_steps_per_second": 0.751, |
|
"eval_sys_len": 17708, |
|
"eval_totals_1": 17708, |
|
"eval_totals_2": 15504, |
|
"eval_totals_3": 13300, |
|
"eval_totals_4": 11096, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3663, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 15.2622, |
|
"eval_bp": 0.8168, |
|
"eval_counts_1": 8736, |
|
"eval_counts_2": 3468, |
|
"eval_counts_3": 1747, |
|
"eval_counts_4": 924, |
|
"eval_exact_match": 0.0245, |
|
"eval_f1": 0.3946, |
|
"eval_gen_len": 13.0399, |
|
"eval_loss": 1.3676577806472778, |
|
"eval_precisions_1": 49.4285, |
|
"eval_precisions_2": 22.4176, |
|
"eval_precisions_3": 13.169, |
|
"eval_precisions_4": 8.3529, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4027, |
|
"eval_rouge2": 0.215, |
|
"eval_rougeL": 0.3871, |
|
"eval_rougeLsum": 0.387, |
|
"eval_runtime": 746.3261, |
|
"eval_samples_per_second": 2.953, |
|
"eval_steps_per_second": 0.738, |
|
"eval_sys_len": 17674, |
|
"eval_totals_1": 17674, |
|
"eval_totals_2": 15470, |
|
"eval_totals_3": 13266, |
|
"eval_totals_4": 11062, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3122, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_bleu": 15.3943, |
|
"eval_bp": 0.8308, |
|
"eval_counts_1": 8833, |
|
"eval_counts_2": 3533, |
|
"eval_counts_3": 1780, |
|
"eval_counts_4": 915, |
|
"eval_exact_match": 0.0222, |
|
"eval_f1": 0.3975, |
|
"eval_gen_len": 13.3494, |
|
"eval_loss": 1.352068305015564, |
|
"eval_precisions_1": 49.272, |
|
"eval_precisions_2": 22.4703, |
|
"eval_precisions_3": 13.1667, |
|
"eval_precisions_4": 8.0866, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4055, |
|
"eval_rouge2": 0.219, |
|
"eval_rougeL": 0.3915, |
|
"eval_rougeLsum": 0.3915, |
|
"eval_runtime": 815.025, |
|
"eval_samples_per_second": 2.704, |
|
"eval_steps_per_second": 0.676, |
|
"eval_sys_len": 17927, |
|
"eval_totals_1": 17927, |
|
"eval_totals_2": 15723, |
|
"eval_totals_3": 13519, |
|
"eval_totals_4": 11315, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2641, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 16.1011, |
|
"eval_bp": 0.848, |
|
"eval_counts_1": 9048, |
|
"eval_counts_2": 3668, |
|
"eval_counts_3": 1864, |
|
"eval_counts_4": 989, |
|
"eval_exact_match": 0.0268, |
|
"eval_f1": 0.408, |
|
"eval_gen_len": 13.5508, |
|
"eval_loss": 1.3493599891662598, |
|
"eval_precisions_1": 49.5998, |
|
"eval_precisions_2": 22.8707, |
|
"eval_precisions_3": 13.474, |
|
"eval_precisions_4": 8.5039, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4165, |
|
"eval_rouge2": 0.2265, |
|
"eval_rougeL": 0.4011, |
|
"eval_rougeLsum": 0.401, |
|
"eval_runtime": 726.7867, |
|
"eval_samples_per_second": 3.033, |
|
"eval_steps_per_second": 0.758, |
|
"eval_sys_len": 18242, |
|
"eval_totals_1": 18242, |
|
"eval_totals_2": 16038, |
|
"eval_totals_3": 13834, |
|
"eval_totals_4": 11630, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2359, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 16.3595, |
|
"eval_bp": 0.8402, |
|
"eval_counts_1": 9075, |
|
"eval_counts_2": 3709, |
|
"eval_counts_3": 1907, |
|
"eval_counts_4": 1013, |
|
"eval_exact_match": 0.0259, |
|
"eval_f1": 0.4113, |
|
"eval_gen_len": 13.5681, |
|
"eval_loss": 1.3488041162490845, |
|
"eval_precisions_1": 50.1437, |
|
"eval_precisions_2": 23.3359, |
|
"eval_precisions_3": 13.9299, |
|
"eval_precisions_4": 8.8194, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4195, |
|
"eval_rouge2": 0.2298, |
|
"eval_rougeL": 0.4041, |
|
"eval_rougeLsum": 0.4038, |
|
"eval_runtime": 701.8557, |
|
"eval_samples_per_second": 3.14, |
|
"eval_steps_per_second": 0.785, |
|
"eval_sys_len": 18098, |
|
"eval_totals_1": 18098, |
|
"eval_totals_2": 15894, |
|
"eval_totals_3": 13690, |
|
"eval_totals_4": 11486, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1754, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_bleu": 16.7083, |
|
"eval_bp": 0.8547, |
|
"eval_counts_1": 9182, |
|
"eval_counts_2": 3777, |
|
"eval_counts_3": 1957, |
|
"eval_counts_4": 1048, |
|
"eval_exact_match": 0.0268, |
|
"eval_f1": 0.4145, |
|
"eval_gen_len": 13.6534, |
|
"eval_loss": 1.3482075929641724, |
|
"eval_precisions_1": 49.9946, |
|
"eval_precisions_2": 23.3696, |
|
"eval_precisions_3": 14.0206, |
|
"eval_precisions_4": 8.9161, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4227, |
|
"eval_rouge2": 0.2314, |
|
"eval_rougeL": 0.406, |
|
"eval_rougeLsum": 0.4058, |
|
"eval_runtime": 469.6435, |
|
"eval_samples_per_second": 4.693, |
|
"eval_steps_per_second": 1.173, |
|
"eval_sys_len": 18366, |
|
"eval_totals_1": 18366, |
|
"eval_totals_2": 16162, |
|
"eval_totals_3": 13958, |
|
"eval_totals_4": 11754, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1367, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_bleu": 16.5803, |
|
"eval_bp": 0.8517, |
|
"eval_counts_1": 9164, |
|
"eval_counts_2": 3761, |
|
"eval_counts_3": 1935, |
|
"eval_counts_4": 1033, |
|
"eval_exact_match": 0.0245, |
|
"eval_f1": 0.4147, |
|
"eval_gen_len": 13.6152, |
|
"eval_loss": 1.3501369953155518, |
|
"eval_precisions_1": 50.0492, |
|
"eval_precisions_2": 23.3515, |
|
"eval_precisions_3": 13.9189, |
|
"eval_precisions_4": 8.8306, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4225, |
|
"eval_rouge2": 0.2316, |
|
"eval_rougeL": 0.4078, |
|
"eval_rougeLsum": 0.4079, |
|
"eval_runtime": 480.2308, |
|
"eval_samples_per_second": 4.589, |
|
"eval_steps_per_second": 1.147, |
|
"eval_sys_len": 18310, |
|
"eval_totals_1": 18310, |
|
"eval_totals_2": 16106, |
|
"eval_totals_3": 13902, |
|
"eval_totals_4": 11698, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.096, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 16.5513, |
|
"eval_bp": 0.8499, |
|
"eval_counts_1": 9126, |
|
"eval_counts_2": 3712, |
|
"eval_counts_3": 1922, |
|
"eval_counts_4": 1050, |
|
"eval_exact_match": 0.0295, |
|
"eval_f1": 0.4141, |
|
"eval_gen_len": 13.6325, |
|
"eval_loss": 1.358604907989502, |
|
"eval_precisions_1": 49.9316, |
|
"eval_precisions_2": 23.0946, |
|
"eval_precisions_3": 13.8582, |
|
"eval_precisions_4": 9.0013, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4217, |
|
"eval_rouge2": 0.2304, |
|
"eval_rougeL": 0.4066, |
|
"eval_rougeLsum": 0.4066, |
|
"eval_runtime": 465.7019, |
|
"eval_samples_per_second": 4.733, |
|
"eval_steps_per_second": 1.183, |
|
"eval_sys_len": 18277, |
|
"eval_totals_1": 18277, |
|
"eval_totals_2": 16073, |
|
"eval_totals_3": 13869, |
|
"eval_totals_4": 11665, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0571, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 16.4708, |
|
"eval_bp": 0.8446, |
|
"eval_counts_1": 9087, |
|
"eval_counts_2": 3707, |
|
"eval_counts_3": 1923, |
|
"eval_counts_4": 1033, |
|
"eval_exact_match": 0.029, |
|
"eval_f1": 0.4116, |
|
"eval_gen_len": 13.5172, |
|
"eval_loss": 1.3658462762832642, |
|
"eval_precisions_1": 49.9862, |
|
"eval_precisions_2": 23.205, |
|
"eval_precisions_3": 13.9641, |
|
"eval_precisions_4": 8.9306, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4196, |
|
"eval_rouge2": 0.2301, |
|
"eval_rougeL": 0.4049, |
|
"eval_rougeLsum": 0.4049, |
|
"eval_runtime": 463.8447, |
|
"eval_samples_per_second": 4.752, |
|
"eval_steps_per_second": 1.188, |
|
"eval_sys_len": 18179, |
|
"eval_totals_1": 18179, |
|
"eval_totals_2": 15975, |
|
"eval_totals_3": 13771, |
|
"eval_totals_4": 11567, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.036, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 16.8386, |
|
"eval_bp": 0.8528, |
|
"eval_counts_1": 9206, |
|
"eval_counts_2": 3806, |
|
"eval_counts_3": 1976, |
|
"eval_counts_4": 1059, |
|
"eval_exact_match": 0.0309, |
|
"eval_f1": 0.4174, |
|
"eval_gen_len": 13.7205, |
|
"eval_loss": 1.367233395576477, |
|
"eval_precisions_1": 50.2182, |
|
"eval_precisions_2": 23.5987, |
|
"eval_precisions_3": 14.1913, |
|
"eval_precisions_4": 9.0358, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4254, |
|
"eval_rouge2": 0.2348, |
|
"eval_rougeL": 0.4106, |
|
"eval_rougeLsum": 0.4107, |
|
"eval_runtime": 489.8628, |
|
"eval_samples_per_second": 4.499, |
|
"eval_steps_per_second": 1.125, |
|
"eval_sys_len": 18332, |
|
"eval_totals_1": 18332, |
|
"eval_totals_2": 16128, |
|
"eval_totals_3": 13924, |
|
"eval_totals_4": 11720, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9785, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"eval_bleu": 16.8234, |
|
"eval_bp": 0.8438, |
|
"eval_counts_1": 9180, |
|
"eval_counts_2": 3796, |
|
"eval_counts_3": 1973, |
|
"eval_counts_4": 1059, |
|
"eval_exact_match": 0.0327, |
|
"eval_f1": 0.4172, |
|
"eval_gen_len": 13.5113, |
|
"eval_loss": 1.381914496421814, |
|
"eval_precisions_1": 50.5395, |
|
"eval_precisions_2": 23.7845, |
|
"eval_precisions_3": 14.3428, |
|
"eval_precisions_4": 9.1672, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4254, |
|
"eval_rouge2": 0.2344, |
|
"eval_rougeL": 0.4116, |
|
"eval_rougeLsum": 0.4117, |
|
"eval_runtime": 465.8344, |
|
"eval_samples_per_second": 4.731, |
|
"eval_steps_per_second": 1.183, |
|
"eval_sys_len": 18164, |
|
"eval_totals_1": 18164, |
|
"eval_totals_2": 15960, |
|
"eval_totals_3": 13756, |
|
"eval_totals_4": 11552, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"step": 1440, |
|
"total_flos": 4.419252384883016e+17, |
|
"train_loss": 1.7299000342686972, |
|
"train_runtime": 27815.7883, |
|
"train_samples_per_second": 6.697, |
|
"train_steps_per_second": 0.052 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1440, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4.419252384883016e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|