{ "best_metric": 37.6244, "best_model_checkpoint": "./save/multi_news_longt5_base_weighted_ce/checkpoint-32000", "epoch": 3.0, "global_step": 33723, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 9.851733238442606e-05, "loss": 5.4403, "step": 500 }, { "epoch": 0.09, "learning_rate": 9.703466476885213e-05, "loss": 4.2866, "step": 1000 }, { "epoch": 0.09, "eval_block_avg": 0.0, "eval_gen_len": 425.48674613058176, "eval_loss": 2.7076995372772217, "eval_rouge1": 23.4899, "eval_rouge2": 6.1612, "eval_rougeL": 15.6671, "eval_rougeLsum": 20.2981, "eval_runtime": 3496.9942, "eval_samples_per_second": 1.607, "eval_steps_per_second": 0.05, "step": 1000 }, { "epoch": 0.13, "learning_rate": 9.555199715327818e-05, "loss": 3.9482, "step": 1500 }, { "epoch": 0.18, "learning_rate": 9.406932953770424e-05, "loss": 3.7678, "step": 2000 }, { "epoch": 0.18, "eval_block_avg": 0.0, "eval_gen_len": 345.54794520547944, "eval_loss": 2.533926248550415, "eval_rouge1": 29.7909, "eval_rouge2": 9.3938, "eval_rougeL": 18.5202, "eval_rougeLsum": 26.3629, "eval_runtime": 3480.628, "eval_samples_per_second": 1.615, "eval_steps_per_second": 0.051, "step": 2000 }, { "epoch": 0.22, "learning_rate": 9.25866619221303e-05, "loss": 3.6022, "step": 2500 }, { "epoch": 0.27, "learning_rate": 9.110399430655636e-05, "loss": 3.5226, "step": 3000 }, { "epoch": 0.27, "eval_block_avg": 0.0, "eval_gen_len": 404.8505603985056, "eval_loss": 2.408491849899292, "eval_rouge1": 30.3236, "eval_rouge2": 9.683, "eval_rougeL": 18.7128, "eval_rougeLsum": 26.801, "eval_runtime": 3552.594, "eval_samples_per_second": 1.582, "eval_steps_per_second": 0.05, "step": 3000 }, { "epoch": 0.31, "learning_rate": 8.962132669098243e-05, "loss": 3.4401, "step": 3500 }, { "epoch": 0.36, "learning_rate": 8.813865907540848e-05, "loss": 3.3831, "step": 4000 }, { "epoch": 0.36, "eval_block_avg": 0.0, "eval_gen_len": 285.1569115815691, "eval_loss": 2.3446125984191895, "eval_rouge1": 33.4103, "eval_rouge2": 11.0056, "eval_rougeL": 20.1061, "eval_rougeLsum": 29.8896, "eval_runtime": 3417.5986, "eval_samples_per_second": 1.645, "eval_steps_per_second": 0.051, "step": 4000 }, { "epoch": 0.4, "learning_rate": 8.665599145983454e-05, "loss": 3.2864, "step": 4500 }, { "epoch": 0.44, "learning_rate": 8.51733238442606e-05, "loss": 3.2544, "step": 5000 }, { "epoch": 0.44, "eval_block_avg": 0.0, "eval_gen_len": 287.08663938800925, "eval_loss": 2.2861385345458984, "eval_rouge1": 34.4828, "eval_rouge2": 11.5674, "eval_rougeL": 20.789, "eval_rougeLsum": 31.0001, "eval_runtime": 3421.9145, "eval_samples_per_second": 1.643, "eval_steps_per_second": 0.051, "step": 5000 }, { "epoch": 0.49, "learning_rate": 8.369065622868666e-05, "loss": 3.2155, "step": 5500 }, { "epoch": 0.53, "learning_rate": 8.220798861311271e-05, "loss": 3.1714, "step": 6000 }, { "epoch": 0.53, "eval_block_avg": 0.0, "eval_gen_len": 294.1647393702188, "eval_loss": 2.2531518936157227, "eval_rouge1": 35.2859, "eval_rouge2": 12.0002, "eval_rougeL": 20.8964, "eval_rougeLsum": 31.7411, "eval_runtime": 3423.9736, "eval_samples_per_second": 1.642, "eval_steps_per_second": 0.051, "step": 6000 }, { "epoch": 0.58, "learning_rate": 8.072532099753878e-05, "loss": 3.1413, "step": 6500 }, { "epoch": 0.62, "learning_rate": 7.924265338196484e-05, "loss": 3.1154, "step": 7000 }, { "epoch": 0.62, "eval_block_avg": 0.0, "eval_gen_len": 310.6292474648639, "eval_loss": 2.2275736331939697, "eval_rouge1": 35.8542, "eval_rouge2": 12.3243, "eval_rougeL": 21.1165, "eval_rougeLsum": 32.2099, "eval_runtime": 3465.1719, "eval_samples_per_second": 1.622, "eval_steps_per_second": 0.051, "step": 7000 }, { "epoch": 0.67, "learning_rate": 7.775998576639089e-05, "loss": 3.0784, "step": 7500 }, { "epoch": 0.71, "learning_rate": 7.627731815081696e-05, "loss": 3.0757, "step": 8000 }, { "epoch": 0.71, "eval_block_avg": 0.0, "eval_gen_len": 203.26721223981497, "eval_loss": 2.194120407104492, "eval_rouge1": 36.9629, "eval_rouge2": 13.0355, "eval_rougeL": 21.6003, "eval_rougeLsum": 33.5146, "eval_runtime": 3112.0404, "eval_samples_per_second": 1.806, "eval_steps_per_second": 0.057, "step": 8000 }, { "epoch": 0.76, "learning_rate": 7.479465053524301e-05, "loss": 3.0279, "step": 8500 }, { "epoch": 0.8, "learning_rate": 7.331198291966908e-05, "loss": 3.0082, "step": 9000 }, { "epoch": 0.8, "eval_block_avg": 0.0, "eval_gen_len": 225.13271659847, "eval_loss": 2.182365655899048, "eval_rouge1": 36.8991, "eval_rouge2": 12.9026, "eval_rougeL": 21.6323, "eval_rougeLsum": 33.4198, "eval_runtime": 3290.7301, "eval_samples_per_second": 1.708, "eval_steps_per_second": 0.053, "step": 9000 }, { "epoch": 0.85, "learning_rate": 7.182931530409514e-05, "loss": 3.0088, "step": 9500 }, { "epoch": 0.89, "learning_rate": 7.034664768852119e-05, "loss": 2.9619, "step": 10000 }, { "epoch": 0.89, "eval_block_avg": 0.0, "eval_gen_len": 249.00177904287494, "eval_loss": 2.1777162551879883, "eval_rouge1": 38.0417, "eval_rouge2": 13.2345, "eval_rougeL": 21.668, "eval_rougeLsum": 34.3626, "eval_runtime": 3366.257, "eval_samples_per_second": 1.67, "eval_steps_per_second": 0.052, "step": 10000 }, { "epoch": 0.93, "learning_rate": 6.886398007294726e-05, "loss": 2.9599, "step": 10500 }, { "epoch": 0.98, "learning_rate": 6.738131245737331e-05, "loss": 2.9378, "step": 11000 }, { "epoch": 0.98, "eval_block_avg": 0.0, "eval_gen_len": 259.15815691158156, "eval_loss": 2.151700496673584, "eval_rouge1": 38.0194, "eval_rouge2": 13.365, "eval_rougeL": 21.9451, "eval_rougeLsum": 34.4388, "eval_runtime": 3366.4307, "eval_samples_per_second": 1.67, "eval_steps_per_second": 0.052, "step": 11000 }, { "epoch": 1.02, "learning_rate": 6.589864484179937e-05, "loss": 2.911, "step": 11500 }, { "epoch": 1.07, "learning_rate": 6.441597722622544e-05, "loss": 2.8819, "step": 12000 }, { "epoch": 1.07, "eval_block_avg": 0.0, "eval_gen_len": 238.5306884895926, "eval_loss": 2.1349971294403076, "eval_rouge1": 38.6339, "eval_rouge2": 13.5411, "eval_rougeL": 22.0768, "eval_rougeLsum": 35.0001, "eval_runtime": 3302.6091, "eval_samples_per_second": 1.702, "eval_steps_per_second": 0.053, "step": 12000 }, { "epoch": 1.11, "learning_rate": 6.293330961065149e-05, "loss": 2.876, "step": 12500 }, { "epoch": 1.16, "learning_rate": 6.145064199507754e-05, "loss": 2.8647, "step": 13000 }, { "epoch": 1.16, "eval_block_avg": 0.0, "eval_gen_len": 249.58797367016544, "eval_loss": 2.135904550552368, "eval_rouge1": 39.6584, "eval_rouge2": 14.1798, "eval_rougeL": 22.2953, "eval_rougeLsum": 35.9444, "eval_runtime": 3354.399, "eval_samples_per_second": 1.676, "eval_steps_per_second": 0.052, "step": 13000 }, { "epoch": 1.2, "learning_rate": 5.996797437950361e-05, "loss": 2.8483, "step": 13500 }, { "epoch": 1.25, "learning_rate": 5.848530676392967e-05, "loss": 2.8431, "step": 14000 }, { "epoch": 1.25, "eval_block_avg": 0.0, "eval_gen_len": 246.02045899306174, "eval_loss": 2.119384288787842, "eval_rouge1": 39.2036, "eval_rouge2": 13.9567, "eval_rougeL": 22.2848, "eval_rougeLsum": 35.6373, "eval_runtime": 3329.6362, "eval_samples_per_second": 1.688, "eval_steps_per_second": 0.053, "step": 14000 }, { "epoch": 1.29, "learning_rate": 5.700263914835573e-05, "loss": 2.834, "step": 14500 }, { "epoch": 1.33, "learning_rate": 5.551997153278178e-05, "loss": 2.8489, "step": 15000 }, { "epoch": 1.33, "eval_block_avg": 0.0, "eval_gen_len": 273.44209215442095, "eval_loss": 2.114163875579834, "eval_rouge1": 39.0102, "eval_rouge2": 13.665, "eval_rougeL": 22.2047, "eval_rougeLsum": 35.3681, "eval_runtime": 3369.9399, "eval_samples_per_second": 1.668, "eval_steps_per_second": 0.052, "step": 15000 }, { "epoch": 1.38, "learning_rate": 5.403730391720784e-05, "loss": 2.8122, "step": 15500 }, { "epoch": 1.42, "learning_rate": 5.25546363016339e-05, "loss": 2.7927, "step": 16000 }, { "epoch": 1.42, "eval_block_avg": 0.0, "eval_gen_len": 244.72140188578544, "eval_loss": 2.1085472106933594, "eval_rouge1": 39.9059, "eval_rouge2": 14.4122, "eval_rougeL": 22.5028, "eval_rougeLsum": 36.2593, "eval_runtime": 3298.5033, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.053, "step": 16000 }, { "epoch": 1.47, "learning_rate": 5.1071968686059954e-05, "loss": 2.8072, "step": 16500 }, { "epoch": 1.51, "learning_rate": 4.958930107048602e-05, "loss": 2.8051, "step": 17000 }, { "epoch": 1.51, "eval_block_avg": 0.0, "eval_gen_len": 230.1517523572318, "eval_loss": 2.1006970405578613, "eval_rouge1": 39.3406, "eval_rouge2": 13.9501, "eval_rougeL": 22.2845, "eval_rougeLsum": 35.8231, "eval_runtime": 3168.0344, "eval_samples_per_second": 1.774, "eval_steps_per_second": 0.056, "step": 17000 }, { "epoch": 1.56, "learning_rate": 4.810663345491208e-05, "loss": 2.8069, "step": 17500 }, { "epoch": 1.6, "learning_rate": 4.662396583933814e-05, "loss": 2.7997, "step": 18000 }, { "epoch": 1.6, "eval_block_avg": 0.0, "eval_gen_len": 266.1441024728696, "eval_loss": 2.0949618816375732, "eval_rouge1": 39.4434, "eval_rouge2": 14.046, "eval_rougeL": 22.3371, "eval_rougeLsum": 35.8579, "eval_runtime": 3355.3929, "eval_samples_per_second": 1.675, "eval_steps_per_second": 0.052, "step": 18000 }, { "epoch": 1.65, "learning_rate": 4.51412982237642e-05, "loss": 2.766, "step": 18500 }, { "epoch": 1.69, "learning_rate": 4.365863060819026e-05, "loss": 2.7894, "step": 19000 }, { "epoch": 1.69, "eval_block_avg": 0.0, "eval_gen_len": 237.53833837395482, "eval_loss": 2.0886101722717285, "eval_rouge1": 40.6408, "eval_rouge2": 14.9391, "eval_rougeL": 22.8721, "eval_rougeLsum": 37.0402, "eval_runtime": 3398.8421, "eval_samples_per_second": 1.654, "eval_steps_per_second": 0.052, "step": 19000 }, { "epoch": 1.73, "learning_rate": 4.217596299261632e-05, "loss": 2.7678, "step": 19500 }, { "epoch": 1.78, "learning_rate": 4.0693295377042376e-05, "loss": 2.7881, "step": 20000 }, { "epoch": 1.78, "eval_block_avg": 0.0, "eval_gen_len": 238.2777085927771, "eval_loss": 2.082338809967041, "eval_rouge1": 40.1463, "eval_rouge2": 14.5452, "eval_rougeL": 22.6645, "eval_rougeLsum": 36.568, "eval_runtime": 3243.3567, "eval_samples_per_second": 1.733, "eval_steps_per_second": 0.054, "step": 20000 }, { "epoch": 1.82, "learning_rate": 3.921062776146844e-05, "loss": 2.7382, "step": 20500 }, { "epoch": 1.87, "learning_rate": 3.77279601458945e-05, "loss": 2.7694, "step": 21000 }, { "epoch": 1.87, "eval_block_avg": 0.0, "eval_gen_len": 242.4057996797723, "eval_loss": 2.0775482654571533, "eval_rouge1": 39.9889, "eval_rouge2": 14.4044, "eval_rougeL": 22.5574, "eval_rougeLsum": 36.3926, "eval_runtime": 3265.4881, "eval_samples_per_second": 1.721, "eval_steps_per_second": 0.054, "step": 21000 }, { "epoch": 1.91, "learning_rate": 3.624529253032056e-05, "loss": 2.7371, "step": 21500 }, { "epoch": 1.96, "learning_rate": 3.4762624914746615e-05, "loss": 2.7382, "step": 22000 }, { "epoch": 1.96, "eval_block_avg": 0.0, "eval_gen_len": 243.57089485856608, "eval_loss": 2.0729196071624756, "eval_rouge1": 40.4506, "eval_rouge2": 14.6977, "eval_rougeL": 22.7451, "eval_rougeLsum": 36.9094, "eval_runtime": 3321.1737, "eval_samples_per_second": 1.692, "eval_steps_per_second": 0.053, "step": 22000 }, { "epoch": 2.0, "learning_rate": 3.327995729917267e-05, "loss": 2.7799, "step": 22500 }, { "epoch": 2.05, "learning_rate": 3.179728968359873e-05, "loss": 2.7137, "step": 23000 }, { "epoch": 2.05, "eval_block_avg": 0.0, "eval_gen_len": 238.74114926169722, "eval_loss": 2.0669145584106445, "eval_rouge1": 40.7645, "eval_rouge2": 14.864, "eval_rougeL": 22.8929, "eval_rougeLsum": 37.2304, "eval_runtime": 4021.2465, "eval_samples_per_second": 1.398, "eval_steps_per_second": 0.044, "step": 23000 }, { "epoch": 2.09, "learning_rate": 3.031462206802479e-05, "loss": 2.7336, "step": 23500 }, { "epoch": 2.14, "learning_rate": 2.883195445245085e-05, "loss": 2.7185, "step": 24000 }, { "epoch": 2.14, "eval_block_avg": 0.0, "eval_gen_len": 238.28713752001423, "eval_loss": 2.07470965385437, "eval_rouge1": 40.5652, "eval_rouge2": 14.7185, "eval_rougeL": 22.7403, "eval_rougeLsum": 36.9759, "eval_runtime": 3243.9209, "eval_samples_per_second": 1.733, "eval_steps_per_second": 0.054, "step": 24000 }, { "epoch": 2.18, "learning_rate": 2.7349286836876908e-05, "loss": 2.715, "step": 24500 }, { "epoch": 2.22, "learning_rate": 2.586661922130297e-05, "loss": 2.7104, "step": 25000 }, { "epoch": 2.22, "eval_block_avg": 0.0, "eval_gen_len": 257.9969756271126, "eval_loss": 2.0688774585723877, "eval_rouge1": 41.0927, "eval_rouge2": 15.0971, "eval_rougeL": 22.9602, "eval_rougeLsum": 37.455, "eval_runtime": 3349.2799, "eval_samples_per_second": 1.678, "eval_steps_per_second": 0.053, "step": 25000 }, { "epoch": 2.27, "learning_rate": 2.438395160572903e-05, "loss": 2.7097, "step": 25500 }, { "epoch": 2.31, "learning_rate": 2.290128399015509e-05, "loss": 2.7038, "step": 26000 }, { "epoch": 2.31, "eval_block_avg": 0.0, "eval_gen_len": 245.09464508094646, "eval_loss": 2.0660908222198486, "eval_rouge1": 40.7524, "eval_rouge2": 14.7791, "eval_rougeL": 22.7677, "eval_rougeLsum": 37.1449, "eval_runtime": 3282.519, "eval_samples_per_second": 1.712, "eval_steps_per_second": 0.054, "step": 26000 }, { "epoch": 2.36, "learning_rate": 2.1418616374581146e-05, "loss": 2.7061, "step": 26500 }, { "epoch": 2.4, "learning_rate": 1.9935948759007204e-05, "loss": 2.6955, "step": 27000 }, { "epoch": 2.4, "eval_block_avg": 0.0, "eval_gen_len": 239.2723714641523, "eval_loss": 2.067803144454956, "eval_rouge1": 41.0961, "eval_rouge2": 15.0948, "eval_rougeL": 22.9771, "eval_rougeLsum": 37.4876, "eval_runtime": 3276.1425, "eval_samples_per_second": 1.716, "eval_steps_per_second": 0.054, "step": 27000 }, { "epoch": 2.45, "learning_rate": 1.8453281143433266e-05, "loss": 2.6783, "step": 27500 }, { "epoch": 2.49, "learning_rate": 1.6970613527859324e-05, "loss": 2.6906, "step": 28000 }, { "epoch": 2.49, "eval_block_avg": 0.0, "eval_gen_len": 249.8028820494574, "eval_loss": 2.0625176429748535, "eval_rouge1": 41.1383, "eval_rouge2": 14.9956, "eval_rougeL": 22.9293, "eval_rougeLsum": 37.5443, "eval_runtime": 3296.0562, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.053, "step": 28000 }, { "epoch": 2.54, "learning_rate": 1.5487945912285385e-05, "loss": 2.6802, "step": 28500 }, { "epoch": 2.58, "learning_rate": 1.4005278296711445e-05, "loss": 2.7057, "step": 29000 }, { "epoch": 2.58, "eval_block_avg": 0.0, "eval_gen_len": 238.6906244440491, "eval_loss": 2.060208559036255, "eval_rouge1": 41.1908, "eval_rouge2": 15.1675, "eval_rougeL": 23.0798, "eval_rougeLsum": 37.5906, "eval_runtime": 3211.8848, "eval_samples_per_second": 1.75, "eval_steps_per_second": 0.055, "step": 29000 }, { "epoch": 2.62, "learning_rate": 1.2522610681137503e-05, "loss": 2.6806, "step": 29500 }, { "epoch": 2.67, "learning_rate": 1.1039943065563562e-05, "loss": 2.6925, "step": 30000 }, { "epoch": 2.67, "eval_block_avg": 0.0, "eval_gen_len": 237.07045009784736, "eval_loss": 2.0573508739471436, "eval_rouge1": 40.9739, "eval_rouge2": 14.8989, "eval_rougeL": 22.8636, "eval_rougeLsum": 37.3584, "eval_runtime": 3212.5536, "eval_samples_per_second": 1.75, "eval_steps_per_second": 0.055, "step": 30000 }, { "epoch": 2.71, "learning_rate": 9.557275449989622e-06, "loss": 2.6881, "step": 30500 }, { "epoch": 2.76, "learning_rate": 8.074607834415681e-06, "loss": 2.6629, "step": 31000 }, { "epoch": 2.76, "eval_block_avg": 0.0, "eval_gen_len": 236.07898950364705, "eval_loss": 2.0589113235473633, "eval_rouge1": 41.2017, "eval_rouge2": 15.1175, "eval_rougeL": 22.9679, "eval_rougeLsum": 37.599, "eval_runtime": 3258.1795, "eval_samples_per_second": 1.725, "eval_steps_per_second": 0.054, "step": 31000 }, { "epoch": 2.8, "learning_rate": 6.59194021884174e-06, "loss": 2.65, "step": 31500 }, { "epoch": 2.85, "learning_rate": 5.1092726032678e-06, "loss": 2.6899, "step": 32000 }, { "epoch": 2.85, "eval_block_avg": 0.0, "eval_gen_len": 237.81409001956948, "eval_loss": 2.058128595352173, "eval_rouge1": 41.2137, "eval_rouge2": 15.15, "eval_rougeL": 23.0369, "eval_rougeLsum": 37.6244, "eval_runtime": 3226.4024, "eval_samples_per_second": 1.742, "eval_steps_per_second": 0.055, "step": 32000 }, { "epoch": 2.89, "learning_rate": 3.6266049876938586e-06, "loss": 2.6844, "step": 32500 }, { "epoch": 2.94, "learning_rate": 2.1439373721199182e-06, "loss": 2.6685, "step": 33000 }, { "epoch": 2.94, "eval_block_avg": 0.0, "eval_gen_len": 239.2396370752535, "eval_loss": 2.0559887886047363, "eval_rouge1": 41.2047, "eval_rouge2": 15.0825, "eval_rougeL": 22.9882, "eval_rougeLsum": 37.5913, "eval_runtime": 3189.4588, "eval_samples_per_second": 1.762, "eval_steps_per_second": 0.055, "step": 33000 }, { "epoch": 2.98, "learning_rate": 6.612697565459775e-07, "loss": 2.6767, "step": 33500 }, { "epoch": 3.0, "step": 33723, "total_flos": 3.64032524422656e+17, "train_loss": 2.960794737135194, "train_runtime": 148191.3733, "train_samples_per_second": 0.91, "train_steps_per_second": 0.228 } ], "max_steps": 33723, "num_train_epochs": 3, "total_flos": 3.64032524422656e+17, "trial_name": null, "trial_params": null }