{ "best_metric": 2.0570528507232666, "best_model_checkpoint": "t5-base-snl/checkpoint-2890", "epoch": 19.0, "global_step": 3230, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 4.9632352941176476e-05, "loss": 4.9792, "step": 25 }, { "epoch": 0.29, "learning_rate": 4.9264705882352944e-05, "loss": 3.7166, "step": 50 }, { "epoch": 0.44, "learning_rate": 4.889705882352941e-05, "loss": 3.2528, "step": 75 }, { "epoch": 0.59, "learning_rate": 4.8529411764705885e-05, "loss": 3.0823, "step": 100 }, { "epoch": 0.74, "learning_rate": 4.816176470588236e-05, "loss": 3.0381, "step": 125 }, { "epoch": 0.88, "learning_rate": 4.7794117647058826e-05, "loss": 2.9943, "step": 150 }, { "epoch": 1.0, "eval_gen_len": 18.976800976800977, "eval_loss": 2.2042253017425537, "eval_rouge1": 28.1135, "eval_rouge2": 13.7477, "eval_rougeL": 25.4842, "eval_rougeLsum": 26.6467, "eval_runtime": 22.885, "eval_samples_per_second": 35.788, "eval_steps_per_second": 2.272, "step": 170 }, { "epoch": 1.03, "learning_rate": 4.742647058823529e-05, "loss": 2.9188, "step": 175 }, { "epoch": 1.18, "learning_rate": 4.705882352941177e-05, "loss": 2.8824, "step": 200 }, { "epoch": 1.32, "learning_rate": 4.669117647058824e-05, "loss": 2.8751, "step": 225 }, { "epoch": 1.47, "learning_rate": 4.632352941176471e-05, "loss": 2.8037, "step": 250 }, { "epoch": 1.62, "learning_rate": 4.5955882352941176e-05, "loss": 2.7824, "step": 275 }, { "epoch": 1.76, "learning_rate": 4.558823529411765e-05, "loss": 2.7789, "step": 300 }, { "epoch": 1.91, "learning_rate": 4.522058823529412e-05, "loss": 2.7955, "step": 325 }, { "epoch": 2.0, "eval_gen_len": 18.985347985347985, "eval_loss": 2.1561412811279297, "eval_rouge1": 28.5159, "eval_rouge2": 14.3492, "eval_rougeL": 26.0596, "eval_rougeLsum": 27.2431, "eval_runtime": 23.1378, "eval_samples_per_second": 35.397, "eval_steps_per_second": 2.247, "step": 340 }, { "epoch": 2.06, "learning_rate": 4.485294117647059e-05, "loss": 2.7598, "step": 350 }, { "epoch": 2.21, "learning_rate": 4.448529411764706e-05, "loss": 2.7091, "step": 375 }, { "epoch": 2.35, "learning_rate": 4.411764705882353e-05, "loss": 2.7055, "step": 400 }, { "epoch": 2.5, "learning_rate": 4.375e-05, "loss": 2.7163, "step": 425 }, { "epoch": 2.65, "learning_rate": 4.3382352941176474e-05, "loss": 2.6844, "step": 450 }, { "epoch": 2.79, "learning_rate": 4.301470588235295e-05, "loss": 2.686, "step": 475 }, { "epoch": 2.94, "learning_rate": 4.2647058823529415e-05, "loss": 2.6378, "step": 500 }, { "epoch": 3.0, "eval_gen_len": 18.99145299145299, "eval_loss": 2.130974531173706, "eval_rouge1": 28.9554, "eval_rouge2": 14.6901, "eval_rougeL": 26.4208, "eval_rougeLsum": 27.5523, "eval_runtime": 23.1305, "eval_samples_per_second": 35.408, "eval_steps_per_second": 2.248, "step": 510 }, { "epoch": 3.09, "learning_rate": 4.227941176470588e-05, "loss": 2.6666, "step": 525 }, { "epoch": 3.24, "learning_rate": 4.1911764705882356e-05, "loss": 2.6372, "step": 550 }, { "epoch": 3.38, "learning_rate": 4.154411764705883e-05, "loss": 2.6506, "step": 575 }, { "epoch": 3.53, "learning_rate": 4.11764705882353e-05, "loss": 2.6104, "step": 600 }, { "epoch": 3.68, "learning_rate": 4.0808823529411765e-05, "loss": 2.5946, "step": 625 }, { "epoch": 3.82, "learning_rate": 4.044117647058824e-05, "loss": 2.6182, "step": 650 }, { "epoch": 3.97, "learning_rate": 4.007352941176471e-05, "loss": 2.5962, "step": 675 }, { "epoch": 4.0, "eval_gen_len": 18.99145299145299, "eval_loss": 2.1109659671783447, "eval_rouge1": 29.381, "eval_rouge2": 15.1503, "eval_rougeL": 26.8406, "eval_rougeLsum": 27.9653, "eval_runtime": 23.0996, "eval_samples_per_second": 35.455, "eval_steps_per_second": 2.251, "step": 680 }, { "epoch": 4.12, "learning_rate": 3.970588235294117e-05, "loss": 2.5478, "step": 700 }, { "epoch": 4.26, "learning_rate": 3.933823529411765e-05, "loss": 2.5601, "step": 725 }, { "epoch": 4.41, "learning_rate": 3.897058823529412e-05, "loss": 2.5793, "step": 750 }, { "epoch": 4.56, "learning_rate": 3.8602941176470595e-05, "loss": 2.5655, "step": 775 }, { "epoch": 4.71, "learning_rate": 3.8235294117647055e-05, "loss": 2.5686, "step": 800 }, { "epoch": 4.85, "learning_rate": 3.786764705882353e-05, "loss": 2.5704, "step": 825 }, { "epoch": 5.0, "learning_rate": 3.7500000000000003e-05, "loss": 2.5369, "step": 850 }, { "epoch": 5.0, "eval_gen_len": 18.996336996336996, "eval_loss": 2.1019859313964844, "eval_rouge1": 29.5767, "eval_rouge2": 15.2692, "eval_rougeL": 27.0113, "eval_rougeLsum": 28.1849, "eval_runtime": 22.9206, "eval_samples_per_second": 35.732, "eval_steps_per_second": 2.269, "step": 850 }, { "epoch": 5.15, "learning_rate": 3.713235294117647e-05, "loss": 2.5257, "step": 875 }, { "epoch": 5.29, "learning_rate": 3.6764705882352945e-05, "loss": 2.5294, "step": 900 }, { "epoch": 5.44, "learning_rate": 3.639705882352941e-05, "loss": 2.5188, "step": 925 }, { "epoch": 5.59, "learning_rate": 3.6029411764705886e-05, "loss": 2.5164, "step": 950 }, { "epoch": 5.74, "learning_rate": 3.566176470588235e-05, "loss": 2.4973, "step": 975 }, { "epoch": 5.88, "learning_rate": 3.529411764705883e-05, "loss": 2.5103, "step": 1000 }, { "epoch": 6.0, "eval_gen_len": 18.996336996336996, "eval_loss": 2.090707302093506, "eval_rouge1": 29.6354, "eval_rouge2": 15.434, "eval_rougeL": 27.0893, "eval_rougeLsum": 28.2703, "eval_runtime": 22.9931, "eval_samples_per_second": 35.619, "eval_steps_per_second": 2.262, "step": 1020 }, { "epoch": 6.03, "learning_rate": 3.4926470588235294e-05, "loss": 2.4817, "step": 1025 }, { "epoch": 6.18, "learning_rate": 3.455882352941177e-05, "loss": 2.4662, "step": 1050 }, { "epoch": 6.32, "learning_rate": 3.4191176470588236e-05, "loss": 2.4879, "step": 1075 }, { "epoch": 6.47, "learning_rate": 3.382352941176471e-05, "loss": 2.4666, "step": 1100 }, { "epoch": 6.62, "learning_rate": 3.345588235294118e-05, "loss": 2.4908, "step": 1125 }, { "epoch": 6.76, "learning_rate": 3.308823529411765e-05, "loss": 2.4887, "step": 1150 }, { "epoch": 6.91, "learning_rate": 3.272058823529412e-05, "loss": 2.4524, "step": 1175 }, { "epoch": 7.0, "eval_gen_len": 18.996336996336996, "eval_loss": 2.0839579105377197, "eval_rouge1": 29.7812, "eval_rouge2": 15.4963, "eval_rougeL": 27.2779, "eval_rougeLsum": 28.385, "eval_runtime": 23.0064, "eval_samples_per_second": 35.599, "eval_steps_per_second": 2.26, "step": 1190 }, { "epoch": 7.06, "learning_rate": 3.235294117647059e-05, "loss": 2.4526, "step": 1200 }, { "epoch": 7.21, "learning_rate": 3.198529411764706e-05, "loss": 2.4316, "step": 1225 }, { "epoch": 7.35, "learning_rate": 3.161764705882353e-05, "loss": 2.4511, "step": 1250 }, { "epoch": 7.5, "learning_rate": 3.125e-05, "loss": 2.4642, "step": 1275 }, { "epoch": 7.65, "learning_rate": 3.0882352941176475e-05, "loss": 2.4387, "step": 1300 }, { "epoch": 7.79, "learning_rate": 3.0514705882352945e-05, "loss": 2.477, "step": 1325 }, { "epoch": 7.94, "learning_rate": 3.0147058823529413e-05, "loss": 2.4472, "step": 1350 }, { "epoch": 8.0, "eval_gen_len": 18.996336996336996, "eval_loss": 2.0799622535705566, "eval_rouge1": 29.6011, "eval_rouge2": 15.5138, "eval_rougeL": 27.1381, "eval_rougeLsum": 28.2799, "eval_runtime": 22.9827, "eval_samples_per_second": 35.636, "eval_steps_per_second": 2.263, "step": 1360 }, { "epoch": 8.09, "learning_rate": 2.9779411764705883e-05, "loss": 2.4296, "step": 1375 }, { "epoch": 8.24, "learning_rate": 2.9411764705882354e-05, "loss": 2.4109, "step": 1400 }, { "epoch": 8.38, "learning_rate": 2.9044117647058828e-05, "loss": 2.4181, "step": 1425 }, { "epoch": 8.53, "learning_rate": 2.8676470588235295e-05, "loss": 2.4089, "step": 1450 }, { "epoch": 8.68, "learning_rate": 2.8308823529411766e-05, "loss": 2.4518, "step": 1475 }, { "epoch": 8.82, "learning_rate": 2.7941176470588236e-05, "loss": 2.4271, "step": 1500 }, { "epoch": 8.97, "learning_rate": 2.757352941176471e-05, "loss": 2.4089, "step": 1525 }, { "epoch": 9.0, "eval_gen_len": 18.996336996336996, "eval_loss": 2.075223207473755, "eval_rouge1": 29.7647, "eval_rouge2": 15.6183, "eval_rougeL": 27.318, "eval_rougeLsum": 28.4747, "eval_runtime": 22.8902, "eval_samples_per_second": 35.779, "eval_steps_per_second": 2.272, "step": 1530 }, { "epoch": 9.12, "learning_rate": 2.7205882352941174e-05, "loss": 2.4048, "step": 1550 }, { "epoch": 9.26, "learning_rate": 2.6838235294117648e-05, "loss": 2.4132, "step": 1575 }, { "epoch": 9.41, "learning_rate": 2.647058823529412e-05, "loss": 2.3885, "step": 1600 }, { "epoch": 9.56, "learning_rate": 2.6102941176470593e-05, "loss": 2.4007, "step": 1625 }, { "epoch": 9.71, "learning_rate": 2.5735294117647057e-05, "loss": 2.4089, "step": 1650 }, { "epoch": 9.85, "learning_rate": 2.536764705882353e-05, "loss": 2.3912, "step": 1675 }, { "epoch": 10.0, "learning_rate": 2.5e-05, "loss": 2.4011, "step": 1700 }, { "epoch": 10.0, "eval_gen_len": 19.0, "eval_loss": 2.071033239364624, "eval_rouge1": 29.6533, "eval_rouge2": 15.5536, "eval_rougeL": 27.2687, "eval_rougeLsum": 28.4457, "eval_runtime": 23.0214, "eval_samples_per_second": 35.576, "eval_steps_per_second": 2.259, "step": 1700 }, { "epoch": 10.15, "learning_rate": 2.4632352941176472e-05, "loss": 2.4049, "step": 1725 }, { "epoch": 10.29, "learning_rate": 2.4264705882352942e-05, "loss": 2.3802, "step": 1750 }, { "epoch": 10.44, "learning_rate": 2.3897058823529413e-05, "loss": 2.3688, "step": 1775 }, { "epoch": 10.59, "learning_rate": 2.3529411764705884e-05, "loss": 2.3897, "step": 1800 }, { "epoch": 10.74, "learning_rate": 2.3161764705882354e-05, "loss": 2.3464, "step": 1825 }, { "epoch": 10.88, "learning_rate": 2.2794117647058825e-05, "loss": 2.3792, "step": 1850 }, { "epoch": 11.0, "eval_gen_len": 19.0, "eval_loss": 2.0655674934387207, "eval_rouge1": 29.8668, "eval_rouge2": 15.6931, "eval_rougeL": 27.4208, "eval_rougeLsum": 28.5477, "eval_runtime": 21.951, "eval_samples_per_second": 37.31, "eval_steps_per_second": 2.369, "step": 1870 }, { "epoch": 11.03, "learning_rate": 2.2426470588235296e-05, "loss": 2.3783, "step": 1875 }, { "epoch": 11.18, "learning_rate": 2.2058823529411766e-05, "loss": 2.3446, "step": 1900 }, { "epoch": 11.32, "learning_rate": 2.1691176470588237e-05, "loss": 2.3929, "step": 1925 }, { "epoch": 11.47, "learning_rate": 2.1323529411764707e-05, "loss": 2.374, "step": 1950 }, { "epoch": 11.62, "learning_rate": 2.0955882352941178e-05, "loss": 2.3544, "step": 1975 }, { "epoch": 11.76, "learning_rate": 2.058823529411765e-05, "loss": 2.357, "step": 2000 }, { "epoch": 11.91, "learning_rate": 2.022058823529412e-05, "loss": 2.3588, "step": 2025 }, { "epoch": 12.0, "eval_gen_len": 18.996336996336996, "eval_loss": 2.0634803771972656, "eval_rouge1": 29.8378, "eval_rouge2": 15.682, "eval_rougeL": 27.4635, "eval_rougeLsum": 28.5803, "eval_runtime": 22.98, "eval_samples_per_second": 35.64, "eval_steps_per_second": 2.263, "step": 2040 }, { "epoch": 12.06, "learning_rate": 1.9852941176470586e-05, "loss": 2.3503, "step": 2050 }, { "epoch": 12.21, "learning_rate": 1.948529411764706e-05, "loss": 2.3402, "step": 2075 }, { "epoch": 12.35, "learning_rate": 1.9117647058823528e-05, "loss": 2.3716, "step": 2100 }, { "epoch": 12.5, "learning_rate": 1.8750000000000002e-05, "loss": 2.3161, "step": 2125 }, { "epoch": 12.65, "learning_rate": 1.8382352941176472e-05, "loss": 2.3354, "step": 2150 }, { "epoch": 12.79, "learning_rate": 1.8014705882352943e-05, "loss": 2.3476, "step": 2175 }, { "epoch": 12.94, "learning_rate": 1.7647058823529414e-05, "loss": 2.3397, "step": 2200 }, { "epoch": 13.0, "eval_gen_len": 19.0, "eval_loss": 2.0630440711975098, "eval_rouge1": 29.9043, "eval_rouge2": 15.7535, "eval_rougeL": 27.5065, "eval_rougeLsum": 28.6539, "eval_runtime": 22.9094, "eval_samples_per_second": 35.75, "eval_steps_per_second": 2.27, "step": 2210 }, { "epoch": 13.09, "learning_rate": 1.7279411764705884e-05, "loss": 2.3399, "step": 2225 }, { "epoch": 13.24, "learning_rate": 1.6911764705882355e-05, "loss": 2.3207, "step": 2250 }, { "epoch": 13.38, "learning_rate": 1.6544117647058825e-05, "loss": 2.3339, "step": 2275 }, { "epoch": 13.53, "learning_rate": 1.6176470588235296e-05, "loss": 2.3347, "step": 2300 }, { "epoch": 13.68, "learning_rate": 1.5808823529411763e-05, "loss": 2.3318, "step": 2325 }, { "epoch": 13.82, "learning_rate": 1.5441176470588237e-05, "loss": 2.3275, "step": 2350 }, { "epoch": 13.97, "learning_rate": 1.5073529411764706e-05, "loss": 2.3201, "step": 2375 }, { "epoch": 14.0, "eval_gen_len": 18.996336996336996, "eval_loss": 2.0599966049194336, "eval_rouge1": 29.7926, "eval_rouge2": 15.7077, "eval_rougeL": 27.4066, "eval_rougeLsum": 28.5302, "eval_runtime": 23.1182, "eval_samples_per_second": 35.427, "eval_steps_per_second": 2.249, "step": 2380 }, { "epoch": 14.12, "learning_rate": 1.4705882352941177e-05, "loss": 2.3204, "step": 2400 }, { "epoch": 14.26, "learning_rate": 1.4338235294117647e-05, "loss": 2.3592, "step": 2425 }, { "epoch": 14.41, "learning_rate": 1.3970588235294118e-05, "loss": 2.3275, "step": 2450 }, { "epoch": 14.56, "learning_rate": 1.3602941176470587e-05, "loss": 2.2936, "step": 2475 }, { "epoch": 14.71, "learning_rate": 1.323529411764706e-05, "loss": 2.3013, "step": 2500 }, { "epoch": 14.85, "learning_rate": 1.2867647058823528e-05, "loss": 2.3007, "step": 2525 }, { "epoch": 15.0, "learning_rate": 1.25e-05, "loss": 2.3241, "step": 2550 }, { "epoch": 15.0, "eval_gen_len": 19.0, "eval_loss": 2.0615200996398926, "eval_rouge1": 29.8536, "eval_rouge2": 15.7929, "eval_rougeL": 27.4572, "eval_rougeLsum": 28.5704, "eval_runtime": 22.9087, "eval_samples_per_second": 35.751, "eval_steps_per_second": 2.27, "step": 2550 }, { "epoch": 15.15, "learning_rate": 1.2132352941176471e-05, "loss": 2.326, "step": 2575 }, { "epoch": 15.29, "learning_rate": 1.1764705882352942e-05, "loss": 2.3004, "step": 2600 }, { "epoch": 15.44, "learning_rate": 1.1397058823529412e-05, "loss": 2.311, "step": 2625 }, { "epoch": 15.59, "learning_rate": 1.1029411764705883e-05, "loss": 2.3427, "step": 2650 }, { "epoch": 15.74, "learning_rate": 1.0661764705882354e-05, "loss": 2.2741, "step": 2675 }, { "epoch": 15.88, "learning_rate": 1.0294117647058824e-05, "loss": 2.3183, "step": 2700 }, { "epoch": 16.0, "eval_gen_len": 19.0, "eval_loss": 2.0573582649230957, "eval_rouge1": 29.7529, "eval_rouge2": 15.6729, "eval_rougeL": 27.3388, "eval_rougeLsum": 28.4678, "eval_runtime": 23.1299, "eval_samples_per_second": 35.409, "eval_steps_per_second": 2.248, "step": 2720 }, { "epoch": 16.03, "learning_rate": 9.926470588235293e-06, "loss": 2.2934, "step": 2725 }, { "epoch": 16.18, "learning_rate": 9.558823529411764e-06, "loss": 2.2633, "step": 2750 }, { "epoch": 16.32, "learning_rate": 9.191176470588236e-06, "loss": 2.2957, "step": 2775 }, { "epoch": 16.47, "learning_rate": 8.823529411764707e-06, "loss": 2.3083, "step": 2800 }, { "epoch": 16.62, "learning_rate": 8.455882352941177e-06, "loss": 2.3246, "step": 2825 }, { "epoch": 16.76, "learning_rate": 8.088235294117648e-06, "loss": 2.2989, "step": 2850 }, { "epoch": 16.91, "learning_rate": 7.720588235294119e-06, "loss": 2.3346, "step": 2875 }, { "epoch": 17.0, "eval_gen_len": 19.0, "eval_loss": 2.0570528507232666, "eval_rouge1": 29.7443, "eval_rouge2": 15.6459, "eval_rougeL": 27.3245, "eval_rougeLsum": 28.4549, "eval_runtime": 22.9331, "eval_samples_per_second": 35.713, "eval_steps_per_second": 2.267, "step": 2890 }, { "epoch": 17.06, "learning_rate": 7.3529411764705884e-06, "loss": 2.2887, "step": 2900 }, { "epoch": 17.21, "learning_rate": 6.985294117647059e-06, "loss": 2.2881, "step": 2925 }, { "epoch": 17.35, "learning_rate": 6.61764705882353e-06, "loss": 2.3062, "step": 2950 }, { "epoch": 17.5, "learning_rate": 6.25e-06, "loss": 2.2867, "step": 2975 }, { "epoch": 17.65, "learning_rate": 5.882352941176471e-06, "loss": 2.3056, "step": 3000 }, { "epoch": 17.79, "learning_rate": 5.5147058823529415e-06, "loss": 2.3098, "step": 3025 }, { "epoch": 17.94, "learning_rate": 5.147058823529412e-06, "loss": 2.2932, "step": 3050 }, { "epoch": 18.0, "eval_gen_len": 19.0, "eval_loss": 2.0577263832092285, "eval_rouge1": 29.7467, "eval_rouge2": 15.6717, "eval_rougeL": 27.3391, "eval_rougeLsum": 28.4541, "eval_runtime": 23.0624, "eval_samples_per_second": 35.512, "eval_steps_per_second": 2.255, "step": 3060 }, { "epoch": 18.09, "learning_rate": 4.779411764705882e-06, "loss": 2.2832, "step": 3075 }, { "epoch": 18.24, "learning_rate": 4.411764705882353e-06, "loss": 2.289, "step": 3100 }, { "epoch": 18.38, "learning_rate": 4.044117647058824e-06, "loss": 2.2932, "step": 3125 }, { "epoch": 18.53, "learning_rate": 3.6764705882352942e-06, "loss": 2.3085, "step": 3150 }, { "epoch": 18.68, "learning_rate": 3.308823529411765e-06, "loss": 2.2884, "step": 3175 }, { "epoch": 18.82, "learning_rate": 2.9411764705882355e-06, "loss": 2.2877, "step": 3200 }, { "epoch": 18.97, "learning_rate": 2.573529411764706e-06, "loss": 2.2755, "step": 3225 }, { "epoch": 19.0, "eval_gen_len": 19.0, "eval_loss": 2.0573978424072266, "eval_rouge1": 29.7694, "eval_rouge2": 15.6776, "eval_rougeL": 27.3556, "eval_rougeLsum": 28.4819, "eval_runtime": 22.951, "eval_samples_per_second": 35.685, "eval_steps_per_second": 2.266, "step": 3230 }, { "epoch": 19.0, "step": 3230, "total_flos": 2.285170027491492e+17, "train_loss": 2.4905450729393737, "train_runtime": 8327.4677, "train_samples_per_second": 26.116, "train_steps_per_second": 0.408 } ], "max_steps": 3400, "num_train_epochs": 20, "total_flos": 2.285170027491492e+17, "trial_name": null, "trial_params": null }