{ "best_metric": 1.341736078262329, "best_model_checkpoint": "./dual/flan-t5-base-dual/checkpoint-52010", "epoch": 10.0, "global_step": 52010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 4.951932320707556e-05, "loss": 2.0525, "step": 500 }, { "epoch": 0.19, "learning_rate": 4.903864641415113e-05, "loss": 1.9359, "step": 1000 }, { "epoch": 0.29, "learning_rate": 4.855796962122669e-05, "loss": 1.892, "step": 1500 }, { "epoch": 0.38, "learning_rate": 4.807729282830225e-05, "loss": 1.8455, "step": 2000 }, { "epoch": 0.48, "learning_rate": 4.7596616035377816e-05, "loss": 1.8273, "step": 2500 }, { "epoch": 0.58, "learning_rate": 4.7115939242453377e-05, "loss": 1.8317, "step": 3000 }, { "epoch": 0.67, "learning_rate": 4.663526244952894e-05, "loss": 1.8103, "step": 3500 }, { "epoch": 0.77, "learning_rate": 4.61545856566045e-05, "loss": 1.8032, "step": 4000 }, { "epoch": 0.87, "learning_rate": 4.5673908863680064e-05, "loss": 1.7842, "step": 4500 }, { "epoch": 0.96, "learning_rate": 4.5193232070755624e-05, "loss": 1.7674, "step": 5000 }, { "epoch": 1.0, "eval_gen_len": 16.8, "eval_loss": 1.4587862491607666, "eval_rouge1": 43.5356, "eval_rouge2": 25.8338, "eval_rougeL": 41.1764, "eval_rougeLsum": 41.816, "eval_runtime": 629.4032, "eval_samples_per_second": 16.524, "eval_steps_per_second": 1.033, "step": 5201 }, { "epoch": 1.06, "learning_rate": 4.4712555277831184e-05, "loss": 1.7405, "step": 5500 }, { "epoch": 1.15, "learning_rate": 4.423187848490675e-05, "loss": 1.7135, "step": 6000 }, { "epoch": 1.25, "learning_rate": 4.375120169198231e-05, "loss": 1.7161, "step": 6500 }, { "epoch": 1.35, "learning_rate": 4.327052489905787e-05, "loss": 1.7208, "step": 7000 }, { "epoch": 1.44, "learning_rate": 4.278984810613344e-05, "loss": 1.6999, "step": 7500 }, { "epoch": 1.54, "learning_rate": 4.2309171313209e-05, "loss": 1.694, "step": 8000 }, { "epoch": 1.63, "learning_rate": 4.182849452028456e-05, "loss": 1.6837, "step": 8500 }, { "epoch": 1.73, "learning_rate": 4.1347817727360125e-05, "loss": 1.6892, "step": 9000 }, { "epoch": 1.83, "learning_rate": 4.0867140934435685e-05, "loss": 1.6811, "step": 9500 }, { "epoch": 1.92, "learning_rate": 4.038646414151125e-05, "loss": 1.7004, "step": 10000 }, { "epoch": 2.0, "eval_gen_len": 16.87192307692308, "eval_loss": 1.4108598232269287, "eval_rouge1": 44.1953, "eval_rouge2": 26.6443, "eval_rougeL": 41.7387, "eval_rougeLsum": 42.3745, "eval_runtime": 674.3077, "eval_samples_per_second": 15.423, "eval_steps_per_second": 0.964, "step": 10402 }, { "epoch": 2.02, "learning_rate": 3.990578734858681e-05, "loss": 1.6572, "step": 10500 }, { "epoch": 2.11, "learning_rate": 3.942511055566238e-05, "loss": 1.6449, "step": 11000 }, { "epoch": 2.21, "learning_rate": 3.894443376273794e-05, "loss": 1.6235, "step": 11500 }, { "epoch": 2.31, "learning_rate": 3.84637569698135e-05, "loss": 1.6573, "step": 12000 }, { "epoch": 2.4, "learning_rate": 3.7983080176889066e-05, "loss": 1.6262, "step": 12500 }, { "epoch": 2.5, "learning_rate": 3.7502403383964626e-05, "loss": 1.6377, "step": 13000 }, { "epoch": 2.6, "learning_rate": 3.7021726591040186e-05, "loss": 1.6407, "step": 13500 }, { "epoch": 2.69, "learning_rate": 3.654104979811575e-05, "loss": 1.6343, "step": 14000 }, { "epoch": 2.79, "learning_rate": 3.606037300519131e-05, "loss": 1.6221, "step": 14500 }, { "epoch": 2.88, "learning_rate": 3.557969621226687e-05, "loss": 1.6127, "step": 15000 }, { "epoch": 2.98, "learning_rate": 3.509901941934244e-05, "loss": 1.622, "step": 15500 }, { "epoch": 3.0, "eval_gen_len": 16.911923076923078, "eval_loss": 1.387160301208496, "eval_rouge1": 44.6617, "eval_rouge2": 27.2456, "eval_rougeL": 42.2185, "eval_rougeLsum": 42.8309, "eval_runtime": 678.3822, "eval_samples_per_second": 15.331, "eval_steps_per_second": 0.958, "step": 15603 }, { "epoch": 3.08, "learning_rate": 3.4618342626418e-05, "loss": 1.5886, "step": 16000 }, { "epoch": 3.17, "learning_rate": 3.413766583349356e-05, "loss": 1.5913, "step": 16500 }, { "epoch": 3.27, "learning_rate": 3.365698904056912e-05, "loss": 1.5693, "step": 17000 }, { "epoch": 3.36, "learning_rate": 3.317631224764469e-05, "loss": 1.5781, "step": 17500 }, { "epoch": 3.46, "learning_rate": 3.269563545472025e-05, "loss": 1.5944, "step": 18000 }, { "epoch": 3.56, "learning_rate": 3.221495866179581e-05, "loss": 1.5671, "step": 18500 }, { "epoch": 3.65, "learning_rate": 3.1734281868871374e-05, "loss": 1.5979, "step": 19000 }, { "epoch": 3.75, "learning_rate": 3.1253605075946935e-05, "loss": 1.6014, "step": 19500 }, { "epoch": 3.85, "learning_rate": 3.0772928283022495e-05, "loss": 1.5929, "step": 20000 }, { "epoch": 3.94, "learning_rate": 3.029225149009806e-05, "loss": 1.5822, "step": 20500 }, { "epoch": 4.0, "eval_gen_len": 16.976153846153846, "eval_loss": 1.3675929307937622, "eval_rouge1": 44.7885, "eval_rouge2": 27.4914, "eval_rougeL": 42.3527, "eval_rougeLsum": 42.9959, "eval_runtime": 672.7849, "eval_samples_per_second": 15.458, "eval_steps_per_second": 0.966, "step": 20804 }, { "epoch": 4.04, "learning_rate": 2.981157469717362e-05, "loss": 1.5658, "step": 21000 }, { "epoch": 4.13, "learning_rate": 2.9330897904249182e-05, "loss": 1.5656, "step": 21500 }, { "epoch": 4.23, "learning_rate": 2.885022111132475e-05, "loss": 1.5643, "step": 22000 }, { "epoch": 4.33, "learning_rate": 2.836954431840031e-05, "loss": 1.5484, "step": 22500 }, { "epoch": 4.42, "learning_rate": 2.788886752547587e-05, "loss": 1.5504, "step": 23000 }, { "epoch": 4.52, "learning_rate": 2.7408190732551436e-05, "loss": 1.5546, "step": 23500 }, { "epoch": 4.61, "learning_rate": 2.6927513939626996e-05, "loss": 1.556, "step": 24000 }, { "epoch": 4.71, "learning_rate": 2.644683714670256e-05, "loss": 1.5448, "step": 24500 }, { "epoch": 4.81, "learning_rate": 2.596616035377812e-05, "loss": 1.5519, "step": 25000 }, { "epoch": 4.9, "learning_rate": 2.5485483560853686e-05, "loss": 1.5606, "step": 25500 }, { "epoch": 5.0, "learning_rate": 2.5004806767929246e-05, "loss": 1.5541, "step": 26000 }, { "epoch": 5.0, "eval_gen_len": 17.028846153846153, "eval_loss": 1.3574897050857544, "eval_rouge1": 44.7589, "eval_rouge2": 27.4697, "eval_rougeL": 42.3549, "eval_rougeLsum": 42.9704, "eval_runtime": 665.8005, "eval_samples_per_second": 15.62, "eval_steps_per_second": 0.976, "step": 26005 }, { "epoch": 5.1, "learning_rate": 2.452412997500481e-05, "loss": 1.5154, "step": 26500 }, { "epoch": 5.19, "learning_rate": 2.404345318208037e-05, "loss": 1.5163, "step": 27000 }, { "epoch": 5.29, "learning_rate": 2.3562776389155933e-05, "loss": 1.5176, "step": 27500 }, { "epoch": 5.38, "learning_rate": 2.3082099596231497e-05, "loss": 1.5293, "step": 28000 }, { "epoch": 5.48, "learning_rate": 2.2601422803307057e-05, "loss": 1.5237, "step": 28500 }, { "epoch": 5.58, "learning_rate": 2.212074601038262e-05, "loss": 1.5422, "step": 29000 }, { "epoch": 5.67, "learning_rate": 2.164006921745818e-05, "loss": 1.5309, "step": 29500 }, { "epoch": 5.77, "learning_rate": 2.1159392424533744e-05, "loss": 1.5296, "step": 30000 }, { "epoch": 5.86, "learning_rate": 2.0678715631609308e-05, "loss": 1.5137, "step": 30500 }, { "epoch": 5.96, "learning_rate": 2.0198038838684868e-05, "loss": 1.5116, "step": 31000 }, { "epoch": 6.0, "eval_gen_len": 16.991923076923076, "eval_loss": 1.3511042594909668, "eval_rouge1": 45.0017, "eval_rouge2": 27.6906, "eval_rougeL": 42.5834, "eval_rougeLsum": 43.2073, "eval_runtime": 658.0426, "eval_samples_per_second": 15.804, "eval_steps_per_second": 0.988, "step": 31206 }, { "epoch": 6.06, "learning_rate": 1.971736204576043e-05, "loss": 1.5018, "step": 31500 }, { "epoch": 6.15, "learning_rate": 1.9236685252835995e-05, "loss": 1.5037, "step": 32000 }, { "epoch": 6.25, "learning_rate": 1.8756008459911555e-05, "loss": 1.4991, "step": 32500 }, { "epoch": 6.34, "learning_rate": 1.8275331666987118e-05, "loss": 1.4977, "step": 33000 }, { "epoch": 6.44, "learning_rate": 1.779465487406268e-05, "loss": 1.5024, "step": 33500 }, { "epoch": 6.54, "learning_rate": 1.7313978081138242e-05, "loss": 1.5043, "step": 34000 }, { "epoch": 6.63, "learning_rate": 1.6833301288213805e-05, "loss": 1.506, "step": 34500 }, { "epoch": 6.73, "learning_rate": 1.635262449528937e-05, "loss": 1.497, "step": 35000 }, { "epoch": 6.83, "learning_rate": 1.5871947702364932e-05, "loss": 1.5132, "step": 35500 }, { "epoch": 6.92, "learning_rate": 1.5391270909440492e-05, "loss": 1.5079, "step": 36000 }, { "epoch": 7.0, "eval_gen_len": 16.982019230769232, "eval_loss": 1.347075343132019, "eval_rouge1": 44.9759, "eval_rouge2": 27.7179, "eval_rougeL": 42.5719, "eval_rougeLsum": 43.1803, "eval_runtime": 667.8543, "eval_samples_per_second": 15.572, "eval_steps_per_second": 0.973, "step": 36407 }, { "epoch": 7.02, "learning_rate": 1.4910594116516054e-05, "loss": 1.5017, "step": 36500 }, { "epoch": 7.11, "learning_rate": 1.4429917323591618e-05, "loss": 1.4946, "step": 37000 }, { "epoch": 7.21, "learning_rate": 1.394924053066718e-05, "loss": 1.4941, "step": 37500 }, { "epoch": 7.31, "learning_rate": 1.3468563737742743e-05, "loss": 1.5029, "step": 38000 }, { "epoch": 7.4, "learning_rate": 1.2987886944818307e-05, "loss": 1.4855, "step": 38500 }, { "epoch": 7.5, "learning_rate": 1.2507210151893867e-05, "loss": 1.4726, "step": 39000 }, { "epoch": 7.59, "learning_rate": 1.202653335896943e-05, "loss": 1.4687, "step": 39500 }, { "epoch": 7.69, "learning_rate": 1.1545856566044992e-05, "loss": 1.4915, "step": 40000 }, { "epoch": 7.79, "learning_rate": 1.1065179773120554e-05, "loss": 1.4793, "step": 40500 }, { "epoch": 7.88, "learning_rate": 1.0584502980196116e-05, "loss": 1.4818, "step": 41000 }, { "epoch": 7.98, "learning_rate": 1.0103826187271679e-05, "loss": 1.4771, "step": 41500 }, { "epoch": 8.0, "eval_gen_len": 16.986923076923077, "eval_loss": 1.3443900346755981, "eval_rouge1": 45.2057, "eval_rouge2": 27.9779, "eval_rougeL": 42.7648, "eval_rougeLsum": 43.3885, "eval_runtime": 559.194, "eval_samples_per_second": 18.598, "eval_steps_per_second": 1.162, "step": 41608 }, { "epoch": 8.08, "learning_rate": 9.623149394347242e-06, "loss": 1.4658, "step": 42000 }, { "epoch": 8.17, "learning_rate": 9.142472601422804e-06, "loss": 1.469, "step": 42500 }, { "epoch": 8.27, "learning_rate": 8.661795808498366e-06, "loss": 1.4966, "step": 43000 }, { "epoch": 8.36, "learning_rate": 8.181119015573928e-06, "loss": 1.4691, "step": 43500 }, { "epoch": 8.46, "learning_rate": 7.700442222649491e-06, "loss": 1.4767, "step": 44000 }, { "epoch": 8.56, "learning_rate": 7.219765429725053e-06, "loss": 1.4875, "step": 44500 }, { "epoch": 8.65, "learning_rate": 6.739088636800615e-06, "loss": 1.473, "step": 45000 }, { "epoch": 8.75, "learning_rate": 6.258411843876178e-06, "loss": 1.4826, "step": 45500 }, { "epoch": 8.84, "learning_rate": 5.77773505095174e-06, "loss": 1.4683, "step": 46000 }, { "epoch": 8.94, "learning_rate": 5.297058258027303e-06, "loss": 1.4691, "step": 46500 }, { "epoch": 9.0, "eval_gen_len": 16.951634615384616, "eval_loss": 1.3431836366653442, "eval_rouge1": 45.197, "eval_rouge2": 27.8923, "eval_rougeL": 42.7387, "eval_rougeLsum": 43.3577, "eval_runtime": 562.3582, "eval_samples_per_second": 18.494, "eval_steps_per_second": 1.156, "step": 46809 }, { "epoch": 9.04, "learning_rate": 4.816381465102865e-06, "loss": 1.4663, "step": 47000 }, { "epoch": 9.13, "learning_rate": 4.335704672178427e-06, "loss": 1.4658, "step": 47500 }, { "epoch": 9.23, "learning_rate": 3.85502787925399e-06, "loss": 1.4679, "step": 48000 }, { "epoch": 9.33, "learning_rate": 3.3743510863295526e-06, "loss": 1.4573, "step": 48500 }, { "epoch": 9.42, "learning_rate": 2.8936742934051144e-06, "loss": 1.465, "step": 49000 }, { "epoch": 9.52, "learning_rate": 2.412997500480677e-06, "loss": 1.4582, "step": 49500 }, { "epoch": 9.61, "learning_rate": 1.9323207075562393e-06, "loss": 1.4736, "step": 50000 }, { "epoch": 9.71, "learning_rate": 1.4516439146318017e-06, "loss": 1.4768, "step": 50500 }, { "epoch": 9.81, "learning_rate": 9.70967121707364e-07, "loss": 1.4634, "step": 51000 }, { "epoch": 9.9, "learning_rate": 4.902903287829264e-07, "loss": 1.4832, "step": 51500 }, { "epoch": 10.0, "learning_rate": 9.613535858488752e-09, "loss": 1.4719, "step": 52000 }, { "epoch": 10.0, "eval_gen_len": 16.988557692307694, "eval_loss": 1.341736078262329, "eval_rouge1": 45.2143, "eval_rouge2": 27.9673, "eval_rougeL": 42.7712, "eval_rougeLsum": 43.3892, "eval_runtime": 603.039, "eval_samples_per_second": 17.246, "eval_steps_per_second": 1.078, "step": 52010 } ], "max_steps": 52010, "num_train_epochs": 10, "total_flos": 5.697455075308339e+17, "trial_name": null, "trial_params": null }