{ "best_metric": 0.9965786507808726, "best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_2/checkpoint-300", "epoch": 49.9054820415879, "eval_steps": 100, "global_step": 19800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 1.515151515151515e-05, "loss": 0.4872, "step": 100 }, { "epoch": 0.25, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.21796834468841553, "eval_runtime": 498.2273, "eval_samples_per_second": 49.865, "eval_steps_per_second": 24.932, "step": 100 }, { "epoch": 0.5, "learning_rate": 3.03030303030303e-05, "loss": 0.1571, "step": 200 }, { "epoch": 0.5, "eval_accuracy": 0.93342456931251, "eval_loss": 0.2581726014614105, "eval_runtime": 500.3876, "eval_samples_per_second": 49.65, "eval_steps_per_second": 24.825, "step": 200 }, { "epoch": 0.76, "learning_rate": 4.545454545454545e-05, "loss": 0.0644, "step": 300 }, { "epoch": 0.76, "eval_accuracy": 0.9965786507808726, "eval_loss": 0.024423159658908844, "eval_runtime": 500.4785, "eval_samples_per_second": 49.64, "eval_steps_per_second": 24.82, "step": 300 }, { "epoch": 1.01, "learning_rate": 6.06060606060606e-05, "loss": 0.0553, "step": 400 }, { "epoch": 1.01, "eval_accuracy": 0.9927950410561907, "eval_loss": 0.11555636674165726, "eval_runtime": 500.405, "eval_samples_per_second": 49.648, "eval_steps_per_second": 24.824, "step": 400 }, { "epoch": 1.26, "learning_rate": 7.575757575757576e-05, "loss": 0.1108, "step": 500 }, { "epoch": 1.26, "eval_accuracy": 0.9898164546771856, "eval_loss": 0.1576482504606247, "eval_runtime": 500.3246, "eval_samples_per_second": 49.656, "eval_steps_per_second": 24.828, "step": 500 }, { "epoch": 1.51, "learning_rate": 9.09090909090909e-05, "loss": 0.0849, "step": 600 }, { "epoch": 1.51, "eval_accuracy": 0.9946868459185316, "eval_loss": 0.08708283305168152, "eval_runtime": 500.3759, "eval_samples_per_second": 49.651, "eval_steps_per_second": 24.825, "step": 600 }, { "epoch": 1.76, "learning_rate": 0.00010606060606060605, "loss": 0.0635, "step": 700 }, { "epoch": 1.76, "eval_accuracy": 0.9938818225728546, "eval_loss": 0.10875184088945389, "eval_runtime": 500.3093, "eval_samples_per_second": 49.657, "eval_steps_per_second": 24.829, "step": 700 }, { "epoch": 2.02, "learning_rate": 0.0001212121212121212, "loss": 0.0504, "step": 800 }, { "epoch": 2.02, "eval_accuracy": 0.9789888906778297, "eval_loss": 0.4074054956436157, "eval_runtime": 500.4204, "eval_samples_per_second": 49.646, "eval_steps_per_second": 24.823, "step": 800 }, { "epoch": 2.27, "learning_rate": 0.00013636363636363634, "loss": 0.1075, "step": 900 }, { "epoch": 2.27, "eval_accuracy": 0.9814442118821446, "eval_loss": 0.2954882085323334, "eval_runtime": 500.4501, "eval_samples_per_second": 49.643, "eval_steps_per_second": 24.822, "step": 900 }, { "epoch": 2.52, "learning_rate": 0.00015151515151515152, "loss": 0.2387, "step": 1000 }, { "epoch": 2.52, "eval_accuracy": 0.9956126227660602, "eval_loss": 0.06512398272752762, "eval_runtime": 500.7096, "eval_samples_per_second": 49.618, "eval_steps_per_second": 24.809, "step": 1000 }, { "epoch": 2.77, "learning_rate": 0.00016666666666666666, "loss": 0.3052, "step": 1100 }, { "epoch": 2.77, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.23793257772922516, "eval_runtime": 500.5364, "eval_samples_per_second": 49.635, "eval_steps_per_second": 24.817, "step": 1100 }, { "epoch": 3.02, "learning_rate": 0.0001818181818181818, "loss": 0.3336, "step": 1200 }, { "epoch": 3.02, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.35269346833229065, "eval_runtime": 501.0064, "eval_samples_per_second": 49.588, "eval_steps_per_second": 24.794, "step": 1200 }, { "epoch": 3.28, "learning_rate": 0.00019696969696969695, "loss": 0.3322, "step": 1300 }, { "epoch": 3.28, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33067458868026733, "eval_runtime": 502.8336, "eval_samples_per_second": 49.408, "eval_steps_per_second": 24.704, "step": 1300 }, { "epoch": 3.53, "learning_rate": 0.0002121212121212121, "loss": 0.3201, "step": 1400 }, { "epoch": 3.53, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.34046611189842224, "eval_runtime": 500.7806, "eval_samples_per_second": 49.611, "eval_steps_per_second": 24.805, "step": 1400 }, { "epoch": 3.78, "learning_rate": 0.00022727272727272725, "loss": 0.3406, "step": 1500 }, { "epoch": 3.78, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33353403210639954, "eval_runtime": 500.7499, "eval_samples_per_second": 49.614, "eval_steps_per_second": 24.807, "step": 1500 }, { "epoch": 4.03, "learning_rate": 0.0002424242424242424, "loss": 0.3475, "step": 1600 }, { "epoch": 4.03, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3340916931629181, "eval_runtime": 500.598, "eval_samples_per_second": 49.629, "eval_steps_per_second": 24.814, "step": 1600 }, { "epoch": 4.28, "learning_rate": 0.00025757575757575756, "loss": 0.3312, "step": 1700 }, { "epoch": 4.28, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33610448241233826, "eval_runtime": 500.8798, "eval_samples_per_second": 49.601, "eval_steps_per_second": 24.8, "step": 1700 }, { "epoch": 4.54, "learning_rate": 0.0002727272727272727, "loss": 0.3367, "step": 1800 }, { "epoch": 4.54, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3309839069843292, "eval_runtime": 500.5768, "eval_samples_per_second": 49.631, "eval_steps_per_second": 24.815, "step": 1800 }, { "epoch": 4.79, "learning_rate": 0.00028787878787878786, "loss": 0.3284, "step": 1900 }, { "epoch": 4.79, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33385568857192993, "eval_runtime": 500.453, "eval_samples_per_second": 49.643, "eval_steps_per_second": 24.822, "step": 1900 }, { "epoch": 5.04, "learning_rate": 0.00029966329966329963, "loss": 0.3267, "step": 2000 }, { "epoch": 5.04, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3350389301776886, "eval_runtime": 500.7512, "eval_samples_per_second": 49.613, "eval_steps_per_second": 24.807, "step": 2000 }, { "epoch": 5.29, "learning_rate": 0.00029797979797979794, "loss": 0.338, "step": 2100 }, { "epoch": 5.29, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33081698417663574, "eval_runtime": 500.5738, "eval_samples_per_second": 49.631, "eval_steps_per_second": 24.816, "step": 2100 }, { "epoch": 5.55, "learning_rate": 0.00029629629629629624, "loss": 0.3277, "step": 2200 }, { "epoch": 5.55, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.330895334482193, "eval_runtime": 500.5595, "eval_samples_per_second": 49.632, "eval_steps_per_second": 24.816, "step": 2200 }, { "epoch": 5.8, "learning_rate": 0.0002946127946127946, "loss": 0.3294, "step": 2300 }, { "epoch": 5.8, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3312866687774658, "eval_runtime": 500.6862, "eval_samples_per_second": 49.62, "eval_steps_per_second": 24.81, "step": 2300 }, { "epoch": 6.05, "learning_rate": 0.0002929292929292929, "loss": 0.3315, "step": 2400 }, { "epoch": 6.05, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33599570393562317, "eval_runtime": 500.6176, "eval_samples_per_second": 49.627, "eval_steps_per_second": 24.813, "step": 2400 }, { "epoch": 6.3, "learning_rate": 0.00029124579124579125, "loss": 0.3397, "step": 2500 }, { "epoch": 6.3, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33067843317985535, "eval_runtime": 500.7157, "eval_samples_per_second": 49.617, "eval_steps_per_second": 24.808, "step": 2500 }, { "epoch": 6.55, "learning_rate": 0.00028956228956228955, "loss": 0.3318, "step": 2600 }, { "epoch": 6.55, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3359489440917969, "eval_runtime": 500.805, "eval_samples_per_second": 49.608, "eval_steps_per_second": 24.804, "step": 2600 }, { "epoch": 6.81, "learning_rate": 0.00028787878787878786, "loss": 0.3312, "step": 2700 }, { "epoch": 6.81, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3307555615901947, "eval_runtime": 500.6263, "eval_samples_per_second": 49.626, "eval_steps_per_second": 24.813, "step": 2700 }, { "epoch": 7.06, "learning_rate": 0.00028619528619528616, "loss": 0.3155, "step": 2800 }, { "epoch": 7.06, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33172452449798584, "eval_runtime": 500.7242, "eval_samples_per_second": 49.616, "eval_steps_per_second": 24.808, "step": 2800 }, { "epoch": 7.31, "learning_rate": 0.0002845117845117845, "loss": 0.3304, "step": 2900 }, { "epoch": 7.31, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33618223667144775, "eval_runtime": 500.5229, "eval_samples_per_second": 49.636, "eval_steps_per_second": 24.818, "step": 2900 }, { "epoch": 7.56, "learning_rate": 0.0002828282828282828, "loss": 0.338, "step": 3000 }, { "epoch": 7.56, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3341616988182068, "eval_runtime": 500.7918, "eval_samples_per_second": 49.609, "eval_steps_per_second": 24.805, "step": 3000 }, { "epoch": 7.81, "learning_rate": 0.0002811447811447811, "loss": 0.3241, "step": 3100 }, { "epoch": 7.81, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.330985426902771, "eval_runtime": 500.924, "eval_samples_per_second": 49.596, "eval_steps_per_second": 24.798, "step": 3100 }, { "epoch": 8.07, "learning_rate": 0.0002794612794612794, "loss": 0.3325, "step": 3200 }, { "epoch": 8.07, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33258649706840515, "eval_runtime": 500.7775, "eval_samples_per_second": 49.611, "eval_steps_per_second": 24.805, "step": 3200 }, { "epoch": 8.32, "learning_rate": 0.0002777777777777778, "loss": 0.3202, "step": 3300 }, { "epoch": 8.32, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3345409035682678, "eval_runtime": 501.0057, "eval_samples_per_second": 49.588, "eval_steps_per_second": 24.794, "step": 3300 }, { "epoch": 8.57, "learning_rate": 0.0002760942760942761, "loss": 0.3315, "step": 3400 }, { "epoch": 8.57, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3335334360599518, "eval_runtime": 500.8188, "eval_samples_per_second": 49.607, "eval_steps_per_second": 24.803, "step": 3400 }, { "epoch": 8.82, "learning_rate": 0.0002744107744107744, "loss": 0.3288, "step": 3500 }, { "epoch": 8.82, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33116644620895386, "eval_runtime": 501.0568, "eval_samples_per_second": 49.583, "eval_steps_per_second": 24.792, "step": 3500 }, { "epoch": 9.07, "learning_rate": 0.0002727272727272727, "loss": 0.3371, "step": 3600 }, { "epoch": 9.07, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.34014323353767395, "eval_runtime": 500.7918, "eval_samples_per_second": 49.609, "eval_steps_per_second": 24.805, "step": 3600 }, { "epoch": 9.33, "learning_rate": 0.00027104377104377104, "loss": 0.3409, "step": 3700 }, { "epoch": 9.33, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33300086855888367, "eval_runtime": 500.8108, "eval_samples_per_second": 49.608, "eval_steps_per_second": 24.804, "step": 3700 }, { "epoch": 9.58, "learning_rate": 0.00026936026936026934, "loss": 0.3236, "step": 3800 }, { "epoch": 9.58, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3329709768295288, "eval_runtime": 500.7254, "eval_samples_per_second": 49.616, "eval_steps_per_second": 24.808, "step": 3800 }, { "epoch": 9.83, "learning_rate": 0.00026767676767676764, "loss": 0.3224, "step": 3900 }, { "epoch": 9.83, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3320678770542145, "eval_runtime": 500.9946, "eval_samples_per_second": 49.589, "eval_steps_per_second": 24.795, "step": 3900 }, { "epoch": 10.08, "learning_rate": 0.00026599326599326595, "loss": 0.3439, "step": 4000 }, { "epoch": 10.08, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33262181282043457, "eval_runtime": 501.0278, "eval_samples_per_second": 49.586, "eval_steps_per_second": 24.793, "step": 4000 }, { "epoch": 10.33, "learning_rate": 0.0002643097643097643, "loss": 0.3382, "step": 4100 }, { "epoch": 10.33, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3310418426990509, "eval_runtime": 501.0043, "eval_samples_per_second": 49.588, "eval_steps_per_second": 24.794, "step": 4100 }, { "epoch": 10.59, "learning_rate": 0.0002626262626262626, "loss": 0.3307, "step": 4200 }, { "epoch": 10.59, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33820316195487976, "eval_runtime": 501.0192, "eval_samples_per_second": 49.587, "eval_steps_per_second": 24.793, "step": 4200 }, { "epoch": 10.84, "learning_rate": 0.0002609427609427609, "loss": 0.3231, "step": 4300 }, { "epoch": 10.84, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3324846625328064, "eval_runtime": 500.6054, "eval_samples_per_second": 49.628, "eval_steps_per_second": 24.814, "step": 4300 }, { "epoch": 11.09, "learning_rate": 0.0002592592592592592, "loss": 0.3095, "step": 4400 }, { "epoch": 11.09, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3348234295845032, "eval_runtime": 500.6559, "eval_samples_per_second": 49.623, "eval_steps_per_second": 24.811, "step": 4400 }, { "epoch": 11.34, "learning_rate": 0.00025757575757575756, "loss": 0.3442, "step": 4500 }, { "epoch": 11.34, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33274412155151367, "eval_runtime": 501.0421, "eval_samples_per_second": 49.585, "eval_steps_per_second": 24.792, "step": 4500 }, { "epoch": 11.59, "learning_rate": 0.00025589225589225587, "loss": 0.3269, "step": 4600 }, { "epoch": 11.59, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33261528611183167, "eval_runtime": 500.8659, "eval_samples_per_second": 49.602, "eval_steps_per_second": 24.801, "step": 4600 }, { "epoch": 11.85, "learning_rate": 0.00025420875420875417, "loss": 0.3323, "step": 4700 }, { "epoch": 11.85, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3308444619178772, "eval_runtime": 501.0609, "eval_samples_per_second": 49.583, "eval_steps_per_second": 24.791, "step": 4700 }, { "epoch": 12.1, "learning_rate": 0.0002525252525252525, "loss": 0.3313, "step": 4800 }, { "epoch": 12.1, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3308044970035553, "eval_runtime": 500.8061, "eval_samples_per_second": 49.608, "eval_steps_per_second": 24.804, "step": 4800 }, { "epoch": 12.35, "learning_rate": 0.0002508417508417508, "loss": 0.3283, "step": 4900 }, { "epoch": 12.35, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3314284384250641, "eval_runtime": 500.7554, "eval_samples_per_second": 49.613, "eval_steps_per_second": 24.807, "step": 4900 }, { "epoch": 12.6, "learning_rate": 0.00024915824915824913, "loss": 0.3331, "step": 5000 }, { "epoch": 12.6, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306741416454315, "eval_runtime": 500.9128, "eval_samples_per_second": 49.597, "eval_steps_per_second": 24.799, "step": 5000 }, { "epoch": 12.85, "learning_rate": 0.0002474747474747475, "loss": 0.3317, "step": 5100 }, { "epoch": 12.85, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3343793749809265, "eval_runtime": 500.7387, "eval_samples_per_second": 49.615, "eval_steps_per_second": 24.807, "step": 5100 }, { "epoch": 13.11, "learning_rate": 0.0002457912457912458, "loss": 0.3283, "step": 5200 }, { "epoch": 13.11, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33199575543403625, "eval_runtime": 500.8527, "eval_samples_per_second": 49.603, "eval_steps_per_second": 24.802, "step": 5200 }, { "epoch": 13.36, "learning_rate": 0.00024410774410774406, "loss": 0.3263, "step": 5300 }, { "epoch": 13.36, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33114317059516907, "eval_runtime": 500.8605, "eval_samples_per_second": 49.603, "eval_steps_per_second": 24.801, "step": 5300 }, { "epoch": 13.61, "learning_rate": 0.0002424242424242424, "loss": 0.3421, "step": 5400 }, { "epoch": 13.61, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306863009929657, "eval_runtime": 500.9253, "eval_samples_per_second": 49.596, "eval_steps_per_second": 24.798, "step": 5400 }, { "epoch": 13.86, "learning_rate": 0.00024074074074074072, "loss": 0.3164, "step": 5500 }, { "epoch": 13.86, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3318192958831787, "eval_runtime": 500.4762, "eval_samples_per_second": 49.641, "eval_steps_per_second": 24.82, "step": 5500 }, { "epoch": 14.11, "learning_rate": 0.00023905723905723905, "loss": 0.3315, "step": 5600 }, { "epoch": 14.11, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3335217535495758, "eval_runtime": 500.848, "eval_samples_per_second": 49.604, "eval_steps_per_second": 24.802, "step": 5600 }, { "epoch": 14.37, "learning_rate": 0.00023737373737373732, "loss": 0.3415, "step": 5700 }, { "epoch": 14.37, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3314845860004425, "eval_runtime": 500.8633, "eval_samples_per_second": 49.602, "eval_steps_per_second": 24.801, "step": 5700 }, { "epoch": 14.62, "learning_rate": 0.00023569023569023565, "loss": 0.3325, "step": 5800 }, { "epoch": 14.62, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33067190647125244, "eval_runtime": 500.8182, "eval_samples_per_second": 49.607, "eval_steps_per_second": 24.803, "step": 5800 }, { "epoch": 14.87, "learning_rate": 0.00023400673400673398, "loss": 0.3264, "step": 5900 }, { "epoch": 14.87, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33295896649360657, "eval_runtime": 500.9198, "eval_samples_per_second": 49.597, "eval_steps_per_second": 24.798, "step": 5900 }, { "epoch": 15.12, "learning_rate": 0.0002323232323232323, "loss": 0.3223, "step": 6000 }, { "epoch": 15.12, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306836187839508, "eval_runtime": 500.9444, "eval_samples_per_second": 49.594, "eval_steps_per_second": 24.797, "step": 6000 }, { "epoch": 15.37, "learning_rate": 0.00023063973063973064, "loss": 0.3289, "step": 6100 }, { "epoch": 15.37, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3328978717327118, "eval_runtime": 500.8768, "eval_samples_per_second": 49.601, "eval_steps_per_second": 24.801, "step": 6100 }, { "epoch": 15.63, "learning_rate": 0.00022895622895622892, "loss": 0.3353, "step": 6200 }, { "epoch": 15.63, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33112406730651855, "eval_runtime": 500.9115, "eval_samples_per_second": 49.598, "eval_steps_per_second": 24.799, "step": 6200 }, { "epoch": 15.88, "learning_rate": 0.00022727272727272725, "loss": 0.3246, "step": 6300 }, { "epoch": 15.88, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3310993015766144, "eval_runtime": 501.1149, "eval_samples_per_second": 49.577, "eval_steps_per_second": 24.789, "step": 6300 }, { "epoch": 16.13, "learning_rate": 0.00022558922558922557, "loss": 0.3425, "step": 6400 }, { "epoch": 16.13, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.330674409866333, "eval_runtime": 500.85, "eval_samples_per_second": 49.604, "eval_steps_per_second": 24.802, "step": 6400 }, { "epoch": 16.38, "learning_rate": 0.0002239057239057239, "loss": 0.331, "step": 6500 }, { "epoch": 16.38, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306954503059387, "eval_runtime": 501.1853, "eval_samples_per_second": 49.57, "eval_steps_per_second": 24.785, "step": 6500 }, { "epoch": 16.64, "learning_rate": 0.00022222222222222218, "loss": 0.3293, "step": 6600 }, { "epoch": 16.64, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33531516790390015, "eval_runtime": 500.9658, "eval_samples_per_second": 49.592, "eval_steps_per_second": 24.796, "step": 6600 }, { "epoch": 16.89, "learning_rate": 0.0002205387205387205, "loss": 0.3249, "step": 6700 }, { "epoch": 16.89, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3339368402957916, "eval_runtime": 500.8138, "eval_samples_per_second": 49.607, "eval_steps_per_second": 24.804, "step": 6700 }, { "epoch": 17.14, "learning_rate": 0.00021885521885521884, "loss": 0.3214, "step": 6800 }, { "epoch": 17.14, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3337734639644623, "eval_runtime": 500.9586, "eval_samples_per_second": 49.593, "eval_steps_per_second": 24.796, "step": 6800 }, { "epoch": 17.39, "learning_rate": 0.00021717171717171717, "loss": 0.3259, "step": 6900 }, { "epoch": 17.39, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3327140212059021, "eval_runtime": 501.0122, "eval_samples_per_second": 49.588, "eval_steps_per_second": 24.794, "step": 6900 }, { "epoch": 17.64, "learning_rate": 0.00021548821548821544, "loss": 0.3408, "step": 7000 }, { "epoch": 17.64, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33182209730148315, "eval_runtime": 500.9279, "eval_samples_per_second": 49.596, "eval_steps_per_second": 24.798, "step": 7000 }, { "epoch": 17.9, "learning_rate": 0.00021380471380471377, "loss": 0.3258, "step": 7100 }, { "epoch": 17.9, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33183717727661133, "eval_runtime": 501.2678, "eval_samples_per_second": 49.562, "eval_steps_per_second": 24.781, "step": 7100 }, { "epoch": 18.15, "learning_rate": 0.0002121212121212121, "loss": 0.3299, "step": 7200 }, { "epoch": 18.15, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33080849051475525, "eval_runtime": 501.2555, "eval_samples_per_second": 49.564, "eval_steps_per_second": 24.782, "step": 7200 }, { "epoch": 18.4, "learning_rate": 0.00021043771043771043, "loss": 0.327, "step": 7300 }, { "epoch": 18.4, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3371123671531677, "eval_runtime": 500.9892, "eval_samples_per_second": 49.59, "eval_steps_per_second": 24.795, "step": 7300 }, { "epoch": 18.65, "learning_rate": 0.00020875420875420876, "loss": 0.3317, "step": 7400 }, { "epoch": 18.65, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3307543694972992, "eval_runtime": 500.9046, "eval_samples_per_second": 49.598, "eval_steps_per_second": 24.799, "step": 7400 }, { "epoch": 18.9, "learning_rate": 0.00020707070707070703, "loss": 0.3291, "step": 7500 }, { "epoch": 18.9, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33100253343582153, "eval_runtime": 500.936, "eval_samples_per_second": 49.595, "eval_steps_per_second": 24.798, "step": 7500 }, { "epoch": 19.16, "learning_rate": 0.00020538720538720536, "loss": 0.3263, "step": 7600 }, { "epoch": 19.16, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33248230814933777, "eval_runtime": 500.8588, "eval_samples_per_second": 49.603, "eval_steps_per_second": 24.801, "step": 7600 }, { "epoch": 19.41, "learning_rate": 0.0002037037037037037, "loss": 0.3223, "step": 7700 }, { "epoch": 19.41, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33463332056999207, "eval_runtime": 500.8344, "eval_samples_per_second": 49.605, "eval_steps_per_second": 24.803, "step": 7700 }, { "epoch": 19.66, "learning_rate": 0.00020202020202020202, "loss": 0.3403, "step": 7800 }, { "epoch": 19.66, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3315812647342682, "eval_runtime": 500.8929, "eval_samples_per_second": 49.599, "eval_steps_per_second": 24.8, "step": 7800 }, { "epoch": 19.91, "learning_rate": 0.0002003367003367003, "loss": 0.3265, "step": 7900 }, { "epoch": 19.91, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3309071660041809, "eval_runtime": 500.8065, "eval_samples_per_second": 49.608, "eval_steps_per_second": 24.804, "step": 7900 }, { "epoch": 20.16, "learning_rate": 0.00019865319865319862, "loss": 0.33, "step": 8000 }, { "epoch": 20.16, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3318468928337097, "eval_runtime": 501.0869, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 8000 }, { "epoch": 20.42, "learning_rate": 0.00019696969696969695, "loss": 0.3488, "step": 8100 }, { "epoch": 20.42, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33127933740615845, "eval_runtime": 500.8956, "eval_samples_per_second": 49.599, "eval_steps_per_second": 24.8, "step": 8100 }, { "epoch": 20.67, "learning_rate": 0.00019528619528619528, "loss": 0.3293, "step": 8200 }, { "epoch": 20.67, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33354687690734863, "eval_runtime": 501.2059, "eval_samples_per_second": 49.568, "eval_steps_per_second": 24.784, "step": 8200 }, { "epoch": 20.92, "learning_rate": 0.00019360269360269356, "loss": 0.3095, "step": 8300 }, { "epoch": 20.92, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33559244871139526, "eval_runtime": 501.1442, "eval_samples_per_second": 49.575, "eval_steps_per_second": 24.787, "step": 8300 }, { "epoch": 21.17, "learning_rate": 0.0001919191919191919, "loss": 0.3366, "step": 8400 }, { "epoch": 21.17, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3331533968448639, "eval_runtime": 501.0445, "eval_samples_per_second": 49.584, "eval_steps_per_second": 24.792, "step": 8400 }, { "epoch": 21.42, "learning_rate": 0.00019023569023569022, "loss": 0.317, "step": 8500 }, { "epoch": 21.42, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3337852358818054, "eval_runtime": 501.2024, "eval_samples_per_second": 49.569, "eval_steps_per_second": 24.784, "step": 8500 }, { "epoch": 21.68, "learning_rate": 0.00018855218855218854, "loss": 0.3299, "step": 8600 }, { "epoch": 21.68, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3308347165584564, "eval_runtime": 501.0, "eval_samples_per_second": 49.589, "eval_steps_per_second": 24.794, "step": 8600 }, { "epoch": 21.93, "learning_rate": 0.00018686868686868687, "loss": 0.3434, "step": 8700 }, { "epoch": 21.93, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.330965131521225, "eval_runtime": 501.1751, "eval_samples_per_second": 49.571, "eval_steps_per_second": 24.786, "step": 8700 }, { "epoch": 22.18, "learning_rate": 0.00018518518518518515, "loss": 0.3208, "step": 8800 }, { "epoch": 22.18, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3308819830417633, "eval_runtime": 501.189, "eval_samples_per_second": 49.57, "eval_steps_per_second": 24.785, "step": 8800 }, { "epoch": 22.43, "learning_rate": 0.00018350168350168348, "loss": 0.3351, "step": 8900 }, { "epoch": 22.43, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33238929510116577, "eval_runtime": 501.2428, "eval_samples_per_second": 49.565, "eval_steps_per_second": 24.782, "step": 8900 }, { "epoch": 22.68, "learning_rate": 0.0001818181818181818, "loss": 0.3301, "step": 9000 }, { "epoch": 22.68, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3308260440826416, "eval_runtime": 500.8427, "eval_samples_per_second": 49.604, "eval_steps_per_second": 24.802, "step": 9000 }, { "epoch": 22.94, "learning_rate": 0.00018013468013468014, "loss": 0.3196, "step": 9100 }, { "epoch": 22.94, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3329552710056305, "eval_runtime": 501.1009, "eval_samples_per_second": 49.579, "eval_steps_per_second": 24.789, "step": 9100 }, { "epoch": 23.19, "learning_rate": 0.0001784511784511784, "loss": 0.3339, "step": 9200 }, { "epoch": 23.19, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33333924412727356, "eval_runtime": 500.8988, "eval_samples_per_second": 49.599, "eval_steps_per_second": 24.799, "step": 9200 }, { "epoch": 23.44, "learning_rate": 0.00017676767676767674, "loss": 0.3249, "step": 9300 }, { "epoch": 23.44, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3307563364505768, "eval_runtime": 501.0394, "eval_samples_per_second": 49.585, "eval_steps_per_second": 24.792, "step": 9300 }, { "epoch": 23.69, "learning_rate": 0.00017508417508417507, "loss": 0.3247, "step": 9400 }, { "epoch": 23.69, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3338105082511902, "eval_runtime": 500.7662, "eval_samples_per_second": 49.612, "eval_steps_per_second": 24.806, "step": 9400 }, { "epoch": 23.94, "learning_rate": 0.0001734006734006734, "loss": 0.3369, "step": 9500 }, { "epoch": 23.94, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3312574028968811, "eval_runtime": 501.3028, "eval_samples_per_second": 49.559, "eval_steps_per_second": 24.779, "step": 9500 }, { "epoch": 24.2, "learning_rate": 0.00017171717171717167, "loss": 0.3291, "step": 9600 }, { "epoch": 24.2, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3320288062095642, "eval_runtime": 501.3419, "eval_samples_per_second": 49.555, "eval_steps_per_second": 24.777, "step": 9600 }, { "epoch": 24.45, "learning_rate": 0.00017003367003367, "loss": 0.3307, "step": 9700 }, { "epoch": 24.45, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33087393641471863, "eval_runtime": 501.1809, "eval_samples_per_second": 49.571, "eval_steps_per_second": 24.785, "step": 9700 }, { "epoch": 24.7, "learning_rate": 0.00016835016835016833, "loss": 0.3328, "step": 9800 }, { "epoch": 24.7, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33073562383651733, "eval_runtime": 500.8708, "eval_samples_per_second": 49.602, "eval_steps_per_second": 24.801, "step": 9800 }, { "epoch": 24.95, "learning_rate": 0.00016666666666666666, "loss": 0.3277, "step": 9900 }, { "epoch": 24.95, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3341946601867676, "eval_runtime": 501.1287, "eval_samples_per_second": 49.576, "eval_steps_per_second": 24.788, "step": 9900 }, { "epoch": 25.2, "learning_rate": 0.000164983164983165, "loss": 0.3278, "step": 10000 }, { "epoch": 25.2, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3309917449951172, "eval_runtime": 500.9142, "eval_samples_per_second": 49.597, "eval_steps_per_second": 24.799, "step": 10000 }, { "epoch": 25.46, "learning_rate": 0.00016329966329966327, "loss": 0.3197, "step": 10100 }, { "epoch": 25.46, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3348841965198517, "eval_runtime": 501.1589, "eval_samples_per_second": 49.573, "eval_steps_per_second": 24.787, "step": 10100 }, { "epoch": 25.71, "learning_rate": 0.0001616161616161616, "loss": 0.3273, "step": 10200 }, { "epoch": 25.71, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3321140706539154, "eval_runtime": 501.4344, "eval_samples_per_second": 49.546, "eval_steps_per_second": 24.773, "step": 10200 }, { "epoch": 25.96, "learning_rate": 0.00015993265993265992, "loss": 0.3345, "step": 10300 }, { "epoch": 25.96, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3312225043773651, "eval_runtime": 500.9045, "eval_samples_per_second": 49.598, "eval_steps_per_second": 24.799, "step": 10300 }, { "epoch": 26.21, "learning_rate": 0.00015824915824915825, "loss": 0.3351, "step": 10400 }, { "epoch": 26.21, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33248284459114075, "eval_runtime": 501.2052, "eval_samples_per_second": 49.569, "eval_steps_per_second": 24.784, "step": 10400 }, { "epoch": 26.47, "learning_rate": 0.00015656565656565653, "loss": 0.3144, "step": 10500 }, { "epoch": 26.47, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.334583580493927, "eval_runtime": 501.103, "eval_samples_per_second": 49.579, "eval_steps_per_second": 24.789, "step": 10500 }, { "epoch": 26.72, "learning_rate": 0.00015488215488215486, "loss": 0.3361, "step": 10600 }, { "epoch": 26.72, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33112606406211853, "eval_runtime": 500.6859, "eval_samples_per_second": 49.62, "eval_steps_per_second": 24.81, "step": 10600 }, { "epoch": 26.97, "learning_rate": 0.00015319865319865319, "loss": 0.3334, "step": 10700 }, { "epoch": 26.97, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306848406791687, "eval_runtime": 500.81, "eval_samples_per_second": 49.608, "eval_steps_per_second": 24.804, "step": 10700 }, { "epoch": 27.22, "learning_rate": 0.00015151515151515152, "loss": 0.3287, "step": 10800 }, { "epoch": 27.22, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3373050093650818, "eval_runtime": 501.0133, "eval_samples_per_second": 49.588, "eval_steps_per_second": 24.794, "step": 10800 }, { "epoch": 27.47, "learning_rate": 0.00014983164983164982, "loss": 0.3374, "step": 10900 }, { "epoch": 27.47, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306737244129181, "eval_runtime": 501.4272, "eval_samples_per_second": 49.547, "eval_steps_per_second": 24.773, "step": 10900 }, { "epoch": 27.73, "learning_rate": 0.00014814814814814812, "loss": 0.3302, "step": 11000 }, { "epoch": 27.73, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306819200515747, "eval_runtime": 501.0901, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 11000 }, { "epoch": 27.98, "learning_rate": 0.00014646464646464645, "loss": 0.3245, "step": 11100 }, { "epoch": 27.98, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33153215050697327, "eval_runtime": 501.2107, "eval_samples_per_second": 49.568, "eval_steps_per_second": 24.784, "step": 11100 }, { "epoch": 28.23, "learning_rate": 0.00014478114478114478, "loss": 0.3353, "step": 11200 }, { "epoch": 28.23, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33351030945777893, "eval_runtime": 501.1769, "eval_samples_per_second": 49.571, "eval_steps_per_second": 24.786, "step": 11200 }, { "epoch": 28.48, "learning_rate": 0.00014309764309764308, "loss": 0.3191, "step": 11300 }, { "epoch": 28.48, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33356890082359314, "eval_runtime": 501.2908, "eval_samples_per_second": 49.56, "eval_steps_per_second": 24.78, "step": 11300 }, { "epoch": 28.73, "learning_rate": 0.0001414141414141414, "loss": 0.3226, "step": 11400 }, { "epoch": 28.73, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33081722259521484, "eval_runtime": 501.1297, "eval_samples_per_second": 49.576, "eval_steps_per_second": 24.788, "step": 11400 }, { "epoch": 28.99, "learning_rate": 0.0001397306397306397, "loss": 0.3384, "step": 11500 }, { "epoch": 28.99, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3321709930896759, "eval_runtime": 501.2824, "eval_samples_per_second": 49.561, "eval_steps_per_second": 24.78, "step": 11500 }, { "epoch": 29.24, "learning_rate": 0.00013804713804713804, "loss": 0.3368, "step": 11600 }, { "epoch": 29.24, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3336506485939026, "eval_runtime": 500.9493, "eval_samples_per_second": 49.594, "eval_steps_per_second": 24.797, "step": 11600 }, { "epoch": 29.49, "learning_rate": 0.00013636363636363634, "loss": 0.3224, "step": 11700 }, { "epoch": 29.49, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3331962823867798, "eval_runtime": 501.3861, "eval_samples_per_second": 49.551, "eval_steps_per_second": 24.775, "step": 11700 }, { "epoch": 29.74, "learning_rate": 0.00013468013468013467, "loss": 0.3224, "step": 11800 }, { "epoch": 29.74, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3318473696708679, "eval_runtime": 501.3052, "eval_samples_per_second": 49.559, "eval_steps_per_second": 24.779, "step": 11800 }, { "epoch": 29.99, "learning_rate": 0.00013299663299663297, "loss": 0.3363, "step": 11900 }, { "epoch": 29.99, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3309670388698578, "eval_runtime": 501.4331, "eval_samples_per_second": 49.546, "eval_steps_per_second": 24.773, "step": 11900 }, { "epoch": 30.25, "learning_rate": 0.0001313131313131313, "loss": 0.327, "step": 12000 }, { "epoch": 30.25, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306976854801178, "eval_runtime": 501.4726, "eval_samples_per_second": 49.542, "eval_steps_per_second": 24.771, "step": 12000 }, { "epoch": 30.5, "learning_rate": 0.0001296296296296296, "loss": 0.3291, "step": 12100 }, { "epoch": 30.5, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306950628757477, "eval_runtime": 501.0845, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 12100 }, { "epoch": 30.75, "learning_rate": 0.00012794612794612793, "loss": 0.3369, "step": 12200 }, { "epoch": 30.75, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3321588933467865, "eval_runtime": 501.0681, "eval_samples_per_second": 49.582, "eval_steps_per_second": 24.791, "step": 12200 }, { "epoch": 31.0, "learning_rate": 0.00012626262626262626, "loss": 0.3211, "step": 12300 }, { "epoch": 31.0, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3328503370285034, "eval_runtime": 501.3433, "eval_samples_per_second": 49.555, "eval_steps_per_second": 24.777, "step": 12300 }, { "epoch": 31.25, "learning_rate": 0.00012457912457912456, "loss": 0.329, "step": 12400 }, { "epoch": 31.25, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33213621377944946, "eval_runtime": 501.089, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 12400 }, { "epoch": 31.51, "learning_rate": 0.0001228956228956229, "loss": 0.3206, "step": 12500 }, { "epoch": 31.51, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33089062571525574, "eval_runtime": 501.0382, "eval_samples_per_second": 49.585, "eval_steps_per_second": 24.793, "step": 12500 }, { "epoch": 31.76, "learning_rate": 0.0001212121212121212, "loss": 0.3339, "step": 12600 }, { "epoch": 31.76, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3331972658634186, "eval_runtime": 501.242, "eval_samples_per_second": 49.565, "eval_steps_per_second": 24.782, "step": 12600 }, { "epoch": 32.01, "learning_rate": 0.00011952861952861952, "loss": 0.3323, "step": 12700 }, { "epoch": 32.01, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3316064476966858, "eval_runtime": 501.0626, "eval_samples_per_second": 49.583, "eval_steps_per_second": 24.791, "step": 12700 }, { "epoch": 32.26, "learning_rate": 0.00011784511784511783, "loss": 0.3273, "step": 12800 }, { "epoch": 32.26, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3323478400707245, "eval_runtime": 501.4098, "eval_samples_per_second": 49.548, "eval_steps_per_second": 24.774, "step": 12800 }, { "epoch": 32.51, "learning_rate": 0.00011616161616161616, "loss": 0.3362, "step": 12900 }, { "epoch": 32.51, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33072328567504883, "eval_runtime": 501.8013, "eval_samples_per_second": 49.51, "eval_steps_per_second": 24.755, "step": 12900 }, { "epoch": 32.77, "learning_rate": 0.00011447811447811446, "loss": 0.3387, "step": 13000 }, { "epoch": 32.77, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3308790922164917, "eval_runtime": 501.2768, "eval_samples_per_second": 49.561, "eval_steps_per_second": 24.781, "step": 13000 }, { "epoch": 33.02, "learning_rate": 0.00011279461279461279, "loss": 0.3173, "step": 13100 }, { "epoch": 33.02, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33112308382987976, "eval_runtime": 501.2827, "eval_samples_per_second": 49.561, "eval_steps_per_second": 24.78, "step": 13100 }, { "epoch": 33.27, "learning_rate": 0.00011111111111111109, "loss": 0.3291, "step": 13200 }, { "epoch": 33.27, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33085396885871887, "eval_runtime": 501.2449, "eval_samples_per_second": 49.565, "eval_steps_per_second": 24.782, "step": 13200 }, { "epoch": 33.52, "learning_rate": 0.00010942760942760942, "loss": 0.3316, "step": 13300 }, { "epoch": 33.52, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33154767751693726, "eval_runtime": 501.0257, "eval_samples_per_second": 49.586, "eval_steps_per_second": 24.793, "step": 13300 }, { "epoch": 33.77, "learning_rate": 0.00010774410774410772, "loss": 0.3366, "step": 13400 }, { "epoch": 33.77, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33317822217941284, "eval_runtime": 501.0403, "eval_samples_per_second": 49.585, "eval_steps_per_second": 24.792, "step": 13400 }, { "epoch": 34.03, "learning_rate": 0.00010606060606060605, "loss": 0.3115, "step": 13500 }, { "epoch": 34.03, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3382599353790283, "eval_runtime": 500.7949, "eval_samples_per_second": 49.609, "eval_steps_per_second": 24.805, "step": 13500 }, { "epoch": 34.28, "learning_rate": 0.00010437710437710438, "loss": 0.3275, "step": 13600 }, { "epoch": 34.28, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.332431823015213, "eval_runtime": 500.933, "eval_samples_per_second": 49.595, "eval_steps_per_second": 24.798, "step": 13600 }, { "epoch": 34.53, "learning_rate": 0.00010269360269360268, "loss": 0.3373, "step": 13700 }, { "epoch": 34.53, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33150389790534973, "eval_runtime": 500.781, "eval_samples_per_second": 49.611, "eval_steps_per_second": 24.805, "step": 13700 }, { "epoch": 34.78, "learning_rate": 0.00010101010101010101, "loss": 0.3247, "step": 13800 }, { "epoch": 34.78, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3313423693180084, "eval_runtime": 501.1265, "eval_samples_per_second": 49.576, "eval_steps_per_second": 24.788, "step": 13800 }, { "epoch": 35.03, "learning_rate": 9.932659932659931e-05, "loss": 0.3349, "step": 13900 }, { "epoch": 35.03, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33250102400779724, "eval_runtime": 501.0936, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 13900 }, { "epoch": 35.29, "learning_rate": 9.764309764309764e-05, "loss": 0.3223, "step": 14000 }, { "epoch": 35.29, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33122116327285767, "eval_runtime": 500.9681, "eval_samples_per_second": 49.592, "eval_steps_per_second": 24.796, "step": 14000 }, { "epoch": 35.54, "learning_rate": 9.595959595959594e-05, "loss": 0.3321, "step": 14100 }, { "epoch": 35.54, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3307776153087616, "eval_runtime": 500.9865, "eval_samples_per_second": 49.59, "eval_steps_per_second": 24.795, "step": 14100 }, { "epoch": 35.79, "learning_rate": 9.427609427609427e-05, "loss": 0.3304, "step": 14200 }, { "epoch": 35.79, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3315562605857849, "eval_runtime": 500.788, "eval_samples_per_second": 49.61, "eval_steps_per_second": 24.805, "step": 14200 }, { "epoch": 36.04, "learning_rate": 9.259259259259257e-05, "loss": 0.3262, "step": 14300 }, { "epoch": 36.04, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33196473121643066, "eval_runtime": 501.0344, "eval_samples_per_second": 49.585, "eval_steps_per_second": 24.793, "step": 14300 }, { "epoch": 36.29, "learning_rate": 9.09090909090909e-05, "loss": 0.3239, "step": 14400 }, { "epoch": 36.29, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3316847085952759, "eval_runtime": 501.0629, "eval_samples_per_second": 49.583, "eval_steps_per_second": 24.791, "step": 14400 }, { "epoch": 36.55, "learning_rate": 8.92255892255892e-05, "loss": 0.3325, "step": 14500 }, { "epoch": 36.55, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3308143615722656, "eval_runtime": 500.8635, "eval_samples_per_second": 49.602, "eval_steps_per_second": 24.801, "step": 14500 }, { "epoch": 36.8, "learning_rate": 8.754208754208753e-05, "loss": 0.325, "step": 14600 }, { "epoch": 36.8, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3316170275211334, "eval_runtime": 500.7755, "eval_samples_per_second": 49.611, "eval_steps_per_second": 24.806, "step": 14600 }, { "epoch": 37.05, "learning_rate": 8.585858585858584e-05, "loss": 0.3416, "step": 14700 }, { "epoch": 37.05, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3310682773590088, "eval_runtime": 501.0155, "eval_samples_per_second": 49.587, "eval_steps_per_second": 24.794, "step": 14700 }, { "epoch": 37.3, "learning_rate": 8.417508417508417e-05, "loss": 0.3226, "step": 14800 }, { "epoch": 37.3, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33090585470199585, "eval_runtime": 500.9851, "eval_samples_per_second": 49.59, "eval_steps_per_second": 24.795, "step": 14800 }, { "epoch": 37.56, "learning_rate": 8.24915824915825e-05, "loss": 0.3286, "step": 14900 }, { "epoch": 37.56, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3306817412376404, "eval_runtime": 500.941, "eval_samples_per_second": 49.595, "eval_steps_per_second": 24.797, "step": 14900 }, { "epoch": 37.81, "learning_rate": 8.08080808080808e-05, "loss": 0.3284, "step": 15000 }, { "epoch": 37.81, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3312084972858429, "eval_runtime": 500.7822, "eval_samples_per_second": 49.61, "eval_steps_per_second": 24.805, "step": 15000 }, { "epoch": 38.06, "learning_rate": 7.912457912457913e-05, "loss": 0.3298, "step": 15100 }, { "epoch": 38.06, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33263882994651794, "eval_runtime": 500.7072, "eval_samples_per_second": 49.618, "eval_steps_per_second": 24.809, "step": 15100 }, { "epoch": 38.31, "learning_rate": 7.744107744107743e-05, "loss": 0.3383, "step": 15200 }, { "epoch": 38.31, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33113569021224976, "eval_runtime": 501.0449, "eval_samples_per_second": 49.584, "eval_steps_per_second": 24.792, "step": 15200 }, { "epoch": 38.56, "learning_rate": 7.575757575757576e-05, "loss": 0.3418, "step": 15300 }, { "epoch": 38.56, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33080732822418213, "eval_runtime": 501.1158, "eval_samples_per_second": 49.577, "eval_steps_per_second": 24.789, "step": 15300 }, { "epoch": 38.82, "learning_rate": 7.407407407407406e-05, "loss": 0.3123, "step": 15400 }, { "epoch": 38.82, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3311246931552887, "eval_runtime": 501.2192, "eval_samples_per_second": 49.567, "eval_steps_per_second": 24.784, "step": 15400 }, { "epoch": 39.07, "learning_rate": 7.239057239057239e-05, "loss": 0.3237, "step": 15500 }, { "epoch": 39.07, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3346453011035919, "eval_runtime": 501.3886, "eval_samples_per_second": 49.55, "eval_steps_per_second": 24.775, "step": 15500 }, { "epoch": 39.32, "learning_rate": 7.07070707070707e-05, "loss": 0.3261, "step": 15600 }, { "epoch": 39.32, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33250510692596436, "eval_runtime": 501.4416, "eval_samples_per_second": 49.545, "eval_steps_per_second": 24.773, "step": 15600 }, { "epoch": 39.57, "learning_rate": 6.902356902356902e-05, "loss": 0.3269, "step": 15700 }, { "epoch": 39.57, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33122241497039795, "eval_runtime": 501.3022, "eval_samples_per_second": 49.559, "eval_steps_per_second": 24.779, "step": 15700 }, { "epoch": 39.82, "learning_rate": 6.734006734006734e-05, "loss": 0.3267, "step": 15800 }, { "epoch": 39.82, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3319133520126343, "eval_runtime": 501.3262, "eval_samples_per_second": 49.557, "eval_steps_per_second": 24.778, "step": 15800 }, { "epoch": 40.08, "learning_rate": 6.565656565656565e-05, "loss": 0.3381, "step": 15900 }, { "epoch": 40.08, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33269181847572327, "eval_runtime": 500.8586, "eval_samples_per_second": 49.603, "eval_steps_per_second": 24.801, "step": 15900 }, { "epoch": 40.33, "learning_rate": 6.397306397306397e-05, "loss": 0.3238, "step": 16000 }, { "epoch": 40.33, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3325785994529724, "eval_runtime": 501.1224, "eval_samples_per_second": 49.577, "eval_steps_per_second": 24.788, "step": 16000 }, { "epoch": 40.58, "learning_rate": 6.228956228956228e-05, "loss": 0.3299, "step": 16100 }, { "epoch": 40.58, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33203861117362976, "eval_runtime": 500.8556, "eval_samples_per_second": 49.603, "eval_steps_per_second": 24.802, "step": 16100 }, { "epoch": 40.83, "learning_rate": 6.06060606060606e-05, "loss": 0.3385, "step": 16200 }, { "epoch": 40.83, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33086949586868286, "eval_runtime": 501.1006, "eval_samples_per_second": 49.579, "eval_steps_per_second": 24.789, "step": 16200 }, { "epoch": 41.08, "learning_rate": 5.8922558922558913e-05, "loss": 0.3268, "step": 16300 }, { "epoch": 41.08, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33224013447761536, "eval_runtime": 500.9055, "eval_samples_per_second": 49.598, "eval_steps_per_second": 24.799, "step": 16300 }, { "epoch": 41.34, "learning_rate": 5.723905723905723e-05, "loss": 0.3253, "step": 16400 }, { "epoch": 41.34, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3319920301437378, "eval_runtime": 501.2306, "eval_samples_per_second": 49.566, "eval_steps_per_second": 24.783, "step": 16400 }, { "epoch": 41.59, "learning_rate": 5.5555555555555545e-05, "loss": 0.3261, "step": 16500 }, { "epoch": 41.59, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33143314719200134, "eval_runtime": 501.0325, "eval_samples_per_second": 49.586, "eval_steps_per_second": 24.793, "step": 16500 }, { "epoch": 41.84, "learning_rate": 5.387205387205386e-05, "loss": 0.3362, "step": 16600 }, { "epoch": 41.84, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3324301838874817, "eval_runtime": 500.7987, "eval_samples_per_second": 49.609, "eval_steps_per_second": 24.804, "step": 16600 }, { "epoch": 42.09, "learning_rate": 5.218855218855219e-05, "loss": 0.3203, "step": 16700 }, { "epoch": 42.09, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3325912356376648, "eval_runtime": 500.6821, "eval_samples_per_second": 49.62, "eval_steps_per_second": 24.81, "step": 16700 }, { "epoch": 42.34, "learning_rate": 5.0505050505050505e-05, "loss": 0.325, "step": 16800 }, { "epoch": 42.34, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3323043882846832, "eval_runtime": 501.26, "eval_samples_per_second": 49.563, "eval_steps_per_second": 24.782, "step": 16800 }, { "epoch": 42.6, "learning_rate": 4.882154882154882e-05, "loss": 0.3172, "step": 16900 }, { "epoch": 42.6, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33261871337890625, "eval_runtime": 500.9553, "eval_samples_per_second": 49.593, "eval_steps_per_second": 24.797, "step": 16900 }, { "epoch": 42.85, "learning_rate": 4.7138047138047136e-05, "loss": 0.3361, "step": 17000 }, { "epoch": 42.85, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3307626247406006, "eval_runtime": 501.0928, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 17000 }, { "epoch": 43.1, "learning_rate": 4.545454545454545e-05, "loss": 0.3432, "step": 17100 }, { "epoch": 43.1, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3309696614742279, "eval_runtime": 501.4895, "eval_samples_per_second": 49.54, "eval_steps_per_second": 24.77, "step": 17100 }, { "epoch": 43.35, "learning_rate": 4.377104377104377e-05, "loss": 0.3396, "step": 17200 }, { "epoch": 43.35, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3312534689903259, "eval_runtime": 501.211, "eval_samples_per_second": 49.568, "eval_steps_per_second": 24.784, "step": 17200 }, { "epoch": 43.6, "learning_rate": 4.208754208754208e-05, "loss": 0.3163, "step": 17300 }, { "epoch": 43.6, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33276140689849854, "eval_runtime": 501.2264, "eval_samples_per_second": 49.566, "eval_steps_per_second": 24.783, "step": 17300 }, { "epoch": 43.86, "learning_rate": 4.04040404040404e-05, "loss": 0.3353, "step": 17400 }, { "epoch": 43.86, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3318198025226593, "eval_runtime": 501.088, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 17400 }, { "epoch": 44.11, "learning_rate": 3.8720538720538714e-05, "loss": 0.3299, "step": 17500 }, { "epoch": 44.11, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3316582441329956, "eval_runtime": 501.3763, "eval_samples_per_second": 49.552, "eval_steps_per_second": 24.776, "step": 17500 }, { "epoch": 44.36, "learning_rate": 3.703703703703703e-05, "loss": 0.3213, "step": 17600 }, { "epoch": 44.36, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33194538950920105, "eval_runtime": 501.7895, "eval_samples_per_second": 49.511, "eval_steps_per_second": 24.755, "step": 17600 }, { "epoch": 44.61, "learning_rate": 3.535353535353535e-05, "loss": 0.3253, "step": 17700 }, { "epoch": 44.61, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33288490772247314, "eval_runtime": 501.1283, "eval_samples_per_second": 49.576, "eval_steps_per_second": 24.788, "step": 17700 }, { "epoch": 44.86, "learning_rate": 3.367003367003367e-05, "loss": 0.3391, "step": 17800 }, { "epoch": 44.86, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33222696185112, "eval_runtime": 501.5052, "eval_samples_per_second": 49.539, "eval_steps_per_second": 24.769, "step": 17800 }, { "epoch": 45.12, "learning_rate": 3.198653198653198e-05, "loss": 0.3179, "step": 17900 }, { "epoch": 45.12, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.333011269569397, "eval_runtime": 500.7091, "eval_samples_per_second": 49.618, "eval_steps_per_second": 24.809, "step": 17900 }, { "epoch": 45.37, "learning_rate": 3.03030303030303e-05, "loss": 0.3348, "step": 18000 }, { "epoch": 45.37, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3321002721786499, "eval_runtime": 501.2516, "eval_samples_per_second": 49.564, "eval_steps_per_second": 24.782, "step": 18000 }, { "epoch": 45.62, "learning_rate": 2.8619528619528615e-05, "loss": 0.3116, "step": 18100 }, { "epoch": 45.62, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33259570598602295, "eval_runtime": 501.2743, "eval_samples_per_second": 49.562, "eval_steps_per_second": 24.781, "step": 18100 }, { "epoch": 45.87, "learning_rate": 2.693602693602693e-05, "loss": 0.3334, "step": 18200 }, { "epoch": 45.87, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33218705654144287, "eval_runtime": 501.0248, "eval_samples_per_second": 49.586, "eval_steps_per_second": 24.793, "step": 18200 }, { "epoch": 46.12, "learning_rate": 2.5252525252525253e-05, "loss": 0.3401, "step": 18300 }, { "epoch": 46.12, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3314586579799652, "eval_runtime": 501.1615, "eval_samples_per_second": 49.573, "eval_steps_per_second": 24.786, "step": 18300 }, { "epoch": 46.38, "learning_rate": 2.3569023569023568e-05, "loss": 0.3381, "step": 18400 }, { "epoch": 46.38, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33111417293548584, "eval_runtime": 501.2779, "eval_samples_per_second": 49.561, "eval_steps_per_second": 24.781, "step": 18400 }, { "epoch": 46.63, "learning_rate": 2.1885521885521884e-05, "loss": 0.3154, "step": 18500 }, { "epoch": 46.63, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3327486515045166, "eval_runtime": 500.9424, "eval_samples_per_second": 49.595, "eval_steps_per_second": 24.797, "step": 18500 }, { "epoch": 46.88, "learning_rate": 2.02020202020202e-05, "loss": 0.3348, "step": 18600 }, { "epoch": 46.88, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33224716782569885, "eval_runtime": 500.9745, "eval_samples_per_second": 49.591, "eval_steps_per_second": 24.796, "step": 18600 }, { "epoch": 47.13, "learning_rate": 1.8518518518518515e-05, "loss": 0.3285, "step": 18700 }, { "epoch": 47.13, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3325360119342804, "eval_runtime": 501.157, "eval_samples_per_second": 49.573, "eval_steps_per_second": 24.787, "step": 18700 }, { "epoch": 47.39, "learning_rate": 1.6835016835016834e-05, "loss": 0.3256, "step": 18800 }, { "epoch": 47.39, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3328581750392914, "eval_runtime": 501.2027, "eval_samples_per_second": 49.569, "eval_steps_per_second": 24.784, "step": 18800 }, { "epoch": 47.64, "learning_rate": 1.515151515151515e-05, "loss": 0.3389, "step": 18900 }, { "epoch": 47.64, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3324856758117676, "eval_runtime": 501.1837, "eval_samples_per_second": 49.571, "eval_steps_per_second": 24.785, "step": 18900 }, { "epoch": 47.89, "learning_rate": 1.3468013468013465e-05, "loss": 0.3288, "step": 19000 }, { "epoch": 47.89, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3326534032821655, "eval_runtime": 501.4739, "eval_samples_per_second": 49.542, "eval_steps_per_second": 24.771, "step": 19000 }, { "epoch": 48.14, "learning_rate": 1.1784511784511784e-05, "loss": 0.3172, "step": 19100 }, { "epoch": 48.14, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3326767683029175, "eval_runtime": 500.8014, "eval_samples_per_second": 49.608, "eval_steps_per_second": 24.804, "step": 19100 }, { "epoch": 48.39, "learning_rate": 1.01010101010101e-05, "loss": 0.3211, "step": 19200 }, { "epoch": 48.39, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.3324893116950989, "eval_runtime": 500.8057, "eval_samples_per_second": 49.608, "eval_steps_per_second": 24.804, "step": 19200 }, { "epoch": 48.65, "learning_rate": 8.417508417508417e-06, "loss": 0.3348, "step": 19300 }, { "epoch": 48.65, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33247503638267517, "eval_runtime": 501.0569, "eval_samples_per_second": 49.583, "eval_steps_per_second": 24.792, "step": 19300 }, { "epoch": 48.9, "learning_rate": 6.7340067340067325e-06, "loss": 0.3327, "step": 19400 }, { "epoch": 48.9, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33261463046073914, "eval_runtime": 501.0903, "eval_samples_per_second": 49.58, "eval_steps_per_second": 24.79, "step": 19400 }, { "epoch": 49.15, "learning_rate": 5.05050505050505e-06, "loss": 0.3341, "step": 19500 }, { "epoch": 49.15, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33255448937416077, "eval_runtime": 500.9292, "eval_samples_per_second": 49.596, "eval_steps_per_second": 24.798, "step": 19500 }, { "epoch": 49.4, "learning_rate": 3.3670033670033663e-06, "loss": 0.3344, "step": 19600 }, { "epoch": 49.4, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33247339725494385, "eval_runtime": 501.4588, "eval_samples_per_second": 49.543, "eval_steps_per_second": 24.772, "step": 19600 }, { "epoch": 49.65, "learning_rate": 1.6835016835016831e-06, "loss": 0.3207, "step": 19700 }, { "epoch": 49.65, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33257797360420227, "eval_runtime": 501.4807, "eval_samples_per_second": 49.541, "eval_steps_per_second": 24.771, "step": 19700 }, { "epoch": 49.91, "learning_rate": 0.0, "loss": 0.3299, "step": 19800 }, { "epoch": 49.91, "eval_accuracy": 0.8974400257607471, "eval_loss": 0.33260539174079895, "eval_runtime": 501.1869, "eval_samples_per_second": 49.57, "eval_steps_per_second": 24.785, "step": 19800 }, { "epoch": 49.91, "step": 19800, "total_flos": 7.53101543607702e+19, "train_loss": 0.3200095210412536, "train_runtime": 116243.9445, "train_samples_per_second": 10.917, "train_steps_per_second": 0.17 } ], "logging_steps": 100, "max_steps": 19800, "num_train_epochs": 50, "save_steps": 100, "total_flos": 7.53101543607702e+19, "trial_name": null, "trial_params": null }