|
{ |
|
"best_metric": 0.9965786507808726, |
|
"best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_2/checkpoint-300", |
|
"epoch": 49.9054820415879, |
|
"eval_steps": 100, |
|
"global_step": 19800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.515151515151515e-05, |
|
"loss": 0.4872, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.21796834468841553, |
|
"eval_runtime": 498.2273, |
|
"eval_samples_per_second": 49.865, |
|
"eval_steps_per_second": 24.932, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.03030303030303e-05, |
|
"loss": 0.1571, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.93342456931251, |
|
"eval_loss": 0.2581726014614105, |
|
"eval_runtime": 500.3876, |
|
"eval_samples_per_second": 49.65, |
|
"eval_steps_per_second": 24.825, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.545454545454545e-05, |
|
"loss": 0.0644, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.9965786507808726, |
|
"eval_loss": 0.024423159658908844, |
|
"eval_runtime": 500.4785, |
|
"eval_samples_per_second": 49.64, |
|
"eval_steps_per_second": 24.82, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.06060606060606e-05, |
|
"loss": 0.0553, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.9927950410561907, |
|
"eval_loss": 0.11555636674165726, |
|
"eval_runtime": 500.405, |
|
"eval_samples_per_second": 49.648, |
|
"eval_steps_per_second": 24.824, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.575757575757576e-05, |
|
"loss": 0.1108, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.9898164546771856, |
|
"eval_loss": 0.1576482504606247, |
|
"eval_runtime": 500.3246, |
|
"eval_samples_per_second": 49.656, |
|
"eval_steps_per_second": 24.828, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.09090909090909e-05, |
|
"loss": 0.0849, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_accuracy": 0.9946868459185316, |
|
"eval_loss": 0.08708283305168152, |
|
"eval_runtime": 500.3759, |
|
"eval_samples_per_second": 49.651, |
|
"eval_steps_per_second": 24.825, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00010606060606060605, |
|
"loss": 0.0635, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_accuracy": 0.9938818225728546, |
|
"eval_loss": 0.10875184088945389, |
|
"eval_runtime": 500.3093, |
|
"eval_samples_per_second": 49.657, |
|
"eval_steps_per_second": 24.829, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0001212121212121212, |
|
"loss": 0.0504, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.9789888906778297, |
|
"eval_loss": 0.4074054956436157, |
|
"eval_runtime": 500.4204, |
|
"eval_samples_per_second": 49.646, |
|
"eval_steps_per_second": 24.823, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.00013636363636363634, |
|
"loss": 0.1075, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_accuracy": 0.9814442118821446, |
|
"eval_loss": 0.2954882085323334, |
|
"eval_runtime": 500.4501, |
|
"eval_samples_per_second": 49.643, |
|
"eval_steps_per_second": 24.822, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00015151515151515152, |
|
"loss": 0.2387, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_accuracy": 0.9956126227660602, |
|
"eval_loss": 0.06512398272752762, |
|
"eval_runtime": 500.7096, |
|
"eval_samples_per_second": 49.618, |
|
"eval_steps_per_second": 24.809, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 0.3052, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.23793257772922516, |
|
"eval_runtime": 500.5364, |
|
"eval_samples_per_second": 49.635, |
|
"eval_steps_per_second": 24.817, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0001818181818181818, |
|
"loss": 0.3336, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.35269346833229065, |
|
"eval_runtime": 501.0064, |
|
"eval_samples_per_second": 49.588, |
|
"eval_steps_per_second": 24.794, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00019696969696969695, |
|
"loss": 0.3322, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33067458868026733, |
|
"eval_runtime": 502.8336, |
|
"eval_samples_per_second": 49.408, |
|
"eval_steps_per_second": 24.704, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0002121212121212121, |
|
"loss": 0.3201, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.34046611189842224, |
|
"eval_runtime": 500.7806, |
|
"eval_samples_per_second": 49.611, |
|
"eval_steps_per_second": 24.805, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.00022727272727272725, |
|
"loss": 0.3406, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33353403210639954, |
|
"eval_runtime": 500.7499, |
|
"eval_samples_per_second": 49.614, |
|
"eval_steps_per_second": 24.807, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0002424242424242424, |
|
"loss": 0.3475, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3340916931629181, |
|
"eval_runtime": 500.598, |
|
"eval_samples_per_second": 49.629, |
|
"eval_steps_per_second": 24.814, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00025757575757575756, |
|
"loss": 0.3312, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33610448241233826, |
|
"eval_runtime": 500.8798, |
|
"eval_samples_per_second": 49.601, |
|
"eval_steps_per_second": 24.8, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0002727272727272727, |
|
"loss": 0.3367, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3309839069843292, |
|
"eval_runtime": 500.5768, |
|
"eval_samples_per_second": 49.631, |
|
"eval_steps_per_second": 24.815, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00028787878787878786, |
|
"loss": 0.3284, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33385568857192993, |
|
"eval_runtime": 500.453, |
|
"eval_samples_per_second": 49.643, |
|
"eval_steps_per_second": 24.822, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00029966329966329963, |
|
"loss": 0.3267, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3350389301776886, |
|
"eval_runtime": 500.7512, |
|
"eval_samples_per_second": 49.613, |
|
"eval_steps_per_second": 24.807, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.00029797979797979794, |
|
"loss": 0.338, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33081698417663574, |
|
"eval_runtime": 500.5738, |
|
"eval_samples_per_second": 49.631, |
|
"eval_steps_per_second": 24.816, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.00029629629629629624, |
|
"loss": 0.3277, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.330895334482193, |
|
"eval_runtime": 500.5595, |
|
"eval_samples_per_second": 49.632, |
|
"eval_steps_per_second": 24.816, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.0002946127946127946, |
|
"loss": 0.3294, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3312866687774658, |
|
"eval_runtime": 500.6862, |
|
"eval_samples_per_second": 49.62, |
|
"eval_steps_per_second": 24.81, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0002929292929292929, |
|
"loss": 0.3315, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33599570393562317, |
|
"eval_runtime": 500.6176, |
|
"eval_samples_per_second": 49.627, |
|
"eval_steps_per_second": 24.813, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00029124579124579125, |
|
"loss": 0.3397, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33067843317985535, |
|
"eval_runtime": 500.7157, |
|
"eval_samples_per_second": 49.617, |
|
"eval_steps_per_second": 24.808, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00028956228956228955, |
|
"loss": 0.3318, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3359489440917969, |
|
"eval_runtime": 500.805, |
|
"eval_samples_per_second": 49.608, |
|
"eval_steps_per_second": 24.804, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.00028787878787878786, |
|
"loss": 0.3312, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3307555615901947, |
|
"eval_runtime": 500.6263, |
|
"eval_samples_per_second": 49.626, |
|
"eval_steps_per_second": 24.813, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.00028619528619528616, |
|
"loss": 0.3155, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33172452449798584, |
|
"eval_runtime": 500.7242, |
|
"eval_samples_per_second": 49.616, |
|
"eval_steps_per_second": 24.808, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.0002845117845117845, |
|
"loss": 0.3304, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33618223667144775, |
|
"eval_runtime": 500.5229, |
|
"eval_samples_per_second": 49.636, |
|
"eval_steps_per_second": 24.818, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0002828282828282828, |
|
"loss": 0.338, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3341616988182068, |
|
"eval_runtime": 500.7918, |
|
"eval_samples_per_second": 49.609, |
|
"eval_steps_per_second": 24.805, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.0002811447811447811, |
|
"loss": 0.3241, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.330985426902771, |
|
"eval_runtime": 500.924, |
|
"eval_samples_per_second": 49.596, |
|
"eval_steps_per_second": 24.798, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.0002794612794612794, |
|
"loss": 0.3325, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33258649706840515, |
|
"eval_runtime": 500.7775, |
|
"eval_samples_per_second": 49.611, |
|
"eval_steps_per_second": 24.805, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 0.3202, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3345409035682678, |
|
"eval_runtime": 501.0057, |
|
"eval_samples_per_second": 49.588, |
|
"eval_steps_per_second": 24.794, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.0002760942760942761, |
|
"loss": 0.3315, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3335334360599518, |
|
"eval_runtime": 500.8188, |
|
"eval_samples_per_second": 49.607, |
|
"eval_steps_per_second": 24.803, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 0.0002744107744107744, |
|
"loss": 0.3288, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33116644620895386, |
|
"eval_runtime": 501.0568, |
|
"eval_samples_per_second": 49.583, |
|
"eval_steps_per_second": 24.792, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 0.0002727272727272727, |
|
"loss": 0.3371, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.34014323353767395, |
|
"eval_runtime": 500.7918, |
|
"eval_samples_per_second": 49.609, |
|
"eval_steps_per_second": 24.805, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.00027104377104377104, |
|
"loss": 0.3409, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33300086855888367, |
|
"eval_runtime": 500.8108, |
|
"eval_samples_per_second": 49.608, |
|
"eval_steps_per_second": 24.804, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.00026936026936026934, |
|
"loss": 0.3236, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3329709768295288, |
|
"eval_runtime": 500.7254, |
|
"eval_samples_per_second": 49.616, |
|
"eval_steps_per_second": 24.808, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.00026767676767676764, |
|
"loss": 0.3224, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3320678770542145, |
|
"eval_runtime": 500.9946, |
|
"eval_samples_per_second": 49.589, |
|
"eval_steps_per_second": 24.795, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.00026599326599326595, |
|
"loss": 0.3439, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33262181282043457, |
|
"eval_runtime": 501.0278, |
|
"eval_samples_per_second": 49.586, |
|
"eval_steps_per_second": 24.793, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 0.0002643097643097643, |
|
"loss": 0.3382, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3310418426990509, |
|
"eval_runtime": 501.0043, |
|
"eval_samples_per_second": 49.588, |
|
"eval_steps_per_second": 24.794, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 0.0002626262626262626, |
|
"loss": 0.3307, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33820316195487976, |
|
"eval_runtime": 501.0192, |
|
"eval_samples_per_second": 49.587, |
|
"eval_steps_per_second": 24.793, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 0.0002609427609427609, |
|
"loss": 0.3231, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3324846625328064, |
|
"eval_runtime": 500.6054, |
|
"eval_samples_per_second": 49.628, |
|
"eval_steps_per_second": 24.814, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 0.0002592592592592592, |
|
"loss": 0.3095, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3348234295845032, |
|
"eval_runtime": 500.6559, |
|
"eval_samples_per_second": 49.623, |
|
"eval_steps_per_second": 24.811, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 0.00025757575757575756, |
|
"loss": 0.3442, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33274412155151367, |
|
"eval_runtime": 501.0421, |
|
"eval_samples_per_second": 49.585, |
|
"eval_steps_per_second": 24.792, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 0.00025589225589225587, |
|
"loss": 0.3269, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33261528611183167, |
|
"eval_runtime": 500.8659, |
|
"eval_samples_per_second": 49.602, |
|
"eval_steps_per_second": 24.801, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 0.00025420875420875417, |
|
"loss": 0.3323, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3308444619178772, |
|
"eval_runtime": 501.0609, |
|
"eval_samples_per_second": 49.583, |
|
"eval_steps_per_second": 24.791, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 0.0002525252525252525, |
|
"loss": 0.3313, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3308044970035553, |
|
"eval_runtime": 500.8061, |
|
"eval_samples_per_second": 49.608, |
|
"eval_steps_per_second": 24.804, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 0.0002508417508417508, |
|
"loss": 0.3283, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3314284384250641, |
|
"eval_runtime": 500.7554, |
|
"eval_samples_per_second": 49.613, |
|
"eval_steps_per_second": 24.807, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 0.00024915824915824913, |
|
"loss": 0.3331, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306741416454315, |
|
"eval_runtime": 500.9128, |
|
"eval_samples_per_second": 49.597, |
|
"eval_steps_per_second": 24.799, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 0.0002474747474747475, |
|
"loss": 0.3317, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3343793749809265, |
|
"eval_runtime": 500.7387, |
|
"eval_samples_per_second": 49.615, |
|
"eval_steps_per_second": 24.807, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 0.0002457912457912458, |
|
"loss": 0.3283, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33199575543403625, |
|
"eval_runtime": 500.8527, |
|
"eval_samples_per_second": 49.603, |
|
"eval_steps_per_second": 24.802, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"learning_rate": 0.00024410774410774406, |
|
"loss": 0.3263, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33114317059516907, |
|
"eval_runtime": 500.8605, |
|
"eval_samples_per_second": 49.603, |
|
"eval_steps_per_second": 24.801, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 0.0002424242424242424, |
|
"loss": 0.3421, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306863009929657, |
|
"eval_runtime": 500.9253, |
|
"eval_samples_per_second": 49.596, |
|
"eval_steps_per_second": 24.798, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 0.00024074074074074072, |
|
"loss": 0.3164, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3318192958831787, |
|
"eval_runtime": 500.4762, |
|
"eval_samples_per_second": 49.641, |
|
"eval_steps_per_second": 24.82, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 0.00023905723905723905, |
|
"loss": 0.3315, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3335217535495758, |
|
"eval_runtime": 500.848, |
|
"eval_samples_per_second": 49.604, |
|
"eval_steps_per_second": 24.802, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 0.00023737373737373732, |
|
"loss": 0.3415, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3314845860004425, |
|
"eval_runtime": 500.8633, |
|
"eval_samples_per_second": 49.602, |
|
"eval_steps_per_second": 24.801, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 0.00023569023569023565, |
|
"loss": 0.3325, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33067190647125244, |
|
"eval_runtime": 500.8182, |
|
"eval_samples_per_second": 49.607, |
|
"eval_steps_per_second": 24.803, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 0.00023400673400673398, |
|
"loss": 0.3264, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33295896649360657, |
|
"eval_runtime": 500.9198, |
|
"eval_samples_per_second": 49.597, |
|
"eval_steps_per_second": 24.798, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 0.0002323232323232323, |
|
"loss": 0.3223, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306836187839508, |
|
"eval_runtime": 500.9444, |
|
"eval_samples_per_second": 49.594, |
|
"eval_steps_per_second": 24.797, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 0.00023063973063973064, |
|
"loss": 0.3289, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3328978717327118, |
|
"eval_runtime": 500.8768, |
|
"eval_samples_per_second": 49.601, |
|
"eval_steps_per_second": 24.801, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 0.00022895622895622892, |
|
"loss": 0.3353, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33112406730651855, |
|
"eval_runtime": 500.9115, |
|
"eval_samples_per_second": 49.598, |
|
"eval_steps_per_second": 24.799, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 0.00022727272727272725, |
|
"loss": 0.3246, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3310993015766144, |
|
"eval_runtime": 501.1149, |
|
"eval_samples_per_second": 49.577, |
|
"eval_steps_per_second": 24.789, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 0.00022558922558922557, |
|
"loss": 0.3425, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.330674409866333, |
|
"eval_runtime": 500.85, |
|
"eval_samples_per_second": 49.604, |
|
"eval_steps_per_second": 24.802, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 0.0002239057239057239, |
|
"loss": 0.331, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306954503059387, |
|
"eval_runtime": 501.1853, |
|
"eval_samples_per_second": 49.57, |
|
"eval_steps_per_second": 24.785, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 0.00022222222222222218, |
|
"loss": 0.3293, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33531516790390015, |
|
"eval_runtime": 500.9658, |
|
"eval_samples_per_second": 49.592, |
|
"eval_steps_per_second": 24.796, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 0.0002205387205387205, |
|
"loss": 0.3249, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3339368402957916, |
|
"eval_runtime": 500.8138, |
|
"eval_samples_per_second": 49.607, |
|
"eval_steps_per_second": 24.804, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 0.00021885521885521884, |
|
"loss": 0.3214, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3337734639644623, |
|
"eval_runtime": 500.9586, |
|
"eval_samples_per_second": 49.593, |
|
"eval_steps_per_second": 24.796, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 0.00021717171717171717, |
|
"loss": 0.3259, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3327140212059021, |
|
"eval_runtime": 501.0122, |
|
"eval_samples_per_second": 49.588, |
|
"eval_steps_per_second": 24.794, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 17.64, |
|
"learning_rate": 0.00021548821548821544, |
|
"loss": 0.3408, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 17.64, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33182209730148315, |
|
"eval_runtime": 500.9279, |
|
"eval_samples_per_second": 49.596, |
|
"eval_steps_per_second": 24.798, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 0.00021380471380471377, |
|
"loss": 0.3258, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33183717727661133, |
|
"eval_runtime": 501.2678, |
|
"eval_samples_per_second": 49.562, |
|
"eval_steps_per_second": 24.781, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 0.0002121212121212121, |
|
"loss": 0.3299, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33080849051475525, |
|
"eval_runtime": 501.2555, |
|
"eval_samples_per_second": 49.564, |
|
"eval_steps_per_second": 24.782, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.00021043771043771043, |
|
"loss": 0.327, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3371123671531677, |
|
"eval_runtime": 500.9892, |
|
"eval_samples_per_second": 49.59, |
|
"eval_steps_per_second": 24.795, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"learning_rate": 0.00020875420875420876, |
|
"loss": 0.3317, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3307543694972992, |
|
"eval_runtime": 500.9046, |
|
"eval_samples_per_second": 49.598, |
|
"eval_steps_per_second": 24.799, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"learning_rate": 0.00020707070707070703, |
|
"loss": 0.3291, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33100253343582153, |
|
"eval_runtime": 500.936, |
|
"eval_samples_per_second": 49.595, |
|
"eval_steps_per_second": 24.798, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.16, |
|
"learning_rate": 0.00020538720538720536, |
|
"loss": 0.3263, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 19.16, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33248230814933777, |
|
"eval_runtime": 500.8588, |
|
"eval_samples_per_second": 49.603, |
|
"eval_steps_per_second": 24.801, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 0.0002037037037037037, |
|
"loss": 0.3223, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33463332056999207, |
|
"eval_runtime": 500.8344, |
|
"eval_samples_per_second": 49.605, |
|
"eval_steps_per_second": 24.803, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 0.00020202020202020202, |
|
"loss": 0.3403, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3315812647342682, |
|
"eval_runtime": 500.8929, |
|
"eval_samples_per_second": 49.599, |
|
"eval_steps_per_second": 24.8, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 0.0002003367003367003, |
|
"loss": 0.3265, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3309071660041809, |
|
"eval_runtime": 500.8065, |
|
"eval_samples_per_second": 49.608, |
|
"eval_steps_per_second": 24.804, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"learning_rate": 0.00019865319865319862, |
|
"loss": 0.33, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3318468928337097, |
|
"eval_runtime": 501.0869, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 20.42, |
|
"learning_rate": 0.00019696969696969695, |
|
"loss": 0.3488, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 20.42, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33127933740615845, |
|
"eval_runtime": 500.8956, |
|
"eval_samples_per_second": 49.599, |
|
"eval_steps_per_second": 24.8, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"learning_rate": 0.00019528619528619528, |
|
"loss": 0.3293, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33354687690734863, |
|
"eval_runtime": 501.2059, |
|
"eval_samples_per_second": 49.568, |
|
"eval_steps_per_second": 24.784, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 0.00019360269360269356, |
|
"loss": 0.3095, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33559244871139526, |
|
"eval_runtime": 501.1442, |
|
"eval_samples_per_second": 49.575, |
|
"eval_steps_per_second": 24.787, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 21.17, |
|
"learning_rate": 0.0001919191919191919, |
|
"loss": 0.3366, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 21.17, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3331533968448639, |
|
"eval_runtime": 501.0445, |
|
"eval_samples_per_second": 49.584, |
|
"eval_steps_per_second": 24.792, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 21.42, |
|
"learning_rate": 0.00019023569023569022, |
|
"loss": 0.317, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 21.42, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3337852358818054, |
|
"eval_runtime": 501.2024, |
|
"eval_samples_per_second": 49.569, |
|
"eval_steps_per_second": 24.784, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"learning_rate": 0.00018855218855218854, |
|
"loss": 0.3299, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3308347165584564, |
|
"eval_runtime": 501.0, |
|
"eval_samples_per_second": 49.589, |
|
"eval_steps_per_second": 24.794, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 21.93, |
|
"learning_rate": 0.00018686868686868687, |
|
"loss": 0.3434, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 21.93, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.330965131521225, |
|
"eval_runtime": 501.1751, |
|
"eval_samples_per_second": 49.571, |
|
"eval_steps_per_second": 24.786, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 22.18, |
|
"learning_rate": 0.00018518518518518515, |
|
"loss": 0.3208, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 22.18, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3308819830417633, |
|
"eval_runtime": 501.189, |
|
"eval_samples_per_second": 49.57, |
|
"eval_steps_per_second": 24.785, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"learning_rate": 0.00018350168350168348, |
|
"loss": 0.3351, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33238929510116577, |
|
"eval_runtime": 501.2428, |
|
"eval_samples_per_second": 49.565, |
|
"eval_steps_per_second": 24.782, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 22.68, |
|
"learning_rate": 0.0001818181818181818, |
|
"loss": 0.3301, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 22.68, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3308260440826416, |
|
"eval_runtime": 500.8427, |
|
"eval_samples_per_second": 49.604, |
|
"eval_steps_per_second": 24.802, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 22.94, |
|
"learning_rate": 0.00018013468013468014, |
|
"loss": 0.3196, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 22.94, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3329552710056305, |
|
"eval_runtime": 501.1009, |
|
"eval_samples_per_second": 49.579, |
|
"eval_steps_per_second": 24.789, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 0.0001784511784511784, |
|
"loss": 0.3339, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33333924412727356, |
|
"eval_runtime": 500.8988, |
|
"eval_samples_per_second": 49.599, |
|
"eval_steps_per_second": 24.799, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"learning_rate": 0.00017676767676767674, |
|
"loss": 0.3249, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3307563364505768, |
|
"eval_runtime": 501.0394, |
|
"eval_samples_per_second": 49.585, |
|
"eval_steps_per_second": 24.792, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 23.69, |
|
"learning_rate": 0.00017508417508417507, |
|
"loss": 0.3247, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 23.69, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3338105082511902, |
|
"eval_runtime": 500.7662, |
|
"eval_samples_per_second": 49.612, |
|
"eval_steps_per_second": 24.806, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 0.0001734006734006734, |
|
"loss": 0.3369, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3312574028968811, |
|
"eval_runtime": 501.3028, |
|
"eval_samples_per_second": 49.559, |
|
"eval_steps_per_second": 24.779, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"learning_rate": 0.00017171717171717167, |
|
"loss": 0.3291, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3320288062095642, |
|
"eval_runtime": 501.3419, |
|
"eval_samples_per_second": 49.555, |
|
"eval_steps_per_second": 24.777, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"learning_rate": 0.00017003367003367, |
|
"loss": 0.3307, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33087393641471863, |
|
"eval_runtime": 501.1809, |
|
"eval_samples_per_second": 49.571, |
|
"eval_steps_per_second": 24.785, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 24.7, |
|
"learning_rate": 0.00016835016835016833, |
|
"loss": 0.3328, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 24.7, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33073562383651733, |
|
"eval_runtime": 500.8708, |
|
"eval_samples_per_second": 49.602, |
|
"eval_steps_per_second": 24.801, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 0.3277, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3341946601867676, |
|
"eval_runtime": 501.1287, |
|
"eval_samples_per_second": 49.576, |
|
"eval_steps_per_second": 24.788, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 0.000164983164983165, |
|
"loss": 0.3278, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3309917449951172, |
|
"eval_runtime": 500.9142, |
|
"eval_samples_per_second": 49.597, |
|
"eval_steps_per_second": 24.799, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 25.46, |
|
"learning_rate": 0.00016329966329966327, |
|
"loss": 0.3197, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 25.46, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3348841965198517, |
|
"eval_runtime": 501.1589, |
|
"eval_samples_per_second": 49.573, |
|
"eval_steps_per_second": 24.787, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"learning_rate": 0.0001616161616161616, |
|
"loss": 0.3273, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3321140706539154, |
|
"eval_runtime": 501.4344, |
|
"eval_samples_per_second": 49.546, |
|
"eval_steps_per_second": 24.773, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"learning_rate": 0.00015993265993265992, |
|
"loss": 0.3345, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3312225043773651, |
|
"eval_runtime": 500.9045, |
|
"eval_samples_per_second": 49.598, |
|
"eval_steps_per_second": 24.799, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"learning_rate": 0.00015824915824915825, |
|
"loss": 0.3351, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33248284459114075, |
|
"eval_runtime": 501.2052, |
|
"eval_samples_per_second": 49.569, |
|
"eval_steps_per_second": 24.784, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 26.47, |
|
"learning_rate": 0.00015656565656565653, |
|
"loss": 0.3144, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 26.47, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.334583580493927, |
|
"eval_runtime": 501.103, |
|
"eval_samples_per_second": 49.579, |
|
"eval_steps_per_second": 24.789, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 0.00015488215488215486, |
|
"loss": 0.3361, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33112606406211853, |
|
"eval_runtime": 500.6859, |
|
"eval_samples_per_second": 49.62, |
|
"eval_steps_per_second": 24.81, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"learning_rate": 0.00015319865319865319, |
|
"loss": 0.3334, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306848406791687, |
|
"eval_runtime": 500.81, |
|
"eval_samples_per_second": 49.608, |
|
"eval_steps_per_second": 24.804, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 27.22, |
|
"learning_rate": 0.00015151515151515152, |
|
"loss": 0.3287, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 27.22, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3373050093650818, |
|
"eval_runtime": 501.0133, |
|
"eval_samples_per_second": 49.588, |
|
"eval_steps_per_second": 24.794, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"learning_rate": 0.00014983164983164982, |
|
"loss": 0.3374, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306737244129181, |
|
"eval_runtime": 501.4272, |
|
"eval_samples_per_second": 49.547, |
|
"eval_steps_per_second": 24.773, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 27.73, |
|
"learning_rate": 0.00014814814814814812, |
|
"loss": 0.3302, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 27.73, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306819200515747, |
|
"eval_runtime": 501.0901, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 27.98, |
|
"learning_rate": 0.00014646464646464645, |
|
"loss": 0.3245, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 27.98, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33153215050697327, |
|
"eval_runtime": 501.2107, |
|
"eval_samples_per_second": 49.568, |
|
"eval_steps_per_second": 24.784, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 28.23, |
|
"learning_rate": 0.00014478114478114478, |
|
"loss": 0.3353, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 28.23, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33351030945777893, |
|
"eval_runtime": 501.1769, |
|
"eval_samples_per_second": 49.571, |
|
"eval_steps_per_second": 24.786, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 28.48, |
|
"learning_rate": 0.00014309764309764308, |
|
"loss": 0.3191, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 28.48, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33356890082359314, |
|
"eval_runtime": 501.2908, |
|
"eval_samples_per_second": 49.56, |
|
"eval_steps_per_second": 24.78, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 28.73, |
|
"learning_rate": 0.0001414141414141414, |
|
"loss": 0.3226, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 28.73, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33081722259521484, |
|
"eval_runtime": 501.1297, |
|
"eval_samples_per_second": 49.576, |
|
"eval_steps_per_second": 24.788, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"learning_rate": 0.0001397306397306397, |
|
"loss": 0.3384, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3321709930896759, |
|
"eval_runtime": 501.2824, |
|
"eval_samples_per_second": 49.561, |
|
"eval_steps_per_second": 24.78, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"learning_rate": 0.00013804713804713804, |
|
"loss": 0.3368, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3336506485939026, |
|
"eval_runtime": 500.9493, |
|
"eval_samples_per_second": 49.594, |
|
"eval_steps_per_second": 24.797, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 29.49, |
|
"learning_rate": 0.00013636363636363634, |
|
"loss": 0.3224, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 29.49, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3331962823867798, |
|
"eval_runtime": 501.3861, |
|
"eval_samples_per_second": 49.551, |
|
"eval_steps_per_second": 24.775, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 29.74, |
|
"learning_rate": 0.00013468013468013467, |
|
"loss": 0.3224, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 29.74, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3318473696708679, |
|
"eval_runtime": 501.3052, |
|
"eval_samples_per_second": 49.559, |
|
"eval_steps_per_second": 24.779, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 0.00013299663299663297, |
|
"loss": 0.3363, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3309670388698578, |
|
"eval_runtime": 501.4331, |
|
"eval_samples_per_second": 49.546, |
|
"eval_steps_per_second": 24.773, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"learning_rate": 0.0001313131313131313, |
|
"loss": 0.327, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306976854801178, |
|
"eval_runtime": 501.4726, |
|
"eval_samples_per_second": 49.542, |
|
"eval_steps_per_second": 24.771, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 30.5, |
|
"learning_rate": 0.0001296296296296296, |
|
"loss": 0.3291, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 30.5, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306950628757477, |
|
"eval_runtime": 501.0845, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"learning_rate": 0.00012794612794612793, |
|
"loss": 0.3369, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3321588933467865, |
|
"eval_runtime": 501.0681, |
|
"eval_samples_per_second": 49.582, |
|
"eval_steps_per_second": 24.791, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 0.00012626262626262626, |
|
"loss": 0.3211, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3328503370285034, |
|
"eval_runtime": 501.3433, |
|
"eval_samples_per_second": 49.555, |
|
"eval_steps_per_second": 24.777, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 0.00012457912457912456, |
|
"loss": 0.329, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33213621377944946, |
|
"eval_runtime": 501.089, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 31.51, |
|
"learning_rate": 0.0001228956228956229, |
|
"loss": 0.3206, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 31.51, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33089062571525574, |
|
"eval_runtime": 501.0382, |
|
"eval_samples_per_second": 49.585, |
|
"eval_steps_per_second": 24.793, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 31.76, |
|
"learning_rate": 0.0001212121212121212, |
|
"loss": 0.3339, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 31.76, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3331972658634186, |
|
"eval_runtime": 501.242, |
|
"eval_samples_per_second": 49.565, |
|
"eval_steps_per_second": 24.782, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 0.00011952861952861952, |
|
"loss": 0.3323, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3316064476966858, |
|
"eval_runtime": 501.0626, |
|
"eval_samples_per_second": 49.583, |
|
"eval_steps_per_second": 24.791, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 0.00011784511784511783, |
|
"loss": 0.3273, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3323478400707245, |
|
"eval_runtime": 501.4098, |
|
"eval_samples_per_second": 49.548, |
|
"eval_steps_per_second": 24.774, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 32.51, |
|
"learning_rate": 0.00011616161616161616, |
|
"loss": 0.3362, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 32.51, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33072328567504883, |
|
"eval_runtime": 501.8013, |
|
"eval_samples_per_second": 49.51, |
|
"eval_steps_per_second": 24.755, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 32.77, |
|
"learning_rate": 0.00011447811447811446, |
|
"loss": 0.3387, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 32.77, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3308790922164917, |
|
"eval_runtime": 501.2768, |
|
"eval_samples_per_second": 49.561, |
|
"eval_steps_per_second": 24.781, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 0.00011279461279461279, |
|
"loss": 0.3173, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33112308382987976, |
|
"eval_runtime": 501.2827, |
|
"eval_samples_per_second": 49.561, |
|
"eval_steps_per_second": 24.78, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 33.27, |
|
"learning_rate": 0.00011111111111111109, |
|
"loss": 0.3291, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 33.27, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33085396885871887, |
|
"eval_runtime": 501.2449, |
|
"eval_samples_per_second": 49.565, |
|
"eval_steps_per_second": 24.782, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 33.52, |
|
"learning_rate": 0.00010942760942760942, |
|
"loss": 0.3316, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 33.52, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33154767751693726, |
|
"eval_runtime": 501.0257, |
|
"eval_samples_per_second": 49.586, |
|
"eval_steps_per_second": 24.793, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 33.77, |
|
"learning_rate": 0.00010774410774410772, |
|
"loss": 0.3366, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 33.77, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33317822217941284, |
|
"eval_runtime": 501.0403, |
|
"eval_samples_per_second": 49.585, |
|
"eval_steps_per_second": 24.792, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 34.03, |
|
"learning_rate": 0.00010606060606060605, |
|
"loss": 0.3115, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 34.03, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3382599353790283, |
|
"eval_runtime": 500.7949, |
|
"eval_samples_per_second": 49.609, |
|
"eval_steps_per_second": 24.805, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 34.28, |
|
"learning_rate": 0.00010437710437710438, |
|
"loss": 0.3275, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 34.28, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.332431823015213, |
|
"eval_runtime": 500.933, |
|
"eval_samples_per_second": 49.595, |
|
"eval_steps_per_second": 24.798, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 34.53, |
|
"learning_rate": 0.00010269360269360268, |
|
"loss": 0.3373, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 34.53, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33150389790534973, |
|
"eval_runtime": 500.781, |
|
"eval_samples_per_second": 49.611, |
|
"eval_steps_per_second": 24.805, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"learning_rate": 0.00010101010101010101, |
|
"loss": 0.3247, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3313423693180084, |
|
"eval_runtime": 501.1265, |
|
"eval_samples_per_second": 49.576, |
|
"eval_steps_per_second": 24.788, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"learning_rate": 9.932659932659931e-05, |
|
"loss": 0.3349, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33250102400779724, |
|
"eval_runtime": 501.0936, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"learning_rate": 9.764309764309764e-05, |
|
"loss": 0.3223, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33122116327285767, |
|
"eval_runtime": 500.9681, |
|
"eval_samples_per_second": 49.592, |
|
"eval_steps_per_second": 24.796, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 35.54, |
|
"learning_rate": 9.595959595959594e-05, |
|
"loss": 0.3321, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 35.54, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3307776153087616, |
|
"eval_runtime": 500.9865, |
|
"eval_samples_per_second": 49.59, |
|
"eval_steps_per_second": 24.795, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 35.79, |
|
"learning_rate": 9.427609427609427e-05, |
|
"loss": 0.3304, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 35.79, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3315562605857849, |
|
"eval_runtime": 500.788, |
|
"eval_samples_per_second": 49.61, |
|
"eval_steps_per_second": 24.805, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"learning_rate": 9.259259259259257e-05, |
|
"loss": 0.3262, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33196473121643066, |
|
"eval_runtime": 501.0344, |
|
"eval_samples_per_second": 49.585, |
|
"eval_steps_per_second": 24.793, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 36.29, |
|
"learning_rate": 9.09090909090909e-05, |
|
"loss": 0.3239, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 36.29, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3316847085952759, |
|
"eval_runtime": 501.0629, |
|
"eval_samples_per_second": 49.583, |
|
"eval_steps_per_second": 24.791, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"learning_rate": 8.92255892255892e-05, |
|
"loss": 0.3325, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3308143615722656, |
|
"eval_runtime": 500.8635, |
|
"eval_samples_per_second": 49.602, |
|
"eval_steps_per_second": 24.801, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 8.754208754208753e-05, |
|
"loss": 0.325, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3316170275211334, |
|
"eval_runtime": 500.7755, |
|
"eval_samples_per_second": 49.611, |
|
"eval_steps_per_second": 24.806, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"learning_rate": 8.585858585858584e-05, |
|
"loss": 0.3416, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3310682773590088, |
|
"eval_runtime": 501.0155, |
|
"eval_samples_per_second": 49.587, |
|
"eval_steps_per_second": 24.794, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 37.3, |
|
"learning_rate": 8.417508417508417e-05, |
|
"loss": 0.3226, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 37.3, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33090585470199585, |
|
"eval_runtime": 500.9851, |
|
"eval_samples_per_second": 49.59, |
|
"eval_steps_per_second": 24.795, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 37.56, |
|
"learning_rate": 8.24915824915825e-05, |
|
"loss": 0.3286, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 37.56, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3306817412376404, |
|
"eval_runtime": 500.941, |
|
"eval_samples_per_second": 49.595, |
|
"eval_steps_per_second": 24.797, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"learning_rate": 8.08080808080808e-05, |
|
"loss": 0.3284, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3312084972858429, |
|
"eval_runtime": 500.7822, |
|
"eval_samples_per_second": 49.61, |
|
"eval_steps_per_second": 24.805, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"learning_rate": 7.912457912457913e-05, |
|
"loss": 0.3298, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33263882994651794, |
|
"eval_runtime": 500.7072, |
|
"eval_samples_per_second": 49.618, |
|
"eval_steps_per_second": 24.809, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 38.31, |
|
"learning_rate": 7.744107744107743e-05, |
|
"loss": 0.3383, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 38.31, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33113569021224976, |
|
"eval_runtime": 501.0449, |
|
"eval_samples_per_second": 49.584, |
|
"eval_steps_per_second": 24.792, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 38.56, |
|
"learning_rate": 7.575757575757576e-05, |
|
"loss": 0.3418, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 38.56, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33080732822418213, |
|
"eval_runtime": 501.1158, |
|
"eval_samples_per_second": 49.577, |
|
"eval_steps_per_second": 24.789, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 38.82, |
|
"learning_rate": 7.407407407407406e-05, |
|
"loss": 0.3123, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 38.82, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3311246931552887, |
|
"eval_runtime": 501.2192, |
|
"eval_samples_per_second": 49.567, |
|
"eval_steps_per_second": 24.784, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 39.07, |
|
"learning_rate": 7.239057239057239e-05, |
|
"loss": 0.3237, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 39.07, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3346453011035919, |
|
"eval_runtime": 501.3886, |
|
"eval_samples_per_second": 49.55, |
|
"eval_steps_per_second": 24.775, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 39.32, |
|
"learning_rate": 7.07070707070707e-05, |
|
"loss": 0.3261, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 39.32, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33250510692596436, |
|
"eval_runtime": 501.4416, |
|
"eval_samples_per_second": 49.545, |
|
"eval_steps_per_second": 24.773, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"learning_rate": 6.902356902356902e-05, |
|
"loss": 0.3269, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33122241497039795, |
|
"eval_runtime": 501.3022, |
|
"eval_samples_per_second": 49.559, |
|
"eval_steps_per_second": 24.779, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"learning_rate": 6.734006734006734e-05, |
|
"loss": 0.3267, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3319133520126343, |
|
"eval_runtime": 501.3262, |
|
"eval_samples_per_second": 49.557, |
|
"eval_steps_per_second": 24.778, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 40.08, |
|
"learning_rate": 6.565656565656565e-05, |
|
"loss": 0.3381, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 40.08, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33269181847572327, |
|
"eval_runtime": 500.8586, |
|
"eval_samples_per_second": 49.603, |
|
"eval_steps_per_second": 24.801, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 40.33, |
|
"learning_rate": 6.397306397306397e-05, |
|
"loss": 0.3238, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 40.33, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3325785994529724, |
|
"eval_runtime": 501.1224, |
|
"eval_samples_per_second": 49.577, |
|
"eval_steps_per_second": 24.788, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 40.58, |
|
"learning_rate": 6.228956228956228e-05, |
|
"loss": 0.3299, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 40.58, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33203861117362976, |
|
"eval_runtime": 500.8556, |
|
"eval_samples_per_second": 49.603, |
|
"eval_steps_per_second": 24.802, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 40.83, |
|
"learning_rate": 6.06060606060606e-05, |
|
"loss": 0.3385, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 40.83, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33086949586868286, |
|
"eval_runtime": 501.1006, |
|
"eval_samples_per_second": 49.579, |
|
"eval_steps_per_second": 24.789, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 41.08, |
|
"learning_rate": 5.8922558922558913e-05, |
|
"loss": 0.3268, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 41.08, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33224013447761536, |
|
"eval_runtime": 500.9055, |
|
"eval_samples_per_second": 49.598, |
|
"eval_steps_per_second": 24.799, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 41.34, |
|
"learning_rate": 5.723905723905723e-05, |
|
"loss": 0.3253, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 41.34, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3319920301437378, |
|
"eval_runtime": 501.2306, |
|
"eval_samples_per_second": 49.566, |
|
"eval_steps_per_second": 24.783, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"learning_rate": 5.5555555555555545e-05, |
|
"loss": 0.3261, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33143314719200134, |
|
"eval_runtime": 501.0325, |
|
"eval_samples_per_second": 49.586, |
|
"eval_steps_per_second": 24.793, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 41.84, |
|
"learning_rate": 5.387205387205386e-05, |
|
"loss": 0.3362, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 41.84, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3324301838874817, |
|
"eval_runtime": 500.7987, |
|
"eval_samples_per_second": 49.609, |
|
"eval_steps_per_second": 24.804, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 42.09, |
|
"learning_rate": 5.218855218855219e-05, |
|
"loss": 0.3203, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 42.09, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3325912356376648, |
|
"eval_runtime": 500.6821, |
|
"eval_samples_per_second": 49.62, |
|
"eval_steps_per_second": 24.81, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 42.34, |
|
"learning_rate": 5.0505050505050505e-05, |
|
"loss": 0.325, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 42.34, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3323043882846832, |
|
"eval_runtime": 501.26, |
|
"eval_samples_per_second": 49.563, |
|
"eval_steps_per_second": 24.782, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"learning_rate": 4.882154882154882e-05, |
|
"loss": 0.3172, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33261871337890625, |
|
"eval_runtime": 500.9553, |
|
"eval_samples_per_second": 49.593, |
|
"eval_steps_per_second": 24.797, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 42.85, |
|
"learning_rate": 4.7138047138047136e-05, |
|
"loss": 0.3361, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 42.85, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3307626247406006, |
|
"eval_runtime": 501.0928, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 43.1, |
|
"learning_rate": 4.545454545454545e-05, |
|
"loss": 0.3432, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 43.1, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3309696614742279, |
|
"eval_runtime": 501.4895, |
|
"eval_samples_per_second": 49.54, |
|
"eval_steps_per_second": 24.77, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 43.35, |
|
"learning_rate": 4.377104377104377e-05, |
|
"loss": 0.3396, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 43.35, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3312534689903259, |
|
"eval_runtime": 501.211, |
|
"eval_samples_per_second": 49.568, |
|
"eval_steps_per_second": 24.784, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"learning_rate": 4.208754208754208e-05, |
|
"loss": 0.3163, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33276140689849854, |
|
"eval_runtime": 501.2264, |
|
"eval_samples_per_second": 49.566, |
|
"eval_steps_per_second": 24.783, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 43.86, |
|
"learning_rate": 4.04040404040404e-05, |
|
"loss": 0.3353, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 43.86, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3318198025226593, |
|
"eval_runtime": 501.088, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 44.11, |
|
"learning_rate": 3.8720538720538714e-05, |
|
"loss": 0.3299, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 44.11, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3316582441329956, |
|
"eval_runtime": 501.3763, |
|
"eval_samples_per_second": 49.552, |
|
"eval_steps_per_second": 24.776, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 44.36, |
|
"learning_rate": 3.703703703703703e-05, |
|
"loss": 0.3213, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 44.36, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33194538950920105, |
|
"eval_runtime": 501.7895, |
|
"eval_samples_per_second": 49.511, |
|
"eval_steps_per_second": 24.755, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 44.61, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 0.3253, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 44.61, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33288490772247314, |
|
"eval_runtime": 501.1283, |
|
"eval_samples_per_second": 49.576, |
|
"eval_steps_per_second": 24.788, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 44.86, |
|
"learning_rate": 3.367003367003367e-05, |
|
"loss": 0.3391, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 44.86, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33222696185112, |
|
"eval_runtime": 501.5052, |
|
"eval_samples_per_second": 49.539, |
|
"eval_steps_per_second": 24.769, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 45.12, |
|
"learning_rate": 3.198653198653198e-05, |
|
"loss": 0.3179, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 45.12, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.333011269569397, |
|
"eval_runtime": 500.7091, |
|
"eval_samples_per_second": 49.618, |
|
"eval_steps_per_second": 24.809, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"learning_rate": 3.03030303030303e-05, |
|
"loss": 0.3348, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3321002721786499, |
|
"eval_runtime": 501.2516, |
|
"eval_samples_per_second": 49.564, |
|
"eval_steps_per_second": 24.782, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 45.62, |
|
"learning_rate": 2.8619528619528615e-05, |
|
"loss": 0.3116, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 45.62, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33259570598602295, |
|
"eval_runtime": 501.2743, |
|
"eval_samples_per_second": 49.562, |
|
"eval_steps_per_second": 24.781, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 45.87, |
|
"learning_rate": 2.693602693602693e-05, |
|
"loss": 0.3334, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 45.87, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33218705654144287, |
|
"eval_runtime": 501.0248, |
|
"eval_samples_per_second": 49.586, |
|
"eval_steps_per_second": 24.793, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 46.12, |
|
"learning_rate": 2.5252525252525253e-05, |
|
"loss": 0.3401, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 46.12, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3314586579799652, |
|
"eval_runtime": 501.1615, |
|
"eval_samples_per_second": 49.573, |
|
"eval_steps_per_second": 24.786, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"learning_rate": 2.3569023569023568e-05, |
|
"loss": 0.3381, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33111417293548584, |
|
"eval_runtime": 501.2779, |
|
"eval_samples_per_second": 49.561, |
|
"eval_steps_per_second": 24.781, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 46.63, |
|
"learning_rate": 2.1885521885521884e-05, |
|
"loss": 0.3154, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 46.63, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3327486515045166, |
|
"eval_runtime": 500.9424, |
|
"eval_samples_per_second": 49.595, |
|
"eval_steps_per_second": 24.797, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 2.02020202020202e-05, |
|
"loss": 0.3348, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33224716782569885, |
|
"eval_runtime": 500.9745, |
|
"eval_samples_per_second": 49.591, |
|
"eval_steps_per_second": 24.796, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 47.13, |
|
"learning_rate": 1.8518518518518515e-05, |
|
"loss": 0.3285, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 47.13, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3325360119342804, |
|
"eval_runtime": 501.157, |
|
"eval_samples_per_second": 49.573, |
|
"eval_steps_per_second": 24.787, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 47.39, |
|
"learning_rate": 1.6835016835016834e-05, |
|
"loss": 0.3256, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 47.39, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3328581750392914, |
|
"eval_runtime": 501.2027, |
|
"eval_samples_per_second": 49.569, |
|
"eval_steps_per_second": 24.784, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 47.64, |
|
"learning_rate": 1.515151515151515e-05, |
|
"loss": 0.3389, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 47.64, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3324856758117676, |
|
"eval_runtime": 501.1837, |
|
"eval_samples_per_second": 49.571, |
|
"eval_steps_per_second": 24.785, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 47.89, |
|
"learning_rate": 1.3468013468013465e-05, |
|
"loss": 0.3288, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 47.89, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3326534032821655, |
|
"eval_runtime": 501.4739, |
|
"eval_samples_per_second": 49.542, |
|
"eval_steps_per_second": 24.771, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 48.14, |
|
"learning_rate": 1.1784511784511784e-05, |
|
"loss": 0.3172, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 48.14, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3326767683029175, |
|
"eval_runtime": 500.8014, |
|
"eval_samples_per_second": 49.608, |
|
"eval_steps_per_second": 24.804, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 48.39, |
|
"learning_rate": 1.01010101010101e-05, |
|
"loss": 0.3211, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 48.39, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.3324893116950989, |
|
"eval_runtime": 500.8057, |
|
"eval_samples_per_second": 49.608, |
|
"eval_steps_per_second": 24.804, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 48.65, |
|
"learning_rate": 8.417508417508417e-06, |
|
"loss": 0.3348, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 48.65, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33247503638267517, |
|
"eval_runtime": 501.0569, |
|
"eval_samples_per_second": 49.583, |
|
"eval_steps_per_second": 24.792, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 48.9, |
|
"learning_rate": 6.7340067340067325e-06, |
|
"loss": 0.3327, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 48.9, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33261463046073914, |
|
"eval_runtime": 501.0903, |
|
"eval_samples_per_second": 49.58, |
|
"eval_steps_per_second": 24.79, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 49.15, |
|
"learning_rate": 5.05050505050505e-06, |
|
"loss": 0.3341, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 49.15, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33255448937416077, |
|
"eval_runtime": 500.9292, |
|
"eval_samples_per_second": 49.596, |
|
"eval_steps_per_second": 24.798, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"learning_rate": 3.3670033670033663e-06, |
|
"loss": 0.3344, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33247339725494385, |
|
"eval_runtime": 501.4588, |
|
"eval_samples_per_second": 49.543, |
|
"eval_steps_per_second": 24.772, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 49.65, |
|
"learning_rate": 1.6835016835016831e-06, |
|
"loss": 0.3207, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 49.65, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33257797360420227, |
|
"eval_runtime": 501.4807, |
|
"eval_samples_per_second": 49.541, |
|
"eval_steps_per_second": 24.771, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 49.91, |
|
"learning_rate": 0.0, |
|
"loss": 0.3299, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 49.91, |
|
"eval_accuracy": 0.8974400257607471, |
|
"eval_loss": 0.33260539174079895, |
|
"eval_runtime": 501.1869, |
|
"eval_samples_per_second": 49.57, |
|
"eval_steps_per_second": 24.785, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 49.91, |
|
"step": 19800, |
|
"total_flos": 7.53101543607702e+19, |
|
"train_loss": 0.3200095210412536, |
|
"train_runtime": 116243.9445, |
|
"train_samples_per_second": 10.917, |
|
"train_steps_per_second": 0.17 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 19800, |
|
"num_train_epochs": 50, |
|
"save_steps": 100, |
|
"total_flos": 7.53101543607702e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|