wavlm-base_2 / trainer_state.json
cloudwalkerw's picture
End of training
f8fabf9
{
"best_metric": 0.9965786507808726,
"best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_2/checkpoint-300",
"epoch": 49.9054820415879,
"eval_steps": 100,
"global_step": 19800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.25,
"learning_rate": 1.515151515151515e-05,
"loss": 0.4872,
"step": 100
},
{
"epoch": 0.25,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.21796834468841553,
"eval_runtime": 498.2273,
"eval_samples_per_second": 49.865,
"eval_steps_per_second": 24.932,
"step": 100
},
{
"epoch": 0.5,
"learning_rate": 3.03030303030303e-05,
"loss": 0.1571,
"step": 200
},
{
"epoch": 0.5,
"eval_accuracy": 0.93342456931251,
"eval_loss": 0.2581726014614105,
"eval_runtime": 500.3876,
"eval_samples_per_second": 49.65,
"eval_steps_per_second": 24.825,
"step": 200
},
{
"epoch": 0.76,
"learning_rate": 4.545454545454545e-05,
"loss": 0.0644,
"step": 300
},
{
"epoch": 0.76,
"eval_accuracy": 0.9965786507808726,
"eval_loss": 0.024423159658908844,
"eval_runtime": 500.4785,
"eval_samples_per_second": 49.64,
"eval_steps_per_second": 24.82,
"step": 300
},
{
"epoch": 1.01,
"learning_rate": 6.06060606060606e-05,
"loss": 0.0553,
"step": 400
},
{
"epoch": 1.01,
"eval_accuracy": 0.9927950410561907,
"eval_loss": 0.11555636674165726,
"eval_runtime": 500.405,
"eval_samples_per_second": 49.648,
"eval_steps_per_second": 24.824,
"step": 400
},
{
"epoch": 1.26,
"learning_rate": 7.575757575757576e-05,
"loss": 0.1108,
"step": 500
},
{
"epoch": 1.26,
"eval_accuracy": 0.9898164546771856,
"eval_loss": 0.1576482504606247,
"eval_runtime": 500.3246,
"eval_samples_per_second": 49.656,
"eval_steps_per_second": 24.828,
"step": 500
},
{
"epoch": 1.51,
"learning_rate": 9.09090909090909e-05,
"loss": 0.0849,
"step": 600
},
{
"epoch": 1.51,
"eval_accuracy": 0.9946868459185316,
"eval_loss": 0.08708283305168152,
"eval_runtime": 500.3759,
"eval_samples_per_second": 49.651,
"eval_steps_per_second": 24.825,
"step": 600
},
{
"epoch": 1.76,
"learning_rate": 0.00010606060606060605,
"loss": 0.0635,
"step": 700
},
{
"epoch": 1.76,
"eval_accuracy": 0.9938818225728546,
"eval_loss": 0.10875184088945389,
"eval_runtime": 500.3093,
"eval_samples_per_second": 49.657,
"eval_steps_per_second": 24.829,
"step": 700
},
{
"epoch": 2.02,
"learning_rate": 0.0001212121212121212,
"loss": 0.0504,
"step": 800
},
{
"epoch": 2.02,
"eval_accuracy": 0.9789888906778297,
"eval_loss": 0.4074054956436157,
"eval_runtime": 500.4204,
"eval_samples_per_second": 49.646,
"eval_steps_per_second": 24.823,
"step": 800
},
{
"epoch": 2.27,
"learning_rate": 0.00013636363636363634,
"loss": 0.1075,
"step": 900
},
{
"epoch": 2.27,
"eval_accuracy": 0.9814442118821446,
"eval_loss": 0.2954882085323334,
"eval_runtime": 500.4501,
"eval_samples_per_second": 49.643,
"eval_steps_per_second": 24.822,
"step": 900
},
{
"epoch": 2.52,
"learning_rate": 0.00015151515151515152,
"loss": 0.2387,
"step": 1000
},
{
"epoch": 2.52,
"eval_accuracy": 0.9956126227660602,
"eval_loss": 0.06512398272752762,
"eval_runtime": 500.7096,
"eval_samples_per_second": 49.618,
"eval_steps_per_second": 24.809,
"step": 1000
},
{
"epoch": 2.77,
"learning_rate": 0.00016666666666666666,
"loss": 0.3052,
"step": 1100
},
{
"epoch": 2.77,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.23793257772922516,
"eval_runtime": 500.5364,
"eval_samples_per_second": 49.635,
"eval_steps_per_second": 24.817,
"step": 1100
},
{
"epoch": 3.02,
"learning_rate": 0.0001818181818181818,
"loss": 0.3336,
"step": 1200
},
{
"epoch": 3.02,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.35269346833229065,
"eval_runtime": 501.0064,
"eval_samples_per_second": 49.588,
"eval_steps_per_second": 24.794,
"step": 1200
},
{
"epoch": 3.28,
"learning_rate": 0.00019696969696969695,
"loss": 0.3322,
"step": 1300
},
{
"epoch": 3.28,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33067458868026733,
"eval_runtime": 502.8336,
"eval_samples_per_second": 49.408,
"eval_steps_per_second": 24.704,
"step": 1300
},
{
"epoch": 3.53,
"learning_rate": 0.0002121212121212121,
"loss": 0.3201,
"step": 1400
},
{
"epoch": 3.53,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.34046611189842224,
"eval_runtime": 500.7806,
"eval_samples_per_second": 49.611,
"eval_steps_per_second": 24.805,
"step": 1400
},
{
"epoch": 3.78,
"learning_rate": 0.00022727272727272725,
"loss": 0.3406,
"step": 1500
},
{
"epoch": 3.78,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33353403210639954,
"eval_runtime": 500.7499,
"eval_samples_per_second": 49.614,
"eval_steps_per_second": 24.807,
"step": 1500
},
{
"epoch": 4.03,
"learning_rate": 0.0002424242424242424,
"loss": 0.3475,
"step": 1600
},
{
"epoch": 4.03,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3340916931629181,
"eval_runtime": 500.598,
"eval_samples_per_second": 49.629,
"eval_steps_per_second": 24.814,
"step": 1600
},
{
"epoch": 4.28,
"learning_rate": 0.00025757575757575756,
"loss": 0.3312,
"step": 1700
},
{
"epoch": 4.28,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33610448241233826,
"eval_runtime": 500.8798,
"eval_samples_per_second": 49.601,
"eval_steps_per_second": 24.8,
"step": 1700
},
{
"epoch": 4.54,
"learning_rate": 0.0002727272727272727,
"loss": 0.3367,
"step": 1800
},
{
"epoch": 4.54,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3309839069843292,
"eval_runtime": 500.5768,
"eval_samples_per_second": 49.631,
"eval_steps_per_second": 24.815,
"step": 1800
},
{
"epoch": 4.79,
"learning_rate": 0.00028787878787878786,
"loss": 0.3284,
"step": 1900
},
{
"epoch": 4.79,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33385568857192993,
"eval_runtime": 500.453,
"eval_samples_per_second": 49.643,
"eval_steps_per_second": 24.822,
"step": 1900
},
{
"epoch": 5.04,
"learning_rate": 0.00029966329966329963,
"loss": 0.3267,
"step": 2000
},
{
"epoch": 5.04,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3350389301776886,
"eval_runtime": 500.7512,
"eval_samples_per_second": 49.613,
"eval_steps_per_second": 24.807,
"step": 2000
},
{
"epoch": 5.29,
"learning_rate": 0.00029797979797979794,
"loss": 0.338,
"step": 2100
},
{
"epoch": 5.29,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33081698417663574,
"eval_runtime": 500.5738,
"eval_samples_per_second": 49.631,
"eval_steps_per_second": 24.816,
"step": 2100
},
{
"epoch": 5.55,
"learning_rate": 0.00029629629629629624,
"loss": 0.3277,
"step": 2200
},
{
"epoch": 5.55,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.330895334482193,
"eval_runtime": 500.5595,
"eval_samples_per_second": 49.632,
"eval_steps_per_second": 24.816,
"step": 2200
},
{
"epoch": 5.8,
"learning_rate": 0.0002946127946127946,
"loss": 0.3294,
"step": 2300
},
{
"epoch": 5.8,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3312866687774658,
"eval_runtime": 500.6862,
"eval_samples_per_second": 49.62,
"eval_steps_per_second": 24.81,
"step": 2300
},
{
"epoch": 6.05,
"learning_rate": 0.0002929292929292929,
"loss": 0.3315,
"step": 2400
},
{
"epoch": 6.05,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33599570393562317,
"eval_runtime": 500.6176,
"eval_samples_per_second": 49.627,
"eval_steps_per_second": 24.813,
"step": 2400
},
{
"epoch": 6.3,
"learning_rate": 0.00029124579124579125,
"loss": 0.3397,
"step": 2500
},
{
"epoch": 6.3,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33067843317985535,
"eval_runtime": 500.7157,
"eval_samples_per_second": 49.617,
"eval_steps_per_second": 24.808,
"step": 2500
},
{
"epoch": 6.55,
"learning_rate": 0.00028956228956228955,
"loss": 0.3318,
"step": 2600
},
{
"epoch": 6.55,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3359489440917969,
"eval_runtime": 500.805,
"eval_samples_per_second": 49.608,
"eval_steps_per_second": 24.804,
"step": 2600
},
{
"epoch": 6.81,
"learning_rate": 0.00028787878787878786,
"loss": 0.3312,
"step": 2700
},
{
"epoch": 6.81,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3307555615901947,
"eval_runtime": 500.6263,
"eval_samples_per_second": 49.626,
"eval_steps_per_second": 24.813,
"step": 2700
},
{
"epoch": 7.06,
"learning_rate": 0.00028619528619528616,
"loss": 0.3155,
"step": 2800
},
{
"epoch": 7.06,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33172452449798584,
"eval_runtime": 500.7242,
"eval_samples_per_second": 49.616,
"eval_steps_per_second": 24.808,
"step": 2800
},
{
"epoch": 7.31,
"learning_rate": 0.0002845117845117845,
"loss": 0.3304,
"step": 2900
},
{
"epoch": 7.31,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33618223667144775,
"eval_runtime": 500.5229,
"eval_samples_per_second": 49.636,
"eval_steps_per_second": 24.818,
"step": 2900
},
{
"epoch": 7.56,
"learning_rate": 0.0002828282828282828,
"loss": 0.338,
"step": 3000
},
{
"epoch": 7.56,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3341616988182068,
"eval_runtime": 500.7918,
"eval_samples_per_second": 49.609,
"eval_steps_per_second": 24.805,
"step": 3000
},
{
"epoch": 7.81,
"learning_rate": 0.0002811447811447811,
"loss": 0.3241,
"step": 3100
},
{
"epoch": 7.81,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.330985426902771,
"eval_runtime": 500.924,
"eval_samples_per_second": 49.596,
"eval_steps_per_second": 24.798,
"step": 3100
},
{
"epoch": 8.07,
"learning_rate": 0.0002794612794612794,
"loss": 0.3325,
"step": 3200
},
{
"epoch": 8.07,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33258649706840515,
"eval_runtime": 500.7775,
"eval_samples_per_second": 49.611,
"eval_steps_per_second": 24.805,
"step": 3200
},
{
"epoch": 8.32,
"learning_rate": 0.0002777777777777778,
"loss": 0.3202,
"step": 3300
},
{
"epoch": 8.32,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3345409035682678,
"eval_runtime": 501.0057,
"eval_samples_per_second": 49.588,
"eval_steps_per_second": 24.794,
"step": 3300
},
{
"epoch": 8.57,
"learning_rate": 0.0002760942760942761,
"loss": 0.3315,
"step": 3400
},
{
"epoch": 8.57,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3335334360599518,
"eval_runtime": 500.8188,
"eval_samples_per_second": 49.607,
"eval_steps_per_second": 24.803,
"step": 3400
},
{
"epoch": 8.82,
"learning_rate": 0.0002744107744107744,
"loss": 0.3288,
"step": 3500
},
{
"epoch": 8.82,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33116644620895386,
"eval_runtime": 501.0568,
"eval_samples_per_second": 49.583,
"eval_steps_per_second": 24.792,
"step": 3500
},
{
"epoch": 9.07,
"learning_rate": 0.0002727272727272727,
"loss": 0.3371,
"step": 3600
},
{
"epoch": 9.07,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.34014323353767395,
"eval_runtime": 500.7918,
"eval_samples_per_second": 49.609,
"eval_steps_per_second": 24.805,
"step": 3600
},
{
"epoch": 9.33,
"learning_rate": 0.00027104377104377104,
"loss": 0.3409,
"step": 3700
},
{
"epoch": 9.33,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33300086855888367,
"eval_runtime": 500.8108,
"eval_samples_per_second": 49.608,
"eval_steps_per_second": 24.804,
"step": 3700
},
{
"epoch": 9.58,
"learning_rate": 0.00026936026936026934,
"loss": 0.3236,
"step": 3800
},
{
"epoch": 9.58,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3329709768295288,
"eval_runtime": 500.7254,
"eval_samples_per_second": 49.616,
"eval_steps_per_second": 24.808,
"step": 3800
},
{
"epoch": 9.83,
"learning_rate": 0.00026767676767676764,
"loss": 0.3224,
"step": 3900
},
{
"epoch": 9.83,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3320678770542145,
"eval_runtime": 500.9946,
"eval_samples_per_second": 49.589,
"eval_steps_per_second": 24.795,
"step": 3900
},
{
"epoch": 10.08,
"learning_rate": 0.00026599326599326595,
"loss": 0.3439,
"step": 4000
},
{
"epoch": 10.08,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33262181282043457,
"eval_runtime": 501.0278,
"eval_samples_per_second": 49.586,
"eval_steps_per_second": 24.793,
"step": 4000
},
{
"epoch": 10.33,
"learning_rate": 0.0002643097643097643,
"loss": 0.3382,
"step": 4100
},
{
"epoch": 10.33,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3310418426990509,
"eval_runtime": 501.0043,
"eval_samples_per_second": 49.588,
"eval_steps_per_second": 24.794,
"step": 4100
},
{
"epoch": 10.59,
"learning_rate": 0.0002626262626262626,
"loss": 0.3307,
"step": 4200
},
{
"epoch": 10.59,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33820316195487976,
"eval_runtime": 501.0192,
"eval_samples_per_second": 49.587,
"eval_steps_per_second": 24.793,
"step": 4200
},
{
"epoch": 10.84,
"learning_rate": 0.0002609427609427609,
"loss": 0.3231,
"step": 4300
},
{
"epoch": 10.84,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3324846625328064,
"eval_runtime": 500.6054,
"eval_samples_per_second": 49.628,
"eval_steps_per_second": 24.814,
"step": 4300
},
{
"epoch": 11.09,
"learning_rate": 0.0002592592592592592,
"loss": 0.3095,
"step": 4400
},
{
"epoch": 11.09,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3348234295845032,
"eval_runtime": 500.6559,
"eval_samples_per_second": 49.623,
"eval_steps_per_second": 24.811,
"step": 4400
},
{
"epoch": 11.34,
"learning_rate": 0.00025757575757575756,
"loss": 0.3442,
"step": 4500
},
{
"epoch": 11.34,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33274412155151367,
"eval_runtime": 501.0421,
"eval_samples_per_second": 49.585,
"eval_steps_per_second": 24.792,
"step": 4500
},
{
"epoch": 11.59,
"learning_rate": 0.00025589225589225587,
"loss": 0.3269,
"step": 4600
},
{
"epoch": 11.59,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33261528611183167,
"eval_runtime": 500.8659,
"eval_samples_per_second": 49.602,
"eval_steps_per_second": 24.801,
"step": 4600
},
{
"epoch": 11.85,
"learning_rate": 0.00025420875420875417,
"loss": 0.3323,
"step": 4700
},
{
"epoch": 11.85,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3308444619178772,
"eval_runtime": 501.0609,
"eval_samples_per_second": 49.583,
"eval_steps_per_second": 24.791,
"step": 4700
},
{
"epoch": 12.1,
"learning_rate": 0.0002525252525252525,
"loss": 0.3313,
"step": 4800
},
{
"epoch": 12.1,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3308044970035553,
"eval_runtime": 500.8061,
"eval_samples_per_second": 49.608,
"eval_steps_per_second": 24.804,
"step": 4800
},
{
"epoch": 12.35,
"learning_rate": 0.0002508417508417508,
"loss": 0.3283,
"step": 4900
},
{
"epoch": 12.35,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3314284384250641,
"eval_runtime": 500.7554,
"eval_samples_per_second": 49.613,
"eval_steps_per_second": 24.807,
"step": 4900
},
{
"epoch": 12.6,
"learning_rate": 0.00024915824915824913,
"loss": 0.3331,
"step": 5000
},
{
"epoch": 12.6,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306741416454315,
"eval_runtime": 500.9128,
"eval_samples_per_second": 49.597,
"eval_steps_per_second": 24.799,
"step": 5000
},
{
"epoch": 12.85,
"learning_rate": 0.0002474747474747475,
"loss": 0.3317,
"step": 5100
},
{
"epoch": 12.85,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3343793749809265,
"eval_runtime": 500.7387,
"eval_samples_per_second": 49.615,
"eval_steps_per_second": 24.807,
"step": 5100
},
{
"epoch": 13.11,
"learning_rate": 0.0002457912457912458,
"loss": 0.3283,
"step": 5200
},
{
"epoch": 13.11,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33199575543403625,
"eval_runtime": 500.8527,
"eval_samples_per_second": 49.603,
"eval_steps_per_second": 24.802,
"step": 5200
},
{
"epoch": 13.36,
"learning_rate": 0.00024410774410774406,
"loss": 0.3263,
"step": 5300
},
{
"epoch": 13.36,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33114317059516907,
"eval_runtime": 500.8605,
"eval_samples_per_second": 49.603,
"eval_steps_per_second": 24.801,
"step": 5300
},
{
"epoch": 13.61,
"learning_rate": 0.0002424242424242424,
"loss": 0.3421,
"step": 5400
},
{
"epoch": 13.61,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306863009929657,
"eval_runtime": 500.9253,
"eval_samples_per_second": 49.596,
"eval_steps_per_second": 24.798,
"step": 5400
},
{
"epoch": 13.86,
"learning_rate": 0.00024074074074074072,
"loss": 0.3164,
"step": 5500
},
{
"epoch": 13.86,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3318192958831787,
"eval_runtime": 500.4762,
"eval_samples_per_second": 49.641,
"eval_steps_per_second": 24.82,
"step": 5500
},
{
"epoch": 14.11,
"learning_rate": 0.00023905723905723905,
"loss": 0.3315,
"step": 5600
},
{
"epoch": 14.11,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3335217535495758,
"eval_runtime": 500.848,
"eval_samples_per_second": 49.604,
"eval_steps_per_second": 24.802,
"step": 5600
},
{
"epoch": 14.37,
"learning_rate": 0.00023737373737373732,
"loss": 0.3415,
"step": 5700
},
{
"epoch": 14.37,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3314845860004425,
"eval_runtime": 500.8633,
"eval_samples_per_second": 49.602,
"eval_steps_per_second": 24.801,
"step": 5700
},
{
"epoch": 14.62,
"learning_rate": 0.00023569023569023565,
"loss": 0.3325,
"step": 5800
},
{
"epoch": 14.62,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33067190647125244,
"eval_runtime": 500.8182,
"eval_samples_per_second": 49.607,
"eval_steps_per_second": 24.803,
"step": 5800
},
{
"epoch": 14.87,
"learning_rate": 0.00023400673400673398,
"loss": 0.3264,
"step": 5900
},
{
"epoch": 14.87,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33295896649360657,
"eval_runtime": 500.9198,
"eval_samples_per_second": 49.597,
"eval_steps_per_second": 24.798,
"step": 5900
},
{
"epoch": 15.12,
"learning_rate": 0.0002323232323232323,
"loss": 0.3223,
"step": 6000
},
{
"epoch": 15.12,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306836187839508,
"eval_runtime": 500.9444,
"eval_samples_per_second": 49.594,
"eval_steps_per_second": 24.797,
"step": 6000
},
{
"epoch": 15.37,
"learning_rate": 0.00023063973063973064,
"loss": 0.3289,
"step": 6100
},
{
"epoch": 15.37,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3328978717327118,
"eval_runtime": 500.8768,
"eval_samples_per_second": 49.601,
"eval_steps_per_second": 24.801,
"step": 6100
},
{
"epoch": 15.63,
"learning_rate": 0.00022895622895622892,
"loss": 0.3353,
"step": 6200
},
{
"epoch": 15.63,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33112406730651855,
"eval_runtime": 500.9115,
"eval_samples_per_second": 49.598,
"eval_steps_per_second": 24.799,
"step": 6200
},
{
"epoch": 15.88,
"learning_rate": 0.00022727272727272725,
"loss": 0.3246,
"step": 6300
},
{
"epoch": 15.88,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3310993015766144,
"eval_runtime": 501.1149,
"eval_samples_per_second": 49.577,
"eval_steps_per_second": 24.789,
"step": 6300
},
{
"epoch": 16.13,
"learning_rate": 0.00022558922558922557,
"loss": 0.3425,
"step": 6400
},
{
"epoch": 16.13,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.330674409866333,
"eval_runtime": 500.85,
"eval_samples_per_second": 49.604,
"eval_steps_per_second": 24.802,
"step": 6400
},
{
"epoch": 16.38,
"learning_rate": 0.0002239057239057239,
"loss": 0.331,
"step": 6500
},
{
"epoch": 16.38,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306954503059387,
"eval_runtime": 501.1853,
"eval_samples_per_second": 49.57,
"eval_steps_per_second": 24.785,
"step": 6500
},
{
"epoch": 16.64,
"learning_rate": 0.00022222222222222218,
"loss": 0.3293,
"step": 6600
},
{
"epoch": 16.64,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33531516790390015,
"eval_runtime": 500.9658,
"eval_samples_per_second": 49.592,
"eval_steps_per_second": 24.796,
"step": 6600
},
{
"epoch": 16.89,
"learning_rate": 0.0002205387205387205,
"loss": 0.3249,
"step": 6700
},
{
"epoch": 16.89,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3339368402957916,
"eval_runtime": 500.8138,
"eval_samples_per_second": 49.607,
"eval_steps_per_second": 24.804,
"step": 6700
},
{
"epoch": 17.14,
"learning_rate": 0.00021885521885521884,
"loss": 0.3214,
"step": 6800
},
{
"epoch": 17.14,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3337734639644623,
"eval_runtime": 500.9586,
"eval_samples_per_second": 49.593,
"eval_steps_per_second": 24.796,
"step": 6800
},
{
"epoch": 17.39,
"learning_rate": 0.00021717171717171717,
"loss": 0.3259,
"step": 6900
},
{
"epoch": 17.39,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3327140212059021,
"eval_runtime": 501.0122,
"eval_samples_per_second": 49.588,
"eval_steps_per_second": 24.794,
"step": 6900
},
{
"epoch": 17.64,
"learning_rate": 0.00021548821548821544,
"loss": 0.3408,
"step": 7000
},
{
"epoch": 17.64,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33182209730148315,
"eval_runtime": 500.9279,
"eval_samples_per_second": 49.596,
"eval_steps_per_second": 24.798,
"step": 7000
},
{
"epoch": 17.9,
"learning_rate": 0.00021380471380471377,
"loss": 0.3258,
"step": 7100
},
{
"epoch": 17.9,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33183717727661133,
"eval_runtime": 501.2678,
"eval_samples_per_second": 49.562,
"eval_steps_per_second": 24.781,
"step": 7100
},
{
"epoch": 18.15,
"learning_rate": 0.0002121212121212121,
"loss": 0.3299,
"step": 7200
},
{
"epoch": 18.15,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33080849051475525,
"eval_runtime": 501.2555,
"eval_samples_per_second": 49.564,
"eval_steps_per_second": 24.782,
"step": 7200
},
{
"epoch": 18.4,
"learning_rate": 0.00021043771043771043,
"loss": 0.327,
"step": 7300
},
{
"epoch": 18.4,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3371123671531677,
"eval_runtime": 500.9892,
"eval_samples_per_second": 49.59,
"eval_steps_per_second": 24.795,
"step": 7300
},
{
"epoch": 18.65,
"learning_rate": 0.00020875420875420876,
"loss": 0.3317,
"step": 7400
},
{
"epoch": 18.65,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3307543694972992,
"eval_runtime": 500.9046,
"eval_samples_per_second": 49.598,
"eval_steps_per_second": 24.799,
"step": 7400
},
{
"epoch": 18.9,
"learning_rate": 0.00020707070707070703,
"loss": 0.3291,
"step": 7500
},
{
"epoch": 18.9,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33100253343582153,
"eval_runtime": 500.936,
"eval_samples_per_second": 49.595,
"eval_steps_per_second": 24.798,
"step": 7500
},
{
"epoch": 19.16,
"learning_rate": 0.00020538720538720536,
"loss": 0.3263,
"step": 7600
},
{
"epoch": 19.16,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33248230814933777,
"eval_runtime": 500.8588,
"eval_samples_per_second": 49.603,
"eval_steps_per_second": 24.801,
"step": 7600
},
{
"epoch": 19.41,
"learning_rate": 0.0002037037037037037,
"loss": 0.3223,
"step": 7700
},
{
"epoch": 19.41,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33463332056999207,
"eval_runtime": 500.8344,
"eval_samples_per_second": 49.605,
"eval_steps_per_second": 24.803,
"step": 7700
},
{
"epoch": 19.66,
"learning_rate": 0.00020202020202020202,
"loss": 0.3403,
"step": 7800
},
{
"epoch": 19.66,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3315812647342682,
"eval_runtime": 500.8929,
"eval_samples_per_second": 49.599,
"eval_steps_per_second": 24.8,
"step": 7800
},
{
"epoch": 19.91,
"learning_rate": 0.0002003367003367003,
"loss": 0.3265,
"step": 7900
},
{
"epoch": 19.91,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3309071660041809,
"eval_runtime": 500.8065,
"eval_samples_per_second": 49.608,
"eval_steps_per_second": 24.804,
"step": 7900
},
{
"epoch": 20.16,
"learning_rate": 0.00019865319865319862,
"loss": 0.33,
"step": 8000
},
{
"epoch": 20.16,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3318468928337097,
"eval_runtime": 501.0869,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 8000
},
{
"epoch": 20.42,
"learning_rate": 0.00019696969696969695,
"loss": 0.3488,
"step": 8100
},
{
"epoch": 20.42,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33127933740615845,
"eval_runtime": 500.8956,
"eval_samples_per_second": 49.599,
"eval_steps_per_second": 24.8,
"step": 8100
},
{
"epoch": 20.67,
"learning_rate": 0.00019528619528619528,
"loss": 0.3293,
"step": 8200
},
{
"epoch": 20.67,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33354687690734863,
"eval_runtime": 501.2059,
"eval_samples_per_second": 49.568,
"eval_steps_per_second": 24.784,
"step": 8200
},
{
"epoch": 20.92,
"learning_rate": 0.00019360269360269356,
"loss": 0.3095,
"step": 8300
},
{
"epoch": 20.92,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33559244871139526,
"eval_runtime": 501.1442,
"eval_samples_per_second": 49.575,
"eval_steps_per_second": 24.787,
"step": 8300
},
{
"epoch": 21.17,
"learning_rate": 0.0001919191919191919,
"loss": 0.3366,
"step": 8400
},
{
"epoch": 21.17,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3331533968448639,
"eval_runtime": 501.0445,
"eval_samples_per_second": 49.584,
"eval_steps_per_second": 24.792,
"step": 8400
},
{
"epoch": 21.42,
"learning_rate": 0.00019023569023569022,
"loss": 0.317,
"step": 8500
},
{
"epoch": 21.42,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3337852358818054,
"eval_runtime": 501.2024,
"eval_samples_per_second": 49.569,
"eval_steps_per_second": 24.784,
"step": 8500
},
{
"epoch": 21.68,
"learning_rate": 0.00018855218855218854,
"loss": 0.3299,
"step": 8600
},
{
"epoch": 21.68,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3308347165584564,
"eval_runtime": 501.0,
"eval_samples_per_second": 49.589,
"eval_steps_per_second": 24.794,
"step": 8600
},
{
"epoch": 21.93,
"learning_rate": 0.00018686868686868687,
"loss": 0.3434,
"step": 8700
},
{
"epoch": 21.93,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.330965131521225,
"eval_runtime": 501.1751,
"eval_samples_per_second": 49.571,
"eval_steps_per_second": 24.786,
"step": 8700
},
{
"epoch": 22.18,
"learning_rate": 0.00018518518518518515,
"loss": 0.3208,
"step": 8800
},
{
"epoch": 22.18,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3308819830417633,
"eval_runtime": 501.189,
"eval_samples_per_second": 49.57,
"eval_steps_per_second": 24.785,
"step": 8800
},
{
"epoch": 22.43,
"learning_rate": 0.00018350168350168348,
"loss": 0.3351,
"step": 8900
},
{
"epoch": 22.43,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33238929510116577,
"eval_runtime": 501.2428,
"eval_samples_per_second": 49.565,
"eval_steps_per_second": 24.782,
"step": 8900
},
{
"epoch": 22.68,
"learning_rate": 0.0001818181818181818,
"loss": 0.3301,
"step": 9000
},
{
"epoch": 22.68,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3308260440826416,
"eval_runtime": 500.8427,
"eval_samples_per_second": 49.604,
"eval_steps_per_second": 24.802,
"step": 9000
},
{
"epoch": 22.94,
"learning_rate": 0.00018013468013468014,
"loss": 0.3196,
"step": 9100
},
{
"epoch": 22.94,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3329552710056305,
"eval_runtime": 501.1009,
"eval_samples_per_second": 49.579,
"eval_steps_per_second": 24.789,
"step": 9100
},
{
"epoch": 23.19,
"learning_rate": 0.0001784511784511784,
"loss": 0.3339,
"step": 9200
},
{
"epoch": 23.19,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33333924412727356,
"eval_runtime": 500.8988,
"eval_samples_per_second": 49.599,
"eval_steps_per_second": 24.799,
"step": 9200
},
{
"epoch": 23.44,
"learning_rate": 0.00017676767676767674,
"loss": 0.3249,
"step": 9300
},
{
"epoch": 23.44,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3307563364505768,
"eval_runtime": 501.0394,
"eval_samples_per_second": 49.585,
"eval_steps_per_second": 24.792,
"step": 9300
},
{
"epoch": 23.69,
"learning_rate": 0.00017508417508417507,
"loss": 0.3247,
"step": 9400
},
{
"epoch": 23.69,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3338105082511902,
"eval_runtime": 500.7662,
"eval_samples_per_second": 49.612,
"eval_steps_per_second": 24.806,
"step": 9400
},
{
"epoch": 23.94,
"learning_rate": 0.0001734006734006734,
"loss": 0.3369,
"step": 9500
},
{
"epoch": 23.94,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3312574028968811,
"eval_runtime": 501.3028,
"eval_samples_per_second": 49.559,
"eval_steps_per_second": 24.779,
"step": 9500
},
{
"epoch": 24.2,
"learning_rate": 0.00017171717171717167,
"loss": 0.3291,
"step": 9600
},
{
"epoch": 24.2,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3320288062095642,
"eval_runtime": 501.3419,
"eval_samples_per_second": 49.555,
"eval_steps_per_second": 24.777,
"step": 9600
},
{
"epoch": 24.45,
"learning_rate": 0.00017003367003367,
"loss": 0.3307,
"step": 9700
},
{
"epoch": 24.45,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33087393641471863,
"eval_runtime": 501.1809,
"eval_samples_per_second": 49.571,
"eval_steps_per_second": 24.785,
"step": 9700
},
{
"epoch": 24.7,
"learning_rate": 0.00016835016835016833,
"loss": 0.3328,
"step": 9800
},
{
"epoch": 24.7,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33073562383651733,
"eval_runtime": 500.8708,
"eval_samples_per_second": 49.602,
"eval_steps_per_second": 24.801,
"step": 9800
},
{
"epoch": 24.95,
"learning_rate": 0.00016666666666666666,
"loss": 0.3277,
"step": 9900
},
{
"epoch": 24.95,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3341946601867676,
"eval_runtime": 501.1287,
"eval_samples_per_second": 49.576,
"eval_steps_per_second": 24.788,
"step": 9900
},
{
"epoch": 25.2,
"learning_rate": 0.000164983164983165,
"loss": 0.3278,
"step": 10000
},
{
"epoch": 25.2,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3309917449951172,
"eval_runtime": 500.9142,
"eval_samples_per_second": 49.597,
"eval_steps_per_second": 24.799,
"step": 10000
},
{
"epoch": 25.46,
"learning_rate": 0.00016329966329966327,
"loss": 0.3197,
"step": 10100
},
{
"epoch": 25.46,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3348841965198517,
"eval_runtime": 501.1589,
"eval_samples_per_second": 49.573,
"eval_steps_per_second": 24.787,
"step": 10100
},
{
"epoch": 25.71,
"learning_rate": 0.0001616161616161616,
"loss": 0.3273,
"step": 10200
},
{
"epoch": 25.71,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3321140706539154,
"eval_runtime": 501.4344,
"eval_samples_per_second": 49.546,
"eval_steps_per_second": 24.773,
"step": 10200
},
{
"epoch": 25.96,
"learning_rate": 0.00015993265993265992,
"loss": 0.3345,
"step": 10300
},
{
"epoch": 25.96,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3312225043773651,
"eval_runtime": 500.9045,
"eval_samples_per_second": 49.598,
"eval_steps_per_second": 24.799,
"step": 10300
},
{
"epoch": 26.21,
"learning_rate": 0.00015824915824915825,
"loss": 0.3351,
"step": 10400
},
{
"epoch": 26.21,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33248284459114075,
"eval_runtime": 501.2052,
"eval_samples_per_second": 49.569,
"eval_steps_per_second": 24.784,
"step": 10400
},
{
"epoch": 26.47,
"learning_rate": 0.00015656565656565653,
"loss": 0.3144,
"step": 10500
},
{
"epoch": 26.47,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.334583580493927,
"eval_runtime": 501.103,
"eval_samples_per_second": 49.579,
"eval_steps_per_second": 24.789,
"step": 10500
},
{
"epoch": 26.72,
"learning_rate": 0.00015488215488215486,
"loss": 0.3361,
"step": 10600
},
{
"epoch": 26.72,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33112606406211853,
"eval_runtime": 500.6859,
"eval_samples_per_second": 49.62,
"eval_steps_per_second": 24.81,
"step": 10600
},
{
"epoch": 26.97,
"learning_rate": 0.00015319865319865319,
"loss": 0.3334,
"step": 10700
},
{
"epoch": 26.97,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306848406791687,
"eval_runtime": 500.81,
"eval_samples_per_second": 49.608,
"eval_steps_per_second": 24.804,
"step": 10700
},
{
"epoch": 27.22,
"learning_rate": 0.00015151515151515152,
"loss": 0.3287,
"step": 10800
},
{
"epoch": 27.22,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3373050093650818,
"eval_runtime": 501.0133,
"eval_samples_per_second": 49.588,
"eval_steps_per_second": 24.794,
"step": 10800
},
{
"epoch": 27.47,
"learning_rate": 0.00014983164983164982,
"loss": 0.3374,
"step": 10900
},
{
"epoch": 27.47,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306737244129181,
"eval_runtime": 501.4272,
"eval_samples_per_second": 49.547,
"eval_steps_per_second": 24.773,
"step": 10900
},
{
"epoch": 27.73,
"learning_rate": 0.00014814814814814812,
"loss": 0.3302,
"step": 11000
},
{
"epoch": 27.73,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306819200515747,
"eval_runtime": 501.0901,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 11000
},
{
"epoch": 27.98,
"learning_rate": 0.00014646464646464645,
"loss": 0.3245,
"step": 11100
},
{
"epoch": 27.98,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33153215050697327,
"eval_runtime": 501.2107,
"eval_samples_per_second": 49.568,
"eval_steps_per_second": 24.784,
"step": 11100
},
{
"epoch": 28.23,
"learning_rate": 0.00014478114478114478,
"loss": 0.3353,
"step": 11200
},
{
"epoch": 28.23,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33351030945777893,
"eval_runtime": 501.1769,
"eval_samples_per_second": 49.571,
"eval_steps_per_second": 24.786,
"step": 11200
},
{
"epoch": 28.48,
"learning_rate": 0.00014309764309764308,
"loss": 0.3191,
"step": 11300
},
{
"epoch": 28.48,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33356890082359314,
"eval_runtime": 501.2908,
"eval_samples_per_second": 49.56,
"eval_steps_per_second": 24.78,
"step": 11300
},
{
"epoch": 28.73,
"learning_rate": 0.0001414141414141414,
"loss": 0.3226,
"step": 11400
},
{
"epoch": 28.73,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33081722259521484,
"eval_runtime": 501.1297,
"eval_samples_per_second": 49.576,
"eval_steps_per_second": 24.788,
"step": 11400
},
{
"epoch": 28.99,
"learning_rate": 0.0001397306397306397,
"loss": 0.3384,
"step": 11500
},
{
"epoch": 28.99,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3321709930896759,
"eval_runtime": 501.2824,
"eval_samples_per_second": 49.561,
"eval_steps_per_second": 24.78,
"step": 11500
},
{
"epoch": 29.24,
"learning_rate": 0.00013804713804713804,
"loss": 0.3368,
"step": 11600
},
{
"epoch": 29.24,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3336506485939026,
"eval_runtime": 500.9493,
"eval_samples_per_second": 49.594,
"eval_steps_per_second": 24.797,
"step": 11600
},
{
"epoch": 29.49,
"learning_rate": 0.00013636363636363634,
"loss": 0.3224,
"step": 11700
},
{
"epoch": 29.49,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3331962823867798,
"eval_runtime": 501.3861,
"eval_samples_per_second": 49.551,
"eval_steps_per_second": 24.775,
"step": 11700
},
{
"epoch": 29.74,
"learning_rate": 0.00013468013468013467,
"loss": 0.3224,
"step": 11800
},
{
"epoch": 29.74,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3318473696708679,
"eval_runtime": 501.3052,
"eval_samples_per_second": 49.559,
"eval_steps_per_second": 24.779,
"step": 11800
},
{
"epoch": 29.99,
"learning_rate": 0.00013299663299663297,
"loss": 0.3363,
"step": 11900
},
{
"epoch": 29.99,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3309670388698578,
"eval_runtime": 501.4331,
"eval_samples_per_second": 49.546,
"eval_steps_per_second": 24.773,
"step": 11900
},
{
"epoch": 30.25,
"learning_rate": 0.0001313131313131313,
"loss": 0.327,
"step": 12000
},
{
"epoch": 30.25,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306976854801178,
"eval_runtime": 501.4726,
"eval_samples_per_second": 49.542,
"eval_steps_per_second": 24.771,
"step": 12000
},
{
"epoch": 30.5,
"learning_rate": 0.0001296296296296296,
"loss": 0.3291,
"step": 12100
},
{
"epoch": 30.5,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306950628757477,
"eval_runtime": 501.0845,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 12100
},
{
"epoch": 30.75,
"learning_rate": 0.00012794612794612793,
"loss": 0.3369,
"step": 12200
},
{
"epoch": 30.75,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3321588933467865,
"eval_runtime": 501.0681,
"eval_samples_per_second": 49.582,
"eval_steps_per_second": 24.791,
"step": 12200
},
{
"epoch": 31.0,
"learning_rate": 0.00012626262626262626,
"loss": 0.3211,
"step": 12300
},
{
"epoch": 31.0,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3328503370285034,
"eval_runtime": 501.3433,
"eval_samples_per_second": 49.555,
"eval_steps_per_second": 24.777,
"step": 12300
},
{
"epoch": 31.25,
"learning_rate": 0.00012457912457912456,
"loss": 0.329,
"step": 12400
},
{
"epoch": 31.25,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33213621377944946,
"eval_runtime": 501.089,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 12400
},
{
"epoch": 31.51,
"learning_rate": 0.0001228956228956229,
"loss": 0.3206,
"step": 12500
},
{
"epoch": 31.51,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33089062571525574,
"eval_runtime": 501.0382,
"eval_samples_per_second": 49.585,
"eval_steps_per_second": 24.793,
"step": 12500
},
{
"epoch": 31.76,
"learning_rate": 0.0001212121212121212,
"loss": 0.3339,
"step": 12600
},
{
"epoch": 31.76,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3331972658634186,
"eval_runtime": 501.242,
"eval_samples_per_second": 49.565,
"eval_steps_per_second": 24.782,
"step": 12600
},
{
"epoch": 32.01,
"learning_rate": 0.00011952861952861952,
"loss": 0.3323,
"step": 12700
},
{
"epoch": 32.01,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3316064476966858,
"eval_runtime": 501.0626,
"eval_samples_per_second": 49.583,
"eval_steps_per_second": 24.791,
"step": 12700
},
{
"epoch": 32.26,
"learning_rate": 0.00011784511784511783,
"loss": 0.3273,
"step": 12800
},
{
"epoch": 32.26,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3323478400707245,
"eval_runtime": 501.4098,
"eval_samples_per_second": 49.548,
"eval_steps_per_second": 24.774,
"step": 12800
},
{
"epoch": 32.51,
"learning_rate": 0.00011616161616161616,
"loss": 0.3362,
"step": 12900
},
{
"epoch": 32.51,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33072328567504883,
"eval_runtime": 501.8013,
"eval_samples_per_second": 49.51,
"eval_steps_per_second": 24.755,
"step": 12900
},
{
"epoch": 32.77,
"learning_rate": 0.00011447811447811446,
"loss": 0.3387,
"step": 13000
},
{
"epoch": 32.77,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3308790922164917,
"eval_runtime": 501.2768,
"eval_samples_per_second": 49.561,
"eval_steps_per_second": 24.781,
"step": 13000
},
{
"epoch": 33.02,
"learning_rate": 0.00011279461279461279,
"loss": 0.3173,
"step": 13100
},
{
"epoch": 33.02,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33112308382987976,
"eval_runtime": 501.2827,
"eval_samples_per_second": 49.561,
"eval_steps_per_second": 24.78,
"step": 13100
},
{
"epoch": 33.27,
"learning_rate": 0.00011111111111111109,
"loss": 0.3291,
"step": 13200
},
{
"epoch": 33.27,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33085396885871887,
"eval_runtime": 501.2449,
"eval_samples_per_second": 49.565,
"eval_steps_per_second": 24.782,
"step": 13200
},
{
"epoch": 33.52,
"learning_rate": 0.00010942760942760942,
"loss": 0.3316,
"step": 13300
},
{
"epoch": 33.52,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33154767751693726,
"eval_runtime": 501.0257,
"eval_samples_per_second": 49.586,
"eval_steps_per_second": 24.793,
"step": 13300
},
{
"epoch": 33.77,
"learning_rate": 0.00010774410774410772,
"loss": 0.3366,
"step": 13400
},
{
"epoch": 33.77,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33317822217941284,
"eval_runtime": 501.0403,
"eval_samples_per_second": 49.585,
"eval_steps_per_second": 24.792,
"step": 13400
},
{
"epoch": 34.03,
"learning_rate": 0.00010606060606060605,
"loss": 0.3115,
"step": 13500
},
{
"epoch": 34.03,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3382599353790283,
"eval_runtime": 500.7949,
"eval_samples_per_second": 49.609,
"eval_steps_per_second": 24.805,
"step": 13500
},
{
"epoch": 34.28,
"learning_rate": 0.00010437710437710438,
"loss": 0.3275,
"step": 13600
},
{
"epoch": 34.28,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.332431823015213,
"eval_runtime": 500.933,
"eval_samples_per_second": 49.595,
"eval_steps_per_second": 24.798,
"step": 13600
},
{
"epoch": 34.53,
"learning_rate": 0.00010269360269360268,
"loss": 0.3373,
"step": 13700
},
{
"epoch": 34.53,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33150389790534973,
"eval_runtime": 500.781,
"eval_samples_per_second": 49.611,
"eval_steps_per_second": 24.805,
"step": 13700
},
{
"epoch": 34.78,
"learning_rate": 0.00010101010101010101,
"loss": 0.3247,
"step": 13800
},
{
"epoch": 34.78,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3313423693180084,
"eval_runtime": 501.1265,
"eval_samples_per_second": 49.576,
"eval_steps_per_second": 24.788,
"step": 13800
},
{
"epoch": 35.03,
"learning_rate": 9.932659932659931e-05,
"loss": 0.3349,
"step": 13900
},
{
"epoch": 35.03,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33250102400779724,
"eval_runtime": 501.0936,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 13900
},
{
"epoch": 35.29,
"learning_rate": 9.764309764309764e-05,
"loss": 0.3223,
"step": 14000
},
{
"epoch": 35.29,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33122116327285767,
"eval_runtime": 500.9681,
"eval_samples_per_second": 49.592,
"eval_steps_per_second": 24.796,
"step": 14000
},
{
"epoch": 35.54,
"learning_rate": 9.595959595959594e-05,
"loss": 0.3321,
"step": 14100
},
{
"epoch": 35.54,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3307776153087616,
"eval_runtime": 500.9865,
"eval_samples_per_second": 49.59,
"eval_steps_per_second": 24.795,
"step": 14100
},
{
"epoch": 35.79,
"learning_rate": 9.427609427609427e-05,
"loss": 0.3304,
"step": 14200
},
{
"epoch": 35.79,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3315562605857849,
"eval_runtime": 500.788,
"eval_samples_per_second": 49.61,
"eval_steps_per_second": 24.805,
"step": 14200
},
{
"epoch": 36.04,
"learning_rate": 9.259259259259257e-05,
"loss": 0.3262,
"step": 14300
},
{
"epoch": 36.04,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33196473121643066,
"eval_runtime": 501.0344,
"eval_samples_per_second": 49.585,
"eval_steps_per_second": 24.793,
"step": 14300
},
{
"epoch": 36.29,
"learning_rate": 9.09090909090909e-05,
"loss": 0.3239,
"step": 14400
},
{
"epoch": 36.29,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3316847085952759,
"eval_runtime": 501.0629,
"eval_samples_per_second": 49.583,
"eval_steps_per_second": 24.791,
"step": 14400
},
{
"epoch": 36.55,
"learning_rate": 8.92255892255892e-05,
"loss": 0.3325,
"step": 14500
},
{
"epoch": 36.55,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3308143615722656,
"eval_runtime": 500.8635,
"eval_samples_per_second": 49.602,
"eval_steps_per_second": 24.801,
"step": 14500
},
{
"epoch": 36.8,
"learning_rate": 8.754208754208753e-05,
"loss": 0.325,
"step": 14600
},
{
"epoch": 36.8,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3316170275211334,
"eval_runtime": 500.7755,
"eval_samples_per_second": 49.611,
"eval_steps_per_second": 24.806,
"step": 14600
},
{
"epoch": 37.05,
"learning_rate": 8.585858585858584e-05,
"loss": 0.3416,
"step": 14700
},
{
"epoch": 37.05,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3310682773590088,
"eval_runtime": 501.0155,
"eval_samples_per_second": 49.587,
"eval_steps_per_second": 24.794,
"step": 14700
},
{
"epoch": 37.3,
"learning_rate": 8.417508417508417e-05,
"loss": 0.3226,
"step": 14800
},
{
"epoch": 37.3,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33090585470199585,
"eval_runtime": 500.9851,
"eval_samples_per_second": 49.59,
"eval_steps_per_second": 24.795,
"step": 14800
},
{
"epoch": 37.56,
"learning_rate": 8.24915824915825e-05,
"loss": 0.3286,
"step": 14900
},
{
"epoch": 37.56,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3306817412376404,
"eval_runtime": 500.941,
"eval_samples_per_second": 49.595,
"eval_steps_per_second": 24.797,
"step": 14900
},
{
"epoch": 37.81,
"learning_rate": 8.08080808080808e-05,
"loss": 0.3284,
"step": 15000
},
{
"epoch": 37.81,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3312084972858429,
"eval_runtime": 500.7822,
"eval_samples_per_second": 49.61,
"eval_steps_per_second": 24.805,
"step": 15000
},
{
"epoch": 38.06,
"learning_rate": 7.912457912457913e-05,
"loss": 0.3298,
"step": 15100
},
{
"epoch": 38.06,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33263882994651794,
"eval_runtime": 500.7072,
"eval_samples_per_second": 49.618,
"eval_steps_per_second": 24.809,
"step": 15100
},
{
"epoch": 38.31,
"learning_rate": 7.744107744107743e-05,
"loss": 0.3383,
"step": 15200
},
{
"epoch": 38.31,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33113569021224976,
"eval_runtime": 501.0449,
"eval_samples_per_second": 49.584,
"eval_steps_per_second": 24.792,
"step": 15200
},
{
"epoch": 38.56,
"learning_rate": 7.575757575757576e-05,
"loss": 0.3418,
"step": 15300
},
{
"epoch": 38.56,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33080732822418213,
"eval_runtime": 501.1158,
"eval_samples_per_second": 49.577,
"eval_steps_per_second": 24.789,
"step": 15300
},
{
"epoch": 38.82,
"learning_rate": 7.407407407407406e-05,
"loss": 0.3123,
"step": 15400
},
{
"epoch": 38.82,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3311246931552887,
"eval_runtime": 501.2192,
"eval_samples_per_second": 49.567,
"eval_steps_per_second": 24.784,
"step": 15400
},
{
"epoch": 39.07,
"learning_rate": 7.239057239057239e-05,
"loss": 0.3237,
"step": 15500
},
{
"epoch": 39.07,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3346453011035919,
"eval_runtime": 501.3886,
"eval_samples_per_second": 49.55,
"eval_steps_per_second": 24.775,
"step": 15500
},
{
"epoch": 39.32,
"learning_rate": 7.07070707070707e-05,
"loss": 0.3261,
"step": 15600
},
{
"epoch": 39.32,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33250510692596436,
"eval_runtime": 501.4416,
"eval_samples_per_second": 49.545,
"eval_steps_per_second": 24.773,
"step": 15600
},
{
"epoch": 39.57,
"learning_rate": 6.902356902356902e-05,
"loss": 0.3269,
"step": 15700
},
{
"epoch": 39.57,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33122241497039795,
"eval_runtime": 501.3022,
"eval_samples_per_second": 49.559,
"eval_steps_per_second": 24.779,
"step": 15700
},
{
"epoch": 39.82,
"learning_rate": 6.734006734006734e-05,
"loss": 0.3267,
"step": 15800
},
{
"epoch": 39.82,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3319133520126343,
"eval_runtime": 501.3262,
"eval_samples_per_second": 49.557,
"eval_steps_per_second": 24.778,
"step": 15800
},
{
"epoch": 40.08,
"learning_rate": 6.565656565656565e-05,
"loss": 0.3381,
"step": 15900
},
{
"epoch": 40.08,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33269181847572327,
"eval_runtime": 500.8586,
"eval_samples_per_second": 49.603,
"eval_steps_per_second": 24.801,
"step": 15900
},
{
"epoch": 40.33,
"learning_rate": 6.397306397306397e-05,
"loss": 0.3238,
"step": 16000
},
{
"epoch": 40.33,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3325785994529724,
"eval_runtime": 501.1224,
"eval_samples_per_second": 49.577,
"eval_steps_per_second": 24.788,
"step": 16000
},
{
"epoch": 40.58,
"learning_rate": 6.228956228956228e-05,
"loss": 0.3299,
"step": 16100
},
{
"epoch": 40.58,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33203861117362976,
"eval_runtime": 500.8556,
"eval_samples_per_second": 49.603,
"eval_steps_per_second": 24.802,
"step": 16100
},
{
"epoch": 40.83,
"learning_rate": 6.06060606060606e-05,
"loss": 0.3385,
"step": 16200
},
{
"epoch": 40.83,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33086949586868286,
"eval_runtime": 501.1006,
"eval_samples_per_second": 49.579,
"eval_steps_per_second": 24.789,
"step": 16200
},
{
"epoch": 41.08,
"learning_rate": 5.8922558922558913e-05,
"loss": 0.3268,
"step": 16300
},
{
"epoch": 41.08,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33224013447761536,
"eval_runtime": 500.9055,
"eval_samples_per_second": 49.598,
"eval_steps_per_second": 24.799,
"step": 16300
},
{
"epoch": 41.34,
"learning_rate": 5.723905723905723e-05,
"loss": 0.3253,
"step": 16400
},
{
"epoch": 41.34,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3319920301437378,
"eval_runtime": 501.2306,
"eval_samples_per_second": 49.566,
"eval_steps_per_second": 24.783,
"step": 16400
},
{
"epoch": 41.59,
"learning_rate": 5.5555555555555545e-05,
"loss": 0.3261,
"step": 16500
},
{
"epoch": 41.59,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33143314719200134,
"eval_runtime": 501.0325,
"eval_samples_per_second": 49.586,
"eval_steps_per_second": 24.793,
"step": 16500
},
{
"epoch": 41.84,
"learning_rate": 5.387205387205386e-05,
"loss": 0.3362,
"step": 16600
},
{
"epoch": 41.84,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3324301838874817,
"eval_runtime": 500.7987,
"eval_samples_per_second": 49.609,
"eval_steps_per_second": 24.804,
"step": 16600
},
{
"epoch": 42.09,
"learning_rate": 5.218855218855219e-05,
"loss": 0.3203,
"step": 16700
},
{
"epoch": 42.09,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3325912356376648,
"eval_runtime": 500.6821,
"eval_samples_per_second": 49.62,
"eval_steps_per_second": 24.81,
"step": 16700
},
{
"epoch": 42.34,
"learning_rate": 5.0505050505050505e-05,
"loss": 0.325,
"step": 16800
},
{
"epoch": 42.34,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3323043882846832,
"eval_runtime": 501.26,
"eval_samples_per_second": 49.563,
"eval_steps_per_second": 24.782,
"step": 16800
},
{
"epoch": 42.6,
"learning_rate": 4.882154882154882e-05,
"loss": 0.3172,
"step": 16900
},
{
"epoch": 42.6,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33261871337890625,
"eval_runtime": 500.9553,
"eval_samples_per_second": 49.593,
"eval_steps_per_second": 24.797,
"step": 16900
},
{
"epoch": 42.85,
"learning_rate": 4.7138047138047136e-05,
"loss": 0.3361,
"step": 17000
},
{
"epoch": 42.85,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3307626247406006,
"eval_runtime": 501.0928,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 17000
},
{
"epoch": 43.1,
"learning_rate": 4.545454545454545e-05,
"loss": 0.3432,
"step": 17100
},
{
"epoch": 43.1,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3309696614742279,
"eval_runtime": 501.4895,
"eval_samples_per_second": 49.54,
"eval_steps_per_second": 24.77,
"step": 17100
},
{
"epoch": 43.35,
"learning_rate": 4.377104377104377e-05,
"loss": 0.3396,
"step": 17200
},
{
"epoch": 43.35,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3312534689903259,
"eval_runtime": 501.211,
"eval_samples_per_second": 49.568,
"eval_steps_per_second": 24.784,
"step": 17200
},
{
"epoch": 43.6,
"learning_rate": 4.208754208754208e-05,
"loss": 0.3163,
"step": 17300
},
{
"epoch": 43.6,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33276140689849854,
"eval_runtime": 501.2264,
"eval_samples_per_second": 49.566,
"eval_steps_per_second": 24.783,
"step": 17300
},
{
"epoch": 43.86,
"learning_rate": 4.04040404040404e-05,
"loss": 0.3353,
"step": 17400
},
{
"epoch": 43.86,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3318198025226593,
"eval_runtime": 501.088,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 17400
},
{
"epoch": 44.11,
"learning_rate": 3.8720538720538714e-05,
"loss": 0.3299,
"step": 17500
},
{
"epoch": 44.11,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3316582441329956,
"eval_runtime": 501.3763,
"eval_samples_per_second": 49.552,
"eval_steps_per_second": 24.776,
"step": 17500
},
{
"epoch": 44.36,
"learning_rate": 3.703703703703703e-05,
"loss": 0.3213,
"step": 17600
},
{
"epoch": 44.36,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33194538950920105,
"eval_runtime": 501.7895,
"eval_samples_per_second": 49.511,
"eval_steps_per_second": 24.755,
"step": 17600
},
{
"epoch": 44.61,
"learning_rate": 3.535353535353535e-05,
"loss": 0.3253,
"step": 17700
},
{
"epoch": 44.61,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33288490772247314,
"eval_runtime": 501.1283,
"eval_samples_per_second": 49.576,
"eval_steps_per_second": 24.788,
"step": 17700
},
{
"epoch": 44.86,
"learning_rate": 3.367003367003367e-05,
"loss": 0.3391,
"step": 17800
},
{
"epoch": 44.86,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33222696185112,
"eval_runtime": 501.5052,
"eval_samples_per_second": 49.539,
"eval_steps_per_second": 24.769,
"step": 17800
},
{
"epoch": 45.12,
"learning_rate": 3.198653198653198e-05,
"loss": 0.3179,
"step": 17900
},
{
"epoch": 45.12,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.333011269569397,
"eval_runtime": 500.7091,
"eval_samples_per_second": 49.618,
"eval_steps_per_second": 24.809,
"step": 17900
},
{
"epoch": 45.37,
"learning_rate": 3.03030303030303e-05,
"loss": 0.3348,
"step": 18000
},
{
"epoch": 45.37,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3321002721786499,
"eval_runtime": 501.2516,
"eval_samples_per_second": 49.564,
"eval_steps_per_second": 24.782,
"step": 18000
},
{
"epoch": 45.62,
"learning_rate": 2.8619528619528615e-05,
"loss": 0.3116,
"step": 18100
},
{
"epoch": 45.62,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33259570598602295,
"eval_runtime": 501.2743,
"eval_samples_per_second": 49.562,
"eval_steps_per_second": 24.781,
"step": 18100
},
{
"epoch": 45.87,
"learning_rate": 2.693602693602693e-05,
"loss": 0.3334,
"step": 18200
},
{
"epoch": 45.87,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33218705654144287,
"eval_runtime": 501.0248,
"eval_samples_per_second": 49.586,
"eval_steps_per_second": 24.793,
"step": 18200
},
{
"epoch": 46.12,
"learning_rate": 2.5252525252525253e-05,
"loss": 0.3401,
"step": 18300
},
{
"epoch": 46.12,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3314586579799652,
"eval_runtime": 501.1615,
"eval_samples_per_second": 49.573,
"eval_steps_per_second": 24.786,
"step": 18300
},
{
"epoch": 46.38,
"learning_rate": 2.3569023569023568e-05,
"loss": 0.3381,
"step": 18400
},
{
"epoch": 46.38,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33111417293548584,
"eval_runtime": 501.2779,
"eval_samples_per_second": 49.561,
"eval_steps_per_second": 24.781,
"step": 18400
},
{
"epoch": 46.63,
"learning_rate": 2.1885521885521884e-05,
"loss": 0.3154,
"step": 18500
},
{
"epoch": 46.63,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3327486515045166,
"eval_runtime": 500.9424,
"eval_samples_per_second": 49.595,
"eval_steps_per_second": 24.797,
"step": 18500
},
{
"epoch": 46.88,
"learning_rate": 2.02020202020202e-05,
"loss": 0.3348,
"step": 18600
},
{
"epoch": 46.88,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33224716782569885,
"eval_runtime": 500.9745,
"eval_samples_per_second": 49.591,
"eval_steps_per_second": 24.796,
"step": 18600
},
{
"epoch": 47.13,
"learning_rate": 1.8518518518518515e-05,
"loss": 0.3285,
"step": 18700
},
{
"epoch": 47.13,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3325360119342804,
"eval_runtime": 501.157,
"eval_samples_per_second": 49.573,
"eval_steps_per_second": 24.787,
"step": 18700
},
{
"epoch": 47.39,
"learning_rate": 1.6835016835016834e-05,
"loss": 0.3256,
"step": 18800
},
{
"epoch": 47.39,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3328581750392914,
"eval_runtime": 501.2027,
"eval_samples_per_second": 49.569,
"eval_steps_per_second": 24.784,
"step": 18800
},
{
"epoch": 47.64,
"learning_rate": 1.515151515151515e-05,
"loss": 0.3389,
"step": 18900
},
{
"epoch": 47.64,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3324856758117676,
"eval_runtime": 501.1837,
"eval_samples_per_second": 49.571,
"eval_steps_per_second": 24.785,
"step": 18900
},
{
"epoch": 47.89,
"learning_rate": 1.3468013468013465e-05,
"loss": 0.3288,
"step": 19000
},
{
"epoch": 47.89,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3326534032821655,
"eval_runtime": 501.4739,
"eval_samples_per_second": 49.542,
"eval_steps_per_second": 24.771,
"step": 19000
},
{
"epoch": 48.14,
"learning_rate": 1.1784511784511784e-05,
"loss": 0.3172,
"step": 19100
},
{
"epoch": 48.14,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3326767683029175,
"eval_runtime": 500.8014,
"eval_samples_per_second": 49.608,
"eval_steps_per_second": 24.804,
"step": 19100
},
{
"epoch": 48.39,
"learning_rate": 1.01010101010101e-05,
"loss": 0.3211,
"step": 19200
},
{
"epoch": 48.39,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.3324893116950989,
"eval_runtime": 500.8057,
"eval_samples_per_second": 49.608,
"eval_steps_per_second": 24.804,
"step": 19200
},
{
"epoch": 48.65,
"learning_rate": 8.417508417508417e-06,
"loss": 0.3348,
"step": 19300
},
{
"epoch": 48.65,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33247503638267517,
"eval_runtime": 501.0569,
"eval_samples_per_second": 49.583,
"eval_steps_per_second": 24.792,
"step": 19300
},
{
"epoch": 48.9,
"learning_rate": 6.7340067340067325e-06,
"loss": 0.3327,
"step": 19400
},
{
"epoch": 48.9,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33261463046073914,
"eval_runtime": 501.0903,
"eval_samples_per_second": 49.58,
"eval_steps_per_second": 24.79,
"step": 19400
},
{
"epoch": 49.15,
"learning_rate": 5.05050505050505e-06,
"loss": 0.3341,
"step": 19500
},
{
"epoch": 49.15,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33255448937416077,
"eval_runtime": 500.9292,
"eval_samples_per_second": 49.596,
"eval_steps_per_second": 24.798,
"step": 19500
},
{
"epoch": 49.4,
"learning_rate": 3.3670033670033663e-06,
"loss": 0.3344,
"step": 19600
},
{
"epoch": 49.4,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33247339725494385,
"eval_runtime": 501.4588,
"eval_samples_per_second": 49.543,
"eval_steps_per_second": 24.772,
"step": 19600
},
{
"epoch": 49.65,
"learning_rate": 1.6835016835016831e-06,
"loss": 0.3207,
"step": 19700
},
{
"epoch": 49.65,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33257797360420227,
"eval_runtime": 501.4807,
"eval_samples_per_second": 49.541,
"eval_steps_per_second": 24.771,
"step": 19700
},
{
"epoch": 49.91,
"learning_rate": 0.0,
"loss": 0.3299,
"step": 19800
},
{
"epoch": 49.91,
"eval_accuracy": 0.8974400257607471,
"eval_loss": 0.33260539174079895,
"eval_runtime": 501.1869,
"eval_samples_per_second": 49.57,
"eval_steps_per_second": 24.785,
"step": 19800
},
{
"epoch": 49.91,
"step": 19800,
"total_flos": 7.53101543607702e+19,
"train_loss": 0.3200095210412536,
"train_runtime": 116243.9445,
"train_samples_per_second": 10.917,
"train_steps_per_second": 0.17
}
],
"logging_steps": 100,
"max_steps": 19800,
"num_train_epochs": 50,
"save_steps": 100,
"total_flos": 7.53101543607702e+19,
"trial_name": null,
"trial_params": null
}