|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 10000, |
|
"global_step": 263566, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.990514709788061e-05, |
|
"loss": 7.0943, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.981029419576122e-05, |
|
"loss": 6.2317, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.971544129364182e-05, |
|
"loss": 5.9001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9620588391522424e-05, |
|
"loss": 5.6651, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9525735489403035e-05, |
|
"loss": 5.4754, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9430882587283645e-05, |
|
"loss": 5.3245, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.933602968516425e-05, |
|
"loss": 5.1815, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.924117678304486e-05, |
|
"loss": 5.0484, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.914632388092546e-05, |
|
"loss": 4.9349, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.905147097880607e-05, |
|
"loss": 4.8371, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.895661807668668e-05, |
|
"loss": 4.7574, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.886176517456728e-05, |
|
"loss": 4.6903, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.876691227244789e-05, |
|
"loss": 4.626, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8672059370328496e-05, |
|
"loss": 4.5767, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.85772064682091e-05, |
|
"loss": 4.5388, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.848235356608971e-05, |
|
"loss": 4.4959, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.838750066397032e-05, |
|
"loss": 4.4589, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8292647761850924e-05, |
|
"loss": 4.4324, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8197794859731535e-05, |
|
"loss": 4.3988, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.810294195761214e-05, |
|
"loss": 4.375, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.31109238678227563, |
|
"eval_loss": 4.28147029876709, |
|
"eval_runtime": 5060.877, |
|
"eval_samples_per_second": 87.687, |
|
"eval_steps_per_second": 1.37, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.800808905549274e-05, |
|
"loss": 4.3423, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.791323615337335e-05, |
|
"loss": 4.3204, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7818383251253956e-05, |
|
"loss": 4.2991, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.772353034913457e-05, |
|
"loss": 4.2818, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.762867744701517e-05, |
|
"loss": 4.2627, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7533824544895774e-05, |
|
"loss": 4.2463, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7438971642776385e-05, |
|
"loss": 4.2265, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.734411874065699e-05, |
|
"loss": 4.2159, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.72492658385376e-05, |
|
"loss": 4.198, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.715441293641821e-05, |
|
"loss": 4.1886, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7059560034298813e-05, |
|
"loss": 4.1675, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.696470713217942e-05, |
|
"loss": 4.1553, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.686985423006002e-05, |
|
"loss": 4.1466, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.677500132794063e-05, |
|
"loss": 4.1307, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.668014842582124e-05, |
|
"loss": 4.1282, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6585295523701846e-05, |
|
"loss": 4.1108, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6490442621582456e-05, |
|
"loss": 4.1135, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.639558971946305e-05, |
|
"loss": 4.0876, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6300736817343664e-05, |
|
"loss": 4.0871, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.6205883915224274e-05, |
|
"loss": 4.0754, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.3340521142462603, |
|
"eval_loss": 3.9983978271484375, |
|
"eval_runtime": 5045.5203, |
|
"eval_samples_per_second": 87.954, |
|
"eval_steps_per_second": 1.374, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.611103101310488e-05, |
|
"loss": 4.0747, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.601617811098549e-05, |
|
"loss": 4.0582, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.592132520886609e-05, |
|
"loss": 4.0545, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5826472306746696e-05, |
|
"loss": 4.0345, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5731619404627307e-05, |
|
"loss": 4.0376, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.563676650250791e-05, |
|
"loss": 4.0315, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.554191360038852e-05, |
|
"loss": 4.0217, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.544706069826913e-05, |
|
"loss": 4.0135, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.535220779614973e-05, |
|
"loss": 4.0034, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.525735489403034e-05, |
|
"loss": 4.004, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.516250199191095e-05, |
|
"loss": 3.9939, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.506764908979155e-05, |
|
"loss": 3.9871, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4972796187672164e-05, |
|
"loss": 3.9787, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.487794328555277e-05, |
|
"loss": 3.9752, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.478309038343337e-05, |
|
"loss": 3.9605, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.468823748131398e-05, |
|
"loss": 3.9542, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4593384579194585e-05, |
|
"loss": 3.9518, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4498531677075196e-05, |
|
"loss": 3.9479, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.44036787749558e-05, |
|
"loss": 3.9445, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.430882587283641e-05, |
|
"loss": 3.9409, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.3456931371386037, |
|
"eval_loss": 3.86149525642395, |
|
"eval_runtime": 5041.4629, |
|
"eval_samples_per_second": 88.025, |
|
"eval_steps_per_second": 1.375, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4213972970717014e-05, |
|
"loss": 3.9345, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.411912006859762e-05, |
|
"loss": 3.9328, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.402426716647823e-05, |
|
"loss": 3.9248, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.392941426435884e-05, |
|
"loss": 3.9126, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.383456136223944e-05, |
|
"loss": 3.9077, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3739708460120046e-05, |
|
"loss": 3.9102, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.364485555800065e-05, |
|
"loss": 3.907, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.355000265588126e-05, |
|
"loss": 3.9017, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.345514975376187e-05, |
|
"loss": 3.8976, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3360296851642475e-05, |
|
"loss": 3.8917, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3265443949523085e-05, |
|
"loss": 3.8917, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.317059104740369e-05, |
|
"loss": 3.8851, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.307573814528429e-05, |
|
"loss": 3.8899, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.29808852431649e-05, |
|
"loss": 3.8667, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.288603234104551e-05, |
|
"loss": 3.8702, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.279117943892612e-05, |
|
"loss": 3.8705, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.269632653680672e-05, |
|
"loss": 3.8647, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2601473634687325e-05, |
|
"loss": 3.8612, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2506620732567936e-05, |
|
"loss": 3.8524, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.241176783044854e-05, |
|
"loss": 3.8554, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.3530753183612612, |
|
"eval_loss": 3.7798092365264893, |
|
"eval_runtime": 5037.6086, |
|
"eval_samples_per_second": 88.092, |
|
"eval_steps_per_second": 1.376, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.231691492832915e-05, |
|
"loss": 3.8487, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.222206202620976e-05, |
|
"loss": 3.851, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2127209124090364e-05, |
|
"loss": 3.8431, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.203235622197097e-05, |
|
"loss": 3.8401, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.193750331985158e-05, |
|
"loss": 3.8296, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.184265041773218e-05, |
|
"loss": 3.8338, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.174779751561279e-05, |
|
"loss": 3.8295, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1652944613493396e-05, |
|
"loss": 3.8285, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1558091711374e-05, |
|
"loss": 3.8217, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.146323880925461e-05, |
|
"loss": 3.8262, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1368385907135214e-05, |
|
"loss": 3.8241, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1273533005015825e-05, |
|
"loss": 3.8145, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.117868010289643e-05, |
|
"loss": 3.8207, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.108382720077704e-05, |
|
"loss": 3.8128, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.098897429865764e-05, |
|
"loss": 3.8053, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.089412139653825e-05, |
|
"loss": 3.8023, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.079926849441886e-05, |
|
"loss": 3.8084, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.070441559229947e-05, |
|
"loss": 3.7967, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.060956269018007e-05, |
|
"loss": 3.7947, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.0514709788060675e-05, |
|
"loss": 3.7973, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.35837528569747157, |
|
"eval_loss": 3.7209770679473877, |
|
"eval_runtime": 5052.5501, |
|
"eval_samples_per_second": 87.831, |
|
"eval_steps_per_second": 1.372, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.041985688594128e-05, |
|
"loss": 3.7861, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.032500398382189e-05, |
|
"loss": 3.7878, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.02301510817025e-05, |
|
"loss": 3.7839, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.0135298179583104e-05, |
|
"loss": 3.7898, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.0040445277463714e-05, |
|
"loss": 3.7808, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.994559237534432e-05, |
|
"loss": 3.7857, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.985073947322492e-05, |
|
"loss": 3.7754, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.975588657110553e-05, |
|
"loss": 3.769, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9661033668986136e-05, |
|
"loss": 3.7723, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9566180766866747e-05, |
|
"loss": 3.7719, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.947132786474736e-05, |
|
"loss": 3.7684, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9376474962627954e-05, |
|
"loss": 3.7672, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9281622060508565e-05, |
|
"loss": 3.7595, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.918676915838917e-05, |
|
"loss": 3.764, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.909191625626978e-05, |
|
"loss": 3.7584, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.899706335415039e-05, |
|
"loss": 3.7532, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.890221045203099e-05, |
|
"loss": 3.7476, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.88073575499116e-05, |
|
"loss": 3.7502, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.871250464779221e-05, |
|
"loss": 3.7584, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.861765174567281e-05, |
|
"loss": 3.7421, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.3629990378932714, |
|
"eval_loss": 3.6750495433807373, |
|
"eval_runtime": 5050.4176, |
|
"eval_samples_per_second": 87.869, |
|
"eval_steps_per_second": 1.373, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.852279884355342e-05, |
|
"loss": 3.7503, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8427945941434025e-05, |
|
"loss": 3.7485, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8333093039314636e-05, |
|
"loss": 3.745, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.823824013719524e-05, |
|
"loss": 3.739, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8143387235075843e-05, |
|
"loss": 3.7411, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8048534332956454e-05, |
|
"loss": 3.739, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.795368143083706e-05, |
|
"loss": 3.7388, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.785882852871767e-05, |
|
"loss": 3.7339, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.776397562659827e-05, |
|
"loss": 3.7328, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7669122724478876e-05, |
|
"loss": 3.7209, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7574269822359486e-05, |
|
"loss": 3.727, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.74794169202401e-05, |
|
"loss": 3.7251, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.73845640181207e-05, |
|
"loss": 3.7199, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.728971111600131e-05, |
|
"loss": 3.7277, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.719485821388191e-05, |
|
"loss": 3.7232, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.710000531176252e-05, |
|
"loss": 3.7154, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.700515240964313e-05, |
|
"loss": 3.7159, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.691029950752373e-05, |
|
"loss": 3.711, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.681544660540434e-05, |
|
"loss": 3.7094, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.672059370328495e-05, |
|
"loss": 3.7097, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.36635205507688484, |
|
"eval_loss": 3.6377646923065186, |
|
"eval_runtime": 5044.3649, |
|
"eval_samples_per_second": 87.974, |
|
"eval_steps_per_second": 1.375, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.662574080116555e-05, |
|
"loss": 3.7109, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.653088789904616e-05, |
|
"loss": 3.7069, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.6436034996926765e-05, |
|
"loss": 3.7025, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.6341182094807376e-05, |
|
"loss": 3.7032, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6246329192687986e-05, |
|
"loss": 3.7005, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.615147629056859e-05, |
|
"loss": 3.704, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6056623388449194e-05, |
|
"loss": 3.6903, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.59617704863298e-05, |
|
"loss": 3.6989, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.586691758421041e-05, |
|
"loss": 3.6947, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.577206468209102e-05, |
|
"loss": 3.6969, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.567721177997162e-05, |
|
"loss": 3.696, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.5582358877852226e-05, |
|
"loss": 3.6905, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.5487505975732836e-05, |
|
"loss": 3.6851, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.539265307361344e-05, |
|
"loss": 3.6857, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.529780017149405e-05, |
|
"loss": 3.6889, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5202947269374654e-05, |
|
"loss": 3.6911, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5108094367255265e-05, |
|
"loss": 3.687, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.501324146513587e-05, |
|
"loss": 3.6808, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.491838856301647e-05, |
|
"loss": 3.6841, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.482353566089708e-05, |
|
"loss": 3.6741, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.36940481219098525, |
|
"eval_loss": 3.606105089187622, |
|
"eval_runtime": 5043.6804, |
|
"eval_samples_per_second": 87.986, |
|
"eval_steps_per_second": 1.375, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4728682758777694e-05, |
|
"loss": 3.6789, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.46338298566583e-05, |
|
"loss": 3.6809, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.45389769545389e-05, |
|
"loss": 3.6854, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4444124052419505e-05, |
|
"loss": 3.6692, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4349271150300115e-05, |
|
"loss": 3.6771, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4254418248180726e-05, |
|
"loss": 3.6773, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.415956534606133e-05, |
|
"loss": 3.6701, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.406471244394194e-05, |
|
"loss": 3.666, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.3969859541822544e-05, |
|
"loss": 3.672, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.387500663970315e-05, |
|
"loss": 3.6738, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.378015373758376e-05, |
|
"loss": 3.6705, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.368530083546436e-05, |
|
"loss": 3.6649, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.359044793334497e-05, |
|
"loss": 3.6736, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3495595031225576e-05, |
|
"loss": 3.6689, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.340074212910618e-05, |
|
"loss": 3.6665, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.330588922698679e-05, |
|
"loss": 3.6641, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3211036324867394e-05, |
|
"loss": 3.6536, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3116183422748005e-05, |
|
"loss": 3.6658, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3021330520628615e-05, |
|
"loss": 3.6544, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.292647761850922e-05, |
|
"loss": 3.6599, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.37180447854264453, |
|
"eval_loss": 3.5803401470184326, |
|
"eval_runtime": 5042.0712, |
|
"eval_samples_per_second": 88.014, |
|
"eval_steps_per_second": 1.375, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.283162471638982e-05, |
|
"loss": 3.6605, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2736771814270426e-05, |
|
"loss": 3.6603, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.264191891215104e-05, |
|
"loss": 3.6576, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.254706601003165e-05, |
|
"loss": 3.6511, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.245221310791225e-05, |
|
"loss": 3.6518, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2357360205792855e-05, |
|
"loss": 3.6522, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2262507303673465e-05, |
|
"loss": 3.646, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.216765440155407e-05, |
|
"loss": 3.6494, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.207280149943468e-05, |
|
"loss": 3.6388, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.1977948597315284e-05, |
|
"loss": 3.6456, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.1883095695195894e-05, |
|
"loss": 3.6398, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.17882427930765e-05, |
|
"loss": 3.6476, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.16933898909571e-05, |
|
"loss": 3.6364, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.159853698883771e-05, |
|
"loss": 3.6456, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.150368408671832e-05, |
|
"loss": 3.645, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1408831184598926e-05, |
|
"loss": 3.6357, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.131397828247954e-05, |
|
"loss": 3.6415, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1219125380360134e-05, |
|
"loss": 3.6314, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1124272478240744e-05, |
|
"loss": 3.64, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1029419576121355e-05, |
|
"loss": 3.6356, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.37408231347678594, |
|
"eval_loss": 3.558403253555298, |
|
"eval_runtime": 5044.122, |
|
"eval_samples_per_second": 87.978, |
|
"eval_steps_per_second": 1.375, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.093456667400196e-05, |
|
"loss": 3.637, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.083971377188257e-05, |
|
"loss": 3.6353, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.074486086976317e-05, |
|
"loss": 3.6331, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.065000796764378e-05, |
|
"loss": 3.6288, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.055515506552439e-05, |
|
"loss": 3.6273, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.046030216340499e-05, |
|
"loss": 3.6351, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.03654492612856e-05, |
|
"loss": 3.6285, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.027059635916621e-05, |
|
"loss": 3.6256, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0175743457046812e-05, |
|
"loss": 3.6248, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0080890554927423e-05, |
|
"loss": 3.6182, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9986037652808023e-05, |
|
"loss": 3.6242, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9891184750688634e-05, |
|
"loss": 3.625, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.979633184856924e-05, |
|
"loss": 3.6191, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9701478946449845e-05, |
|
"loss": 3.6267, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9606626044330455e-05, |
|
"loss": 3.6227, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9511773142211062e-05, |
|
"loss": 3.6217, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9416920240091666e-05, |
|
"loss": 3.6168, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9322067337972277e-05, |
|
"loss": 3.6204, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.922721443585288e-05, |
|
"loss": 3.6217, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9132361533733487e-05, |
|
"loss": 3.6131, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.37584134336520747, |
|
"eval_loss": 3.542346715927124, |
|
"eval_runtime": 5051.4787, |
|
"eval_samples_per_second": 87.85, |
|
"eval_steps_per_second": 1.373, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9037508631614098e-05, |
|
"loss": 3.6165, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8942655729494698e-05, |
|
"loss": 3.6129, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.884780282737531e-05, |
|
"loss": 3.6177, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8752949925255913e-05, |
|
"loss": 3.6187, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.865809702313652e-05, |
|
"loss": 3.6112, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.856324412101713e-05, |
|
"loss": 3.6103, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8468391218897734e-05, |
|
"loss": 3.6103, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.837353831677834e-05, |
|
"loss": 3.615, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.827868541465895e-05, |
|
"loss": 3.6151, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8183832512539555e-05, |
|
"loss": 3.6039, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8088979610420162e-05, |
|
"loss": 3.6133, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7994126708300766e-05, |
|
"loss": 3.6063, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7899273806181377e-05, |
|
"loss": 3.6029, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7804420904061984e-05, |
|
"loss": 3.6082, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7709568001942588e-05, |
|
"loss": 3.6099, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7614715099823195e-05, |
|
"loss": 3.6035, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.75198621977038e-05, |
|
"loss": 3.5997, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.742500929558441e-05, |
|
"loss": 3.6032, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7330156393465016e-05, |
|
"loss": 3.5998, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.723530349134562e-05, |
|
"loss": 3.5991, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.3775510164297428, |
|
"eval_loss": 3.525380849838257, |
|
"eval_runtime": 5041.3032, |
|
"eval_samples_per_second": 88.027, |
|
"eval_steps_per_second": 1.375, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.714045058922623e-05, |
|
"loss": 3.6027, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7045597687106838e-05, |
|
"loss": 3.5947, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.695074478498744e-05, |
|
"loss": 3.6033, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6855891882868052e-05, |
|
"loss": 3.5933, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6761038980748652e-05, |
|
"loss": 3.594, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6666186078629263e-05, |
|
"loss": 3.599, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6571333176509873e-05, |
|
"loss": 3.6013, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6476480274390474e-05, |
|
"loss": 3.5982, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6381627372271084e-05, |
|
"loss": 3.5937, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.628677447015169e-05, |
|
"loss": 3.5945, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6191921568032295e-05, |
|
"loss": 3.5926, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6097068665912906e-05, |
|
"loss": 3.5927, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.600221576379351e-05, |
|
"loss": 3.5892, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5907362861674116e-05, |
|
"loss": 3.5938, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5812509959554727e-05, |
|
"loss": 3.5867, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.571765705743533e-05, |
|
"loss": 3.5879, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5622804155315938e-05, |
|
"loss": 3.5909, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.552795125319654e-05, |
|
"loss": 3.5861, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.543309835107715e-05, |
|
"loss": 3.5913, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.533824544895776e-05, |
|
"loss": 3.591, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.37901210353247916, |
|
"eval_loss": 3.510841131210327, |
|
"eval_runtime": 5053.0574, |
|
"eval_samples_per_second": 87.823, |
|
"eval_steps_per_second": 1.372, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5243392546838363e-05, |
|
"loss": 3.5849, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.514853964471897e-05, |
|
"loss": 3.5868, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.505368674259958e-05, |
|
"loss": 3.5838, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4958833840480184e-05, |
|
"loss": 3.5848, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.486398093836079e-05, |
|
"loss": 3.5818, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.47691280362414e-05, |
|
"loss": 3.5842, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4674275134122006e-05, |
|
"loss": 3.584, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.457942223200261e-05, |
|
"loss": 3.5817, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.448456932988322e-05, |
|
"loss": 3.5781, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4389716427763827e-05, |
|
"loss": 3.5761, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.429486352564443e-05, |
|
"loss": 3.5755, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4200010623525038e-05, |
|
"loss": 3.5822, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4105157721405645e-05, |
|
"loss": 3.5749, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4010304819286252e-05, |
|
"loss": 3.5812, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.391545191716686e-05, |
|
"loss": 3.5786, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.3820599015047467e-05, |
|
"loss": 3.5826, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.372574611292807e-05, |
|
"loss": 3.5731, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.363089321080868e-05, |
|
"loss": 3.5784, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3536040308689285e-05, |
|
"loss": 3.5738, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3441187406569892e-05, |
|
"loss": 3.574, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.3804677520669924, |
|
"eval_loss": 3.4966471195220947, |
|
"eval_runtime": 5044.6959, |
|
"eval_samples_per_second": 87.968, |
|
"eval_steps_per_second": 1.375, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.33463345044505e-05, |
|
"loss": 3.5722, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3251481602331106e-05, |
|
"loss": 3.5778, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3156628700211713e-05, |
|
"loss": 3.5722, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.306177579809232e-05, |
|
"loss": 3.5658, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.2966922895972924e-05, |
|
"loss": 3.5671, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.2872069993853535e-05, |
|
"loss": 3.5696, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.2777217091734142e-05, |
|
"loss": 3.5691, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2682364189614745e-05, |
|
"loss": 3.5737, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2587511287495353e-05, |
|
"loss": 3.5693, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.249265838537596e-05, |
|
"loss": 3.5728, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2397805483256567e-05, |
|
"loss": 3.569, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2302952581137174e-05, |
|
"loss": 3.5559, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.220809967901778e-05, |
|
"loss": 3.5673, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2113246776898385e-05, |
|
"loss": 3.567, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2018393874778995e-05, |
|
"loss": 3.5692, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.19235409726596e-05, |
|
"loss": 3.5651, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1828688070540206e-05, |
|
"loss": 3.559, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1733835168420813e-05, |
|
"loss": 3.5657, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.163898226630142e-05, |
|
"loss": 3.5632, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1544129364182028e-05, |
|
"loss": 3.5606, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.38146521499584235, |
|
"eval_loss": 3.486565113067627, |
|
"eval_runtime": 5047.1623, |
|
"eval_samples_per_second": 87.925, |
|
"eval_steps_per_second": 1.374, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1449276462062635e-05, |
|
"loss": 3.5635, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.135442355994324e-05, |
|
"loss": 3.569, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.125957065782385e-05, |
|
"loss": 3.551, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1164717755704456e-05, |
|
"loss": 3.5543, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.106986485358506e-05, |
|
"loss": 3.5556, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0975011951465667e-05, |
|
"loss": 3.5598, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0880159049346274e-05, |
|
"loss": 3.5592, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.078530614722688e-05, |
|
"loss": 3.5562, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.069045324510749e-05, |
|
"loss": 3.5561, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0595600342988096e-05, |
|
"loss": 3.5573, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.05007474408687e-05, |
|
"loss": 3.5585, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.040589453874931e-05, |
|
"loss": 3.5576, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0311041636629917e-05, |
|
"loss": 3.5529, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.021618873451052e-05, |
|
"loss": 3.5575, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0121335832391128e-05, |
|
"loss": 3.5569, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0026482930271735e-05, |
|
"loss": 3.5537, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9931630028152342e-05, |
|
"loss": 3.5553, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.983677712603295e-05, |
|
"loss": 3.5524, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9741924223913557e-05, |
|
"loss": 3.5562, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9647071321794164e-05, |
|
"loss": 3.5516, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.3828251390025017, |
|
"eval_loss": 3.4739012718200684, |
|
"eval_runtime": 5050.1987, |
|
"eval_samples_per_second": 87.872, |
|
"eval_steps_per_second": 1.373, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.955221841967477e-05, |
|
"loss": 3.5508, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9457365517555375e-05, |
|
"loss": 3.5424, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.936251261543598e-05, |
|
"loss": 3.5526, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9267659713316592e-05, |
|
"loss": 3.5469, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9172806811197196e-05, |
|
"loss": 3.5401, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9077953909077803e-05, |
|
"loss": 3.5525, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.898310100695841e-05, |
|
"loss": 3.5494, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8888248104839014e-05, |
|
"loss": 3.5527, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8793395202719624e-05, |
|
"loss": 3.5477, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.869854230060023e-05, |
|
"loss": 3.548, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8603689398480835e-05, |
|
"loss": 3.5466, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8508836496361442e-05, |
|
"loss": 3.5491, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8413983594242053e-05, |
|
"loss": 3.5431, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8319130692122657e-05, |
|
"loss": 3.5462, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8224277790003264e-05, |
|
"loss": 3.5453, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.812942488788387e-05, |
|
"loss": 3.5408, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8034571985764478e-05, |
|
"loss": 3.5465, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7939719083645085e-05, |
|
"loss": 3.5437, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.784486618152569e-05, |
|
"loss": 3.533, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7750013279406296e-05, |
|
"loss": 3.5423, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.38377248075624093, |
|
"eval_loss": 3.4649875164031982, |
|
"eval_runtime": 5024.5121, |
|
"eval_samples_per_second": 88.322, |
|
"eval_steps_per_second": 1.38, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7655160377286907e-05, |
|
"loss": 3.5446, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.756030747516751e-05, |
|
"loss": 3.5374, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7465454573048118e-05, |
|
"loss": 3.5426, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7370601670928725e-05, |
|
"loss": 3.5391, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.727574876880933e-05, |
|
"loss": 3.544, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.718089586668994e-05, |
|
"loss": 3.5396, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7086042964570546e-05, |
|
"loss": 3.5385, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.699119006245115e-05, |
|
"loss": 3.534, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6896337160331757e-05, |
|
"loss": 3.5374, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6801484258212368e-05, |
|
"loss": 3.5351, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.670663135609297e-05, |
|
"loss": 3.5391, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.661177845397358e-05, |
|
"loss": 3.5351, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6516925551854186e-05, |
|
"loss": 3.5315, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6422072649734793e-05, |
|
"loss": 3.5324, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.63272197476154e-05, |
|
"loss": 3.5379, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6232366845496007e-05, |
|
"loss": 3.534, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.613751394337661e-05, |
|
"loss": 3.5366, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.604266104125722e-05, |
|
"loss": 3.5364, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5947808139137825e-05, |
|
"loss": 3.5398, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5852955237018432e-05, |
|
"loss": 3.5298, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.3846720031995081, |
|
"eval_loss": 3.455994129180908, |
|
"eval_runtime": 5033.5621, |
|
"eval_samples_per_second": 88.163, |
|
"eval_steps_per_second": 1.378, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.575810233489904e-05, |
|
"loss": 3.5337, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5663249432779646e-05, |
|
"loss": 3.5324, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5568396530660254e-05, |
|
"loss": 3.5307, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.547354362854086e-05, |
|
"loss": 3.5273, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5378690726421464e-05, |
|
"loss": 3.5301, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.528383782430207e-05, |
|
"loss": 3.5321, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.518898492218268e-05, |
|
"loss": 3.5283, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5094132020063287e-05, |
|
"loss": 3.534, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4999279117943893e-05, |
|
"loss": 3.5346, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4904426215824498e-05, |
|
"loss": 3.5234, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4809573313705107e-05, |
|
"loss": 3.5243, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4714720411585714e-05, |
|
"loss": 3.5292, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.461986750946632e-05, |
|
"loss": 3.5241, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4525014607346927e-05, |
|
"loss": 3.5258, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4430161705227536e-05, |
|
"loss": 3.5241, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4335308803108141e-05, |
|
"loss": 3.5281, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4240455900988747e-05, |
|
"loss": 3.5271, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4145602998869354e-05, |
|
"loss": 3.5222, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.405075009674996e-05, |
|
"loss": 3.5197, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3955897194630568e-05, |
|
"loss": 3.5287, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.38565153361158844, |
|
"eval_loss": 3.447903871536255, |
|
"eval_runtime": 5039.8049, |
|
"eval_samples_per_second": 88.054, |
|
"eval_steps_per_second": 1.376, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3861044292511175e-05, |
|
"loss": 3.5309, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.376619139039178e-05, |
|
"loss": 3.5236, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3671338488272386e-05, |
|
"loss": 3.5268, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3576485586152995e-05, |
|
"loss": 3.5261, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3481632684033602e-05, |
|
"loss": 3.5242, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3386779781914207e-05, |
|
"loss": 3.5268, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3291926879794815e-05, |
|
"loss": 3.5302, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3197073977675423e-05, |
|
"loss": 3.525, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3102221075556029e-05, |
|
"loss": 3.5278, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3007368173436634e-05, |
|
"loss": 3.5221, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2912515271317241e-05, |
|
"loss": 3.5231, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.281766236919785e-05, |
|
"loss": 3.5237, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2722809467078456e-05, |
|
"loss": 3.5236, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2627956564959063e-05, |
|
"loss": 3.5201, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2533103662839668e-05, |
|
"loss": 3.5221, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2438250760720275e-05, |
|
"loss": 3.5216, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2343397858600881e-05, |
|
"loss": 3.5202, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.224854495648149e-05, |
|
"loss": 3.5207, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2153692054362095e-05, |
|
"loss": 3.5172, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2058839152242702e-05, |
|
"loss": 3.5187, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.3863054900513532, |
|
"eval_loss": 3.440758466720581, |
|
"eval_runtime": 5028.552, |
|
"eval_samples_per_second": 88.251, |
|
"eval_steps_per_second": 1.379, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.196398625012331e-05, |
|
"loss": 3.5153, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1869133348003917e-05, |
|
"loss": 3.5164, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1774280445884522e-05, |
|
"loss": 3.5225, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1679427543765129e-05, |
|
"loss": 3.5142, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1584574641645736e-05, |
|
"loss": 3.519, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1489721739526343e-05, |
|
"loss": 3.522, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1394868837406949e-05, |
|
"loss": 3.5142, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1300015935287558e-05, |
|
"loss": 3.5101, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1205163033168163e-05, |
|
"loss": 3.5151, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.111031013104877e-05, |
|
"loss": 3.5112, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1015457228929377e-05, |
|
"loss": 3.5161, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0920604326809984e-05, |
|
"loss": 3.5135, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.082575142469059e-05, |
|
"loss": 3.5143, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0730898522571197e-05, |
|
"loss": 3.5134, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0636045620451804e-05, |
|
"loss": 3.5162, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.054119271833241e-05, |
|
"loss": 3.5202, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0446339816213017e-05, |
|
"loss": 3.507, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0351486914093624e-05, |
|
"loss": 3.5108, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0256634011974231e-05, |
|
"loss": 3.5144, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0161781109854836e-05, |
|
"loss": 3.5157, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.3870160062789933, |
|
"eval_loss": 3.4338622093200684, |
|
"eval_runtime": 5031.2535, |
|
"eval_samples_per_second": 88.203, |
|
"eval_steps_per_second": 1.378, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0066928207735445e-05, |
|
"loss": 3.5167, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.97207530561605e-06, |
|
"loss": 3.5107, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.877222403496658e-06, |
|
"loss": 3.5114, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.782369501377265e-06, |
|
"loss": 3.5121, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.687516599257872e-06, |
|
"loss": 3.5084, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.592663697138478e-06, |
|
"loss": 3.5178, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.497810795019085e-06, |
|
"loss": 3.5076, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.402957892899692e-06, |
|
"loss": 3.5102, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.308104990780299e-06, |
|
"loss": 3.5069, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.213252088660904e-06, |
|
"loss": 3.5108, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.118399186541513e-06, |
|
"loss": 3.5087, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.023546284422119e-06, |
|
"loss": 3.5041, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.928693382302724e-06, |
|
"loss": 3.5125, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.833840480183333e-06, |
|
"loss": 3.5084, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.738987578063938e-06, |
|
"loss": 3.5034, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.644134675944546e-06, |
|
"loss": 3.5077, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.549281773825151e-06, |
|
"loss": 3.505, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.45442887170576e-06, |
|
"loss": 3.5072, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.359575969586365e-06, |
|
"loss": 3.5, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.264723067466972e-06, |
|
"loss": 3.5042, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.3876448779010485, |
|
"eval_loss": 3.4285898208618164, |
|
"eval_runtime": 5039.4169, |
|
"eval_samples_per_second": 88.06, |
|
"eval_steps_per_second": 1.376, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.16987016534758e-06, |
|
"loss": 3.5075, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.075017263228187e-06, |
|
"loss": 3.5056, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.980164361108792e-06, |
|
"loss": 3.5089, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.8853114589894e-06, |
|
"loss": 3.5114, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.790458556870006e-06, |
|
"loss": 3.5076, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.695605654750614e-06, |
|
"loss": 3.5053, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.60075275263122e-06, |
|
"loss": 3.5058, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.505899850511827e-06, |
|
"loss": 3.5078, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.411046948392433e-06, |
|
"loss": 3.5084, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.31619404627304e-06, |
|
"loss": 3.5038, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.221341144153647e-06, |
|
"loss": 3.5015, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.126488242034253e-06, |
|
"loss": 3.5034, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.03163533991486e-06, |
|
"loss": 3.5098, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.936782437795466e-06, |
|
"loss": 3.5001, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.841929535676074e-06, |
|
"loss": 3.5055, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.74707663355668e-06, |
|
"loss": 3.5018, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.652223731437288e-06, |
|
"loss": 3.503, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.557370829317894e-06, |
|
"loss": 3.503, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.462517927198501e-06, |
|
"loss": 3.498, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.3676650250791075e-06, |
|
"loss": 3.5033, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.38827686860475785, |
|
"eval_loss": 3.422902822494507, |
|
"eval_runtime": 5026.5934, |
|
"eval_samples_per_second": 88.285, |
|
"eval_steps_per_second": 1.379, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.272812122959715e-06, |
|
"loss": 3.5024, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.177959220840321e-06, |
|
"loss": 3.4986, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.083106318720928e-06, |
|
"loss": 3.5025, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.988253416601534e-06, |
|
"loss": 3.5065, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.8934005144821415e-06, |
|
"loss": 3.5047, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.798547612362748e-06, |
|
"loss": 3.502, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.703694710243355e-06, |
|
"loss": 3.5061, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.608841808123961e-06, |
|
"loss": 3.5036, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.513988906004568e-06, |
|
"loss": 3.4966, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.4191360038851754e-06, |
|
"loss": 3.5048, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.324283101765782e-06, |
|
"loss": 3.5024, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.229430199646389e-06, |
|
"loss": 3.495, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.134577297526995e-06, |
|
"loss": 3.5035, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.039724395407602e-06, |
|
"loss": 3.5026, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.9448714932882094e-06, |
|
"loss": 3.4997, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.850018591168816e-06, |
|
"loss": 3.5035, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.755165689049423e-06, |
|
"loss": 3.4975, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.660312786930029e-06, |
|
"loss": 3.5003, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.5654598848106354e-06, |
|
"loss": 3.4994, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.4706069826912426e-06, |
|
"loss": 3.501, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.38875705078485445, |
|
"eval_loss": 3.4187748432159424, |
|
"eval_runtime": 5045.601, |
|
"eval_samples_per_second": 87.952, |
|
"eval_steps_per_second": 1.374, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.375754080571849e-06, |
|
"loss": 3.4917, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.280901178452456e-06, |
|
"loss": 3.4986, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.186048276333063e-06, |
|
"loss": 3.496, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.091195374213669e-06, |
|
"loss": 3.4978, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.9963424720942765e-06, |
|
"loss": 3.491, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.901489569974883e-06, |
|
"loss": 3.4953, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.80663666785549e-06, |
|
"loss": 3.4962, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7117837657360967e-06, |
|
"loss": 3.4982, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6169308636167034e-06, |
|
"loss": 3.4973, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.52207796149731e-06, |
|
"loss": 3.4922, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4272250593779172e-06, |
|
"loss": 3.4921, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.332372157258524e-06, |
|
"loss": 3.5039, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2375192551391307e-06, |
|
"loss": 3.5025, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1426663530197374e-06, |
|
"loss": 3.4964, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.0478134509003437e-06, |
|
"loss": 3.4936, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.952960548780951e-06, |
|
"loss": 3.4959, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.8581076466615575e-06, |
|
"loss": 3.4963, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7632547445421642e-06, |
|
"loss": 3.5014, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.668401842422771e-06, |
|
"loss": 3.4948, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.5735489403033776e-06, |
|
"loss": 3.4946, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.38918558969561506, |
|
"eval_loss": 3.4148616790771484, |
|
"eval_runtime": 5028.0262, |
|
"eval_samples_per_second": 88.26, |
|
"eval_steps_per_second": 1.379, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4786960381839844e-06, |
|
"loss": 3.4959, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.383843136064591e-06, |
|
"loss": 3.4955, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.2889902339451978e-06, |
|
"loss": 3.4869, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1941373318258045e-06, |
|
"loss": 3.4975, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.099284429706411e-06, |
|
"loss": 3.4871, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0044315275870183e-06, |
|
"loss": 3.4889, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.909578625467625e-06, |
|
"loss": 3.4928, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.8147257233482318e-06, |
|
"loss": 3.4921, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7198728212288385e-06, |
|
"loss": 3.494, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.625019919109445e-06, |
|
"loss": 3.4918, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5301670169900519e-06, |
|
"loss": 3.4919, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4353141148706586e-06, |
|
"loss": 3.4907, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3404612127512653e-06, |
|
"loss": 3.4937, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2456083106318722e-06, |
|
"loss": 3.4893, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.150755408512479e-06, |
|
"loss": 3.4944, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0559025063930857e-06, |
|
"loss": 3.4914, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.610496042736924e-07, |
|
"loss": 3.4947, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.661967021542991e-07, |
|
"loss": 3.4916, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.713438000349059e-07, |
|
"loss": 3.4884, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.764908979155126e-07, |
|
"loss": 3.4971, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.3894276158938788, |
|
"eval_loss": 3.412609100341797, |
|
"eval_runtime": 5032.8837, |
|
"eval_samples_per_second": 88.175, |
|
"eval_steps_per_second": 1.378, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.816379957961194e-07, |
|
"loss": 3.4929, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.867850936767261e-07, |
|
"loss": 3.4937, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.919321915573329e-07, |
|
"loss": 3.4859, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9707928943793967e-07, |
|
"loss": 3.4796, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.022263873185464e-07, |
|
"loss": 3.4871, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0737348519915316e-07, |
|
"loss": 3.4949, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.252058307975991e-08, |
|
"loss": 3.4884, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 263566, |
|
"total_flos": 4.407529912270848e+18, |
|
"train_loss": 3.6950655784551065, |
|
"train_runtime": 269467.2599, |
|
"train_samples_per_second": 31.299, |
|
"train_steps_per_second": 0.978 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 263566, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"total_flos": 4.407529912270848e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|