Wayne_NLP_mT5 / trainer_state.json
Doogie's picture
Upload trainer_state.json
854eb73
raw
history blame
50.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.999549121276575,
"global_step": 204000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 4.2154,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.9950871056174036e-05,
"loss": 3.5237,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 1.990174211234807e-05,
"loss": 3.3401,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 1.9852613168522107e-05,
"loss": 3.2179,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 1.980348422469614e-05,
"loss": 3.1655,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 1.9754355280870174e-05,
"loss": 3.1335,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 1.9705226337044208e-05,
"loss": 3.1039,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 1.9656097393218242e-05,
"loss": 3.0406,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 1.9606968449392276e-05,
"loss": 3.0212,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 1.9557839505566313e-05,
"loss": 3.0054,
"step": 5000
},
{
"epoch": 0.05,
"learning_rate": 1.9508710561740344e-05,
"loss": 2.9778,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 1.945958161791438e-05,
"loss": 2.9428,
"step": 6000
},
{
"epoch": 0.06,
"learning_rate": 1.9410452674088415e-05,
"loss": 2.9546,
"step": 6500
},
{
"epoch": 0.07,
"learning_rate": 1.936132373026245e-05,
"loss": 2.9442,
"step": 7000
},
{
"epoch": 0.07,
"learning_rate": 1.9312194786436483e-05,
"loss": 2.9142,
"step": 7500
},
{
"epoch": 0.08,
"learning_rate": 1.9263065842610517e-05,
"loss": 2.8991,
"step": 8000
},
{
"epoch": 0.08,
"learning_rate": 1.921393689878455e-05,
"loss": 2.8399,
"step": 8500
},
{
"epoch": 0.09,
"learning_rate": 1.9164807954958588e-05,
"loss": 2.8758,
"step": 9000
},
{
"epoch": 0.09,
"learning_rate": 1.9115679011132618e-05,
"loss": 2.8876,
"step": 9500
},
{
"epoch": 0.1,
"learning_rate": 1.9066550067306656e-05,
"loss": 2.8404,
"step": 10000
},
{
"epoch": 0.1,
"learning_rate": 1.901742112348069e-05,
"loss": 2.8612,
"step": 10500
},
{
"epoch": 0.11,
"learning_rate": 1.8968292179654723e-05,
"loss": 2.8226,
"step": 11000
},
{
"epoch": 0.11,
"learning_rate": 1.8919163235828757e-05,
"loss": 2.8144,
"step": 11500
},
{
"epoch": 0.12,
"learning_rate": 1.887003429200279e-05,
"loss": 2.8132,
"step": 12000
},
{
"epoch": 0.12,
"learning_rate": 1.8820905348176825e-05,
"loss": 2.8182,
"step": 12500
},
{
"epoch": 0.13,
"learning_rate": 1.8771776404350862e-05,
"loss": 2.7883,
"step": 13000
},
{
"epoch": 0.13,
"learning_rate": 1.8722647460524893e-05,
"loss": 2.7961,
"step": 13500
},
{
"epoch": 0.14,
"learning_rate": 1.867351851669893e-05,
"loss": 2.7605,
"step": 14000
},
{
"epoch": 0.14,
"learning_rate": 1.8624389572872964e-05,
"loss": 2.7576,
"step": 14500
},
{
"epoch": 0.15,
"learning_rate": 1.8575260629046998e-05,
"loss": 2.7763,
"step": 15000
},
{
"epoch": 0.15,
"learning_rate": 1.852613168522103e-05,
"loss": 2.7519,
"step": 15500
},
{
"epoch": 0.16,
"learning_rate": 1.8477002741395065e-05,
"loss": 2.7704,
"step": 16000
},
{
"epoch": 0.16,
"learning_rate": 1.8427873797569103e-05,
"loss": 2.6919,
"step": 16500
},
{
"epoch": 0.17,
"learning_rate": 1.8378744853743137e-05,
"loss": 2.7287,
"step": 17000
},
{
"epoch": 0.17,
"learning_rate": 1.832961590991717e-05,
"loss": 2.7121,
"step": 17500
},
{
"epoch": 0.18,
"learning_rate": 1.8280486966091204e-05,
"loss": 2.7137,
"step": 18000
},
{
"epoch": 0.18,
"learning_rate": 1.8231358022265238e-05,
"loss": 2.7361,
"step": 18500
},
{
"epoch": 0.19,
"learning_rate": 1.8182229078439272e-05,
"loss": 2.7176,
"step": 19000
},
{
"epoch": 0.19,
"learning_rate": 1.813310013461331e-05,
"loss": 2.7055,
"step": 19500
},
{
"epoch": 0.2,
"learning_rate": 1.8083971190787343e-05,
"loss": 2.7325,
"step": 20000
},
{
"epoch": 0.2,
"learning_rate": 1.8034842246961377e-05,
"loss": 2.7259,
"step": 20500
},
{
"epoch": 0.21,
"learning_rate": 1.798571330313541e-05,
"loss": 2.6834,
"step": 21000
},
{
"epoch": 0.21,
"learning_rate": 1.7936584359309445e-05,
"loss": 2.6681,
"step": 21500
},
{
"epoch": 0.22,
"learning_rate": 1.788745541548348e-05,
"loss": 2.6709,
"step": 22000
},
{
"epoch": 0.22,
"learning_rate": 1.7838326471657516e-05,
"loss": 2.673,
"step": 22500
},
{
"epoch": 0.23,
"learning_rate": 1.7789197527831547e-05,
"loss": 2.6801,
"step": 23000
},
{
"epoch": 0.23,
"learning_rate": 1.7740068584005584e-05,
"loss": 2.6635,
"step": 23500
},
{
"epoch": 0.24,
"learning_rate": 1.7690939640179618e-05,
"loss": 2.6699,
"step": 24000
},
{
"epoch": 0.24,
"learning_rate": 1.764181069635365e-05,
"loss": 2.6431,
"step": 24500
},
{
"epoch": 0.25,
"learning_rate": 1.7592681752527685e-05,
"loss": 2.6895,
"step": 25000
},
{
"epoch": 0.25,
"learning_rate": 1.754355280870172e-05,
"loss": 2.6452,
"step": 25500
},
{
"epoch": 0.25,
"learning_rate": 1.7494423864875753e-05,
"loss": 2.6619,
"step": 26000
},
{
"epoch": 0.26,
"learning_rate": 1.744529492104979e-05,
"loss": 2.6232,
"step": 26500
},
{
"epoch": 0.26,
"learning_rate": 1.739616597722382e-05,
"loss": 2.663,
"step": 27000
},
{
"epoch": 0.27,
"learning_rate": 1.7347037033397858e-05,
"loss": 2.6151,
"step": 27500
},
{
"epoch": 0.27,
"learning_rate": 1.7297908089571892e-05,
"loss": 2.6166,
"step": 28000
},
{
"epoch": 0.28,
"learning_rate": 1.7248779145745926e-05,
"loss": 2.6394,
"step": 28500
},
{
"epoch": 0.28,
"learning_rate": 1.719965020191996e-05,
"loss": 2.6242,
"step": 29000
},
{
"epoch": 0.29,
"learning_rate": 1.7150521258093994e-05,
"loss": 2.6205,
"step": 29500
},
{
"epoch": 0.29,
"learning_rate": 1.7101392314268028e-05,
"loss": 2.6596,
"step": 30000
},
{
"epoch": 0.3,
"learning_rate": 1.7052263370442065e-05,
"loss": 2.6469,
"step": 30500
},
{
"epoch": 0.3,
"learning_rate": 1.7003134426616095e-05,
"loss": 2.6189,
"step": 31000
},
{
"epoch": 0.31,
"learning_rate": 1.6954005482790133e-05,
"loss": 2.6035,
"step": 31500
},
{
"epoch": 0.31,
"learning_rate": 1.6904876538964167e-05,
"loss": 2.5861,
"step": 32000
},
{
"epoch": 0.32,
"learning_rate": 1.68557475951382e-05,
"loss": 2.6504,
"step": 32500
},
{
"epoch": 0.32,
"learning_rate": 1.6806618651312238e-05,
"loss": 2.6006,
"step": 33000
},
{
"epoch": 0.33,
"learning_rate": 1.6757489707486268e-05,
"loss": 2.6398,
"step": 33500
},
{
"epoch": 0.33,
"learning_rate": 1.6708360763660305e-05,
"loss": 2.6024,
"step": 34000
},
{
"epoch": 0.34,
"learning_rate": 1.665923181983434e-05,
"loss": 2.6039,
"step": 34500
},
{
"epoch": 0.34,
"learning_rate": 1.6610102876008373e-05,
"loss": 2.6224,
"step": 35000
},
{
"epoch": 0.35,
"learning_rate": 1.6560973932182407e-05,
"loss": 2.5644,
"step": 35500
},
{
"epoch": 0.35,
"learning_rate": 1.651184498835644e-05,
"loss": 2.5826,
"step": 36000
},
{
"epoch": 0.36,
"learning_rate": 1.6462716044530475e-05,
"loss": 2.585,
"step": 36500
},
{
"epoch": 0.36,
"learning_rate": 1.6413587100704512e-05,
"loss": 2.5589,
"step": 37000
},
{
"epoch": 0.37,
"learning_rate": 1.6364458156878546e-05,
"loss": 2.6029,
"step": 37500
},
{
"epoch": 0.37,
"learning_rate": 1.631532921305258e-05,
"loss": 2.554,
"step": 38000
},
{
"epoch": 0.38,
"learning_rate": 1.6266200269226614e-05,
"loss": 2.5599,
"step": 38500
},
{
"epoch": 0.38,
"learning_rate": 1.6217071325400648e-05,
"loss": 2.5285,
"step": 39000
},
{
"epoch": 0.39,
"learning_rate": 1.616794238157468e-05,
"loss": 2.5684,
"step": 39500
},
{
"epoch": 0.39,
"learning_rate": 1.611881343774872e-05,
"loss": 2.5816,
"step": 40000
},
{
"epoch": 0.4,
"learning_rate": 1.606968449392275e-05,
"loss": 2.566,
"step": 40500
},
{
"epoch": 0.4,
"learning_rate": 1.6020555550096787e-05,
"loss": 2.5244,
"step": 41000
},
{
"epoch": 0.41,
"learning_rate": 1.597142660627082e-05,
"loss": 2.5571,
"step": 41500
},
{
"epoch": 0.41,
"learning_rate": 1.5922297662444854e-05,
"loss": 2.5071,
"step": 42000
},
{
"epoch": 0.42,
"learning_rate": 1.5873168718618888e-05,
"loss": 2.5497,
"step": 42500
},
{
"epoch": 0.42,
"learning_rate": 1.5824039774792922e-05,
"loss": 2.5566,
"step": 43000
},
{
"epoch": 0.43,
"learning_rate": 1.5774910830966956e-05,
"loss": 2.5448,
"step": 43500
},
{
"epoch": 0.43,
"learning_rate": 1.5725781887140993e-05,
"loss": 2.5649,
"step": 44000
},
{
"epoch": 0.44,
"learning_rate": 1.5676652943315024e-05,
"loss": 2.5562,
"step": 44500
},
{
"epoch": 0.44,
"learning_rate": 1.562752399948906e-05,
"loss": 2.5756,
"step": 45000
},
{
"epoch": 0.45,
"learning_rate": 1.5578395055663095e-05,
"loss": 2.5357,
"step": 45500
},
{
"epoch": 0.45,
"learning_rate": 1.552926611183713e-05,
"loss": 2.5394,
"step": 46000
},
{
"epoch": 0.46,
"learning_rate": 1.5480137168011163e-05,
"loss": 2.5416,
"step": 46500
},
{
"epoch": 0.46,
"learning_rate": 1.5431008224185196e-05,
"loss": 2.523,
"step": 47000
},
{
"epoch": 0.47,
"learning_rate": 1.5381879280359234e-05,
"loss": 2.4957,
"step": 47500
},
{
"epoch": 0.47,
"learning_rate": 1.5332750336533268e-05,
"loss": 2.5392,
"step": 48000
},
{
"epoch": 0.48,
"learning_rate": 1.52836213927073e-05,
"loss": 2.5505,
"step": 48500
},
{
"epoch": 0.48,
"learning_rate": 1.5234492448881335e-05,
"loss": 2.5251,
"step": 49000
},
{
"epoch": 0.49,
"learning_rate": 1.5185363505055371e-05,
"loss": 2.4836,
"step": 49500
},
{
"epoch": 0.49,
"learning_rate": 1.5136234561229403e-05,
"loss": 2.5077,
"step": 50000
},
{
"epoch": 0.49,
"learning_rate": 1.5087105617403439e-05,
"loss": 2.5103,
"step": 50500
},
{
"epoch": 0.5,
"learning_rate": 1.5037976673577473e-05,
"loss": 2.5157,
"step": 51000
},
{
"epoch": 0.5,
"learning_rate": 1.4988847729751508e-05,
"loss": 2.509,
"step": 51500
},
{
"epoch": 0.51,
"learning_rate": 1.493971878592554e-05,
"loss": 2.4982,
"step": 52000
},
{
"epoch": 0.51,
"learning_rate": 1.4890589842099576e-05,
"loss": 2.4741,
"step": 52500
},
{
"epoch": 0.52,
"learning_rate": 1.484146089827361e-05,
"loss": 2.5438,
"step": 53000
},
{
"epoch": 0.52,
"learning_rate": 1.4792331954447645e-05,
"loss": 2.4698,
"step": 53500
},
{
"epoch": 0.53,
"learning_rate": 1.4743203010621678e-05,
"loss": 2.5175,
"step": 54000
},
{
"epoch": 0.53,
"learning_rate": 1.4694074066795713e-05,
"loss": 2.4857,
"step": 54500
},
{
"epoch": 0.54,
"learning_rate": 1.4644945122969747e-05,
"loss": 2.5006,
"step": 55000
},
{
"epoch": 0.54,
"learning_rate": 1.4595816179143783e-05,
"loss": 2.4875,
"step": 55500
},
{
"epoch": 0.55,
"learning_rate": 1.4546687235317816e-05,
"loss": 2.4964,
"step": 56000
},
{
"epoch": 0.55,
"learning_rate": 1.449755829149185e-05,
"loss": 2.5175,
"step": 56500
},
{
"epoch": 0.56,
"learning_rate": 1.4448429347665884e-05,
"loss": 2.4912,
"step": 57000
},
{
"epoch": 0.56,
"learning_rate": 1.439930040383992e-05,
"loss": 2.5074,
"step": 57500
},
{
"epoch": 0.57,
"learning_rate": 1.4350171460013954e-05,
"loss": 2.4655,
"step": 58000
},
{
"epoch": 0.57,
"learning_rate": 1.430104251618799e-05,
"loss": 2.4985,
"step": 58500
},
{
"epoch": 0.58,
"learning_rate": 1.4251913572362021e-05,
"loss": 2.4791,
"step": 59000
},
{
"epoch": 0.58,
"learning_rate": 1.4202784628536057e-05,
"loss": 2.4881,
"step": 59500
},
{
"epoch": 0.59,
"learning_rate": 1.4153655684710091e-05,
"loss": 2.4805,
"step": 60000
},
{
"epoch": 0.59,
"learning_rate": 1.4104526740884126e-05,
"loss": 2.4591,
"step": 60500
},
{
"epoch": 0.6,
"learning_rate": 1.4055397797058159e-05,
"loss": 2.4958,
"step": 61000
},
{
"epoch": 0.6,
"learning_rate": 1.4006268853232194e-05,
"loss": 2.4691,
"step": 61500
},
{
"epoch": 0.61,
"learning_rate": 1.3957139909406228e-05,
"loss": 2.5049,
"step": 62000
},
{
"epoch": 0.61,
"learning_rate": 1.3908010965580264e-05,
"loss": 2.4584,
"step": 62500
},
{
"epoch": 0.62,
"learning_rate": 1.38588820217543e-05,
"loss": 2.4761,
"step": 63000
},
{
"epoch": 0.62,
"learning_rate": 1.3809753077928331e-05,
"loss": 2.4677,
"step": 63500
},
{
"epoch": 0.63,
"learning_rate": 1.3760624134102367e-05,
"loss": 2.4868,
"step": 64000
},
{
"epoch": 0.63,
"learning_rate": 1.3711495190276401e-05,
"loss": 2.4739,
"step": 64500
},
{
"epoch": 0.64,
"learning_rate": 1.3662366246450436e-05,
"loss": 2.4558,
"step": 65000
},
{
"epoch": 0.64,
"learning_rate": 1.3613237302624469e-05,
"loss": 2.4528,
"step": 65500
},
{
"epoch": 0.65,
"learning_rate": 1.3564108358798504e-05,
"loss": 2.4608,
"step": 66000
},
{
"epoch": 0.65,
"learning_rate": 1.3514979414972538e-05,
"loss": 2.4461,
"step": 66500
},
{
"epoch": 0.66,
"learning_rate": 1.3465850471146574e-05,
"loss": 2.4382,
"step": 67000
},
{
"epoch": 0.66,
"learning_rate": 1.3416721527320606e-05,
"loss": 2.4554,
"step": 67500
},
{
"epoch": 0.67,
"learning_rate": 1.3367592583494641e-05,
"loss": 2.4488,
"step": 68000
},
{
"epoch": 0.67,
"learning_rate": 1.3318463639668675e-05,
"loss": 2.4594,
"step": 68500
},
{
"epoch": 0.68,
"learning_rate": 1.3269334695842711e-05,
"loss": 2.4583,
"step": 69000
},
{
"epoch": 0.68,
"learning_rate": 1.3220205752016743e-05,
"loss": 2.4392,
"step": 69500
},
{
"epoch": 0.69,
"learning_rate": 1.3171076808190779e-05,
"loss": 2.4366,
"step": 70000
},
{
"epoch": 0.69,
"learning_rate": 1.3121947864364813e-05,
"loss": 2.4437,
"step": 70500
},
{
"epoch": 0.7,
"learning_rate": 1.3072818920538848e-05,
"loss": 2.4635,
"step": 71000
},
{
"epoch": 0.7,
"learning_rate": 1.302368997671288e-05,
"loss": 2.435,
"step": 71500
},
{
"epoch": 0.71,
"learning_rate": 1.2974561032886916e-05,
"loss": 2.4333,
"step": 72000
},
{
"epoch": 0.71,
"learning_rate": 1.292543208906095e-05,
"loss": 2.4678,
"step": 72500
},
{
"epoch": 0.72,
"learning_rate": 1.2876303145234985e-05,
"loss": 2.4538,
"step": 73000
},
{
"epoch": 0.72,
"learning_rate": 1.282717420140902e-05,
"loss": 2.4561,
"step": 73500
},
{
"epoch": 0.73,
"learning_rate": 1.2778045257583053e-05,
"loss": 2.5159,
"step": 74000
},
{
"epoch": 0.73,
"learning_rate": 1.2728916313757087e-05,
"loss": 2.4392,
"step": 74500
},
{
"epoch": 0.74,
"learning_rate": 1.2679787369931123e-05,
"loss": 2.4503,
"step": 75000
},
{
"epoch": 0.74,
"learning_rate": 1.2630658426105156e-05,
"loss": 2.4428,
"step": 75500
},
{
"epoch": 0.74,
"learning_rate": 1.2581529482279192e-05,
"loss": 2.426,
"step": 76000
},
{
"epoch": 0.75,
"learning_rate": 1.2532400538453224e-05,
"loss": 2.4809,
"step": 76500
},
{
"epoch": 0.75,
"learning_rate": 1.248327159462726e-05,
"loss": 2.4687,
"step": 77000
},
{
"epoch": 0.76,
"learning_rate": 1.2434142650801294e-05,
"loss": 2.4306,
"step": 77500
},
{
"epoch": 0.76,
"learning_rate": 1.238501370697533e-05,
"loss": 2.4455,
"step": 78000
},
{
"epoch": 0.77,
"learning_rate": 1.2335884763149363e-05,
"loss": 2.3706,
"step": 78500
},
{
"epoch": 0.77,
"learning_rate": 1.2286755819323397e-05,
"loss": 2.4224,
"step": 79000
},
{
"epoch": 0.78,
"learning_rate": 1.2237626875497433e-05,
"loss": 2.4242,
"step": 79500
},
{
"epoch": 0.78,
"learning_rate": 1.2188497931671466e-05,
"loss": 2.429,
"step": 80000
},
{
"epoch": 0.79,
"learning_rate": 1.2139368987845502e-05,
"loss": 2.4247,
"step": 80500
},
{
"epoch": 0.79,
"learning_rate": 1.2090240044019534e-05,
"loss": 2.4195,
"step": 81000
},
{
"epoch": 0.8,
"learning_rate": 1.204111110019357e-05,
"loss": 2.4293,
"step": 81500
},
{
"epoch": 0.8,
"learning_rate": 1.1991982156367604e-05,
"loss": 2.4245,
"step": 82000
},
{
"epoch": 0.81,
"learning_rate": 1.194285321254164e-05,
"loss": 2.4415,
"step": 82500
},
{
"epoch": 0.81,
"learning_rate": 1.1893724268715671e-05,
"loss": 2.4199,
"step": 83000
},
{
"epoch": 0.82,
"learning_rate": 1.1844595324889707e-05,
"loss": 2.4386,
"step": 83500
},
{
"epoch": 0.82,
"learning_rate": 1.179546638106374e-05,
"loss": 2.4063,
"step": 84000
},
{
"epoch": 0.83,
"learning_rate": 1.1746337437237776e-05,
"loss": 2.4376,
"step": 84500
},
{
"epoch": 0.83,
"learning_rate": 1.1697208493411809e-05,
"loss": 2.4177,
"step": 85000
},
{
"epoch": 0.84,
"learning_rate": 1.1648079549585844e-05,
"loss": 2.3778,
"step": 85500
},
{
"epoch": 0.84,
"learning_rate": 1.1598950605759878e-05,
"loss": 2.4355,
"step": 86000
},
{
"epoch": 0.85,
"learning_rate": 1.1549821661933914e-05,
"loss": 2.4214,
"step": 86500
},
{
"epoch": 0.85,
"learning_rate": 1.1500692718107946e-05,
"loss": 2.3941,
"step": 87000
},
{
"epoch": 0.86,
"learning_rate": 1.1451563774281981e-05,
"loss": 2.4122,
"step": 87500
},
{
"epoch": 0.86,
"learning_rate": 1.1402434830456015e-05,
"loss": 2.4021,
"step": 88000
},
{
"epoch": 0.87,
"learning_rate": 1.135330588663005e-05,
"loss": 2.4154,
"step": 88500
},
{
"epoch": 0.87,
"learning_rate": 1.1304176942804083e-05,
"loss": 2.4004,
"step": 89000
},
{
"epoch": 0.88,
"learning_rate": 1.1255047998978119e-05,
"loss": 2.4091,
"step": 89500
},
{
"epoch": 0.88,
"learning_rate": 1.1205919055152152e-05,
"loss": 2.4348,
"step": 90000
},
{
"epoch": 0.89,
"learning_rate": 1.1156790111326188e-05,
"loss": 2.3965,
"step": 90500
},
{
"epoch": 0.89,
"learning_rate": 1.1107661167500222e-05,
"loss": 2.3904,
"step": 91000
},
{
"epoch": 0.9,
"learning_rate": 1.1058532223674256e-05,
"loss": 2.3947,
"step": 91500
},
{
"epoch": 0.9,
"learning_rate": 1.100940327984829e-05,
"loss": 2.4075,
"step": 92000
},
{
"epoch": 0.91,
"learning_rate": 1.0960274336022325e-05,
"loss": 2.3987,
"step": 92500
},
{
"epoch": 0.91,
"learning_rate": 1.0911145392196359e-05,
"loss": 2.4116,
"step": 93000
},
{
"epoch": 0.92,
"learning_rate": 1.0862016448370393e-05,
"loss": 2.4416,
"step": 93500
},
{
"epoch": 0.92,
"learning_rate": 1.0812887504544429e-05,
"loss": 2.3899,
"step": 94000
},
{
"epoch": 0.93,
"learning_rate": 1.0763758560718462e-05,
"loss": 2.4015,
"step": 94500
},
{
"epoch": 0.93,
"learning_rate": 1.0714629616892498e-05,
"loss": 2.3741,
"step": 95000
},
{
"epoch": 0.94,
"learning_rate": 1.0665500673066532e-05,
"loss": 2.3951,
"step": 95500
},
{
"epoch": 0.94,
"learning_rate": 1.0616371729240566e-05,
"loss": 2.406,
"step": 96000
},
{
"epoch": 0.95,
"learning_rate": 1.05672427854146e-05,
"loss": 2.4102,
"step": 96500
},
{
"epoch": 0.95,
"learning_rate": 1.0518113841588635e-05,
"loss": 2.4031,
"step": 97000
},
{
"epoch": 0.96,
"learning_rate": 1.0468984897762669e-05,
"loss": 2.417,
"step": 97500
},
{
"epoch": 0.96,
"learning_rate": 1.0419855953936705e-05,
"loss": 2.3978,
"step": 98000
},
{
"epoch": 0.97,
"learning_rate": 1.0370727010110737e-05,
"loss": 2.4009,
"step": 98500
},
{
"epoch": 0.97,
"learning_rate": 1.0321598066284772e-05,
"loss": 2.3966,
"step": 99000
},
{
"epoch": 0.98,
"learning_rate": 1.0272469122458806e-05,
"loss": 2.3918,
"step": 99500
},
{
"epoch": 0.98,
"learning_rate": 1.0223340178632842e-05,
"loss": 2.3853,
"step": 100000
},
{
"epoch": 0.99,
"learning_rate": 1.0174211234806874e-05,
"loss": 2.3996,
"step": 100500
},
{
"epoch": 0.99,
"learning_rate": 1.012508229098091e-05,
"loss": 2.388,
"step": 101000
},
{
"epoch": 0.99,
"learning_rate": 1.0075953347154944e-05,
"loss": 2.3845,
"step": 101500
},
{
"epoch": 1.0,
"learning_rate": 1.0026824403328979e-05,
"loss": 2.385,
"step": 102000
},
{
"epoch": 1.0,
"learning_rate": 9.977695459503013e-06,
"loss": 2.3387,
"step": 102500
},
{
"epoch": 1.01,
"learning_rate": 9.928566515677047e-06,
"loss": 2.3401,
"step": 103000
},
{
"epoch": 1.01,
"learning_rate": 9.87943757185108e-06,
"loss": 2.3291,
"step": 103500
},
{
"epoch": 1.02,
"learning_rate": 9.830308628025116e-06,
"loss": 2.3783,
"step": 104000
},
{
"epoch": 1.02,
"learning_rate": 9.78117968419915e-06,
"loss": 2.3154,
"step": 104500
},
{
"epoch": 1.03,
"learning_rate": 9.732050740373184e-06,
"loss": 2.3229,
"step": 105000
},
{
"epoch": 1.03,
"learning_rate": 9.68292179654722e-06,
"loss": 2.3844,
"step": 105500
},
{
"epoch": 1.04,
"learning_rate": 9.633792852721254e-06,
"loss": 2.3945,
"step": 106000
},
{
"epoch": 1.04,
"learning_rate": 9.584663908895287e-06,
"loss": 2.3354,
"step": 106500
},
{
"epoch": 1.05,
"learning_rate": 9.535534965069321e-06,
"loss": 2.325,
"step": 107000
},
{
"epoch": 1.05,
"learning_rate": 9.486406021243357e-06,
"loss": 2.3722,
"step": 107500
},
{
"epoch": 1.06,
"learning_rate": 9.43727707741739e-06,
"loss": 2.37,
"step": 108000
},
{
"epoch": 1.06,
"learning_rate": 9.388148133591425e-06,
"loss": 2.3454,
"step": 108500
},
{
"epoch": 1.07,
"learning_rate": 9.339019189765458e-06,
"loss": 2.3118,
"step": 109000
},
{
"epoch": 1.07,
"learning_rate": 9.289890245939494e-06,
"loss": 2.34,
"step": 109500
},
{
"epoch": 1.08,
"learning_rate": 9.240761302113528e-06,
"loss": 2.3156,
"step": 110000
},
{
"epoch": 1.08,
"learning_rate": 9.191632358287562e-06,
"loss": 2.3686,
"step": 110500
},
{
"epoch": 1.09,
"learning_rate": 9.142503414461596e-06,
"loss": 2.3641,
"step": 111000
},
{
"epoch": 1.09,
"learning_rate": 9.093374470635631e-06,
"loss": 2.3399,
"step": 111500
},
{
"epoch": 1.1,
"learning_rate": 9.044245526809665e-06,
"loss": 2.3038,
"step": 112000
},
{
"epoch": 1.1,
"learning_rate": 8.995116582983699e-06,
"loss": 2.3394,
"step": 112500
},
{
"epoch": 1.11,
"learning_rate": 8.945987639157735e-06,
"loss": 2.3144,
"step": 113000
},
{
"epoch": 1.11,
"learning_rate": 8.896858695331768e-06,
"loss": 2.3542,
"step": 113500
},
{
"epoch": 1.12,
"learning_rate": 8.847729751505802e-06,
"loss": 2.3378,
"step": 114000
},
{
"epoch": 1.12,
"learning_rate": 8.798600807679836e-06,
"loss": 2.3562,
"step": 114500
},
{
"epoch": 1.13,
"learning_rate": 8.749471863853872e-06,
"loss": 2.3759,
"step": 115000
},
{
"epoch": 1.13,
"learning_rate": 8.700342920027906e-06,
"loss": 2.3459,
"step": 115500
},
{
"epoch": 1.14,
"learning_rate": 8.65121397620194e-06,
"loss": 2.3041,
"step": 116000
},
{
"epoch": 1.14,
"learning_rate": 8.602085032375973e-06,
"loss": 2.3076,
"step": 116500
},
{
"epoch": 1.15,
"learning_rate": 8.552956088550009e-06,
"loss": 2.3598,
"step": 117000
},
{
"epoch": 1.15,
"learning_rate": 8.503827144724045e-06,
"loss": 2.3463,
"step": 117500
},
{
"epoch": 1.16,
"learning_rate": 8.454698200898078e-06,
"loss": 2.3505,
"step": 118000
},
{
"epoch": 1.16,
"learning_rate": 8.405569257072112e-06,
"loss": 2.3106,
"step": 118500
},
{
"epoch": 1.17,
"learning_rate": 8.356440313246146e-06,
"loss": 2.3391,
"step": 119000
},
{
"epoch": 1.17,
"learning_rate": 8.307311369420182e-06,
"loss": 2.3316,
"step": 119500
},
{
"epoch": 1.18,
"learning_rate": 8.258182425594216e-06,
"loss": 2.3122,
"step": 120000
},
{
"epoch": 1.18,
"learning_rate": 8.20905348176825e-06,
"loss": 2.3244,
"step": 120500
},
{
"epoch": 1.19,
"learning_rate": 8.159924537942283e-06,
"loss": 2.3457,
"step": 121000
},
{
"epoch": 1.19,
"learning_rate": 8.110795594116319e-06,
"loss": 2.328,
"step": 121500
},
{
"epoch": 1.2,
"learning_rate": 8.061666650290353e-06,
"loss": 2.319,
"step": 122000
},
{
"epoch": 1.2,
"learning_rate": 8.012537706464387e-06,
"loss": 2.3168,
"step": 122500
},
{
"epoch": 1.21,
"learning_rate": 7.963408762638422e-06,
"loss": 2.3288,
"step": 123000
},
{
"epoch": 1.21,
"learning_rate": 7.914279818812456e-06,
"loss": 2.3169,
"step": 123500
},
{
"epoch": 1.22,
"learning_rate": 7.86515087498649e-06,
"loss": 2.3398,
"step": 124000
},
{
"epoch": 1.22,
"learning_rate": 7.816021931160524e-06,
"loss": 2.3182,
"step": 124500
},
{
"epoch": 1.23,
"learning_rate": 7.76689298733456e-06,
"loss": 2.3219,
"step": 125000
},
{
"epoch": 1.23,
"learning_rate": 7.717764043508593e-06,
"loss": 2.3407,
"step": 125500
},
{
"epoch": 1.24,
"learning_rate": 7.668635099682627e-06,
"loss": 2.2986,
"step": 126000
},
{
"epoch": 1.24,
"learning_rate": 7.619506155856662e-06,
"loss": 2.3192,
"step": 126500
},
{
"epoch": 1.24,
"learning_rate": 7.570377212030696e-06,
"loss": 2.3229,
"step": 127000
},
{
"epoch": 1.25,
"learning_rate": 7.521248268204731e-06,
"loss": 2.3073,
"step": 127500
},
{
"epoch": 1.25,
"learning_rate": 7.4721193243787646e-06,
"loss": 2.2936,
"step": 128000
},
{
"epoch": 1.26,
"learning_rate": 7.422990380552799e-06,
"loss": 2.3078,
"step": 128500
},
{
"epoch": 1.26,
"learning_rate": 7.373861436726833e-06,
"loss": 2.3507,
"step": 129000
},
{
"epoch": 1.27,
"learning_rate": 7.324732492900868e-06,
"loss": 2.3071,
"step": 129500
},
{
"epoch": 1.27,
"learning_rate": 7.275603549074902e-06,
"loss": 2.3076,
"step": 130000
},
{
"epoch": 1.28,
"learning_rate": 7.2264746052489365e-06,
"loss": 2.3321,
"step": 130500
},
{
"epoch": 1.28,
"learning_rate": 7.177345661422971e-06,
"loss": 2.3224,
"step": 131000
},
{
"epoch": 1.29,
"learning_rate": 7.128216717597005e-06,
"loss": 2.3083,
"step": 131500
},
{
"epoch": 1.29,
"learning_rate": 7.07908777377104e-06,
"loss": 2.3196,
"step": 132000
},
{
"epoch": 1.3,
"learning_rate": 7.0299588299450745e-06,
"loss": 2.3311,
"step": 132500
},
{
"epoch": 1.3,
"learning_rate": 6.980829886119109e-06,
"loss": 2.3505,
"step": 133000
},
{
"epoch": 1.31,
"learning_rate": 6.931700942293144e-06,
"loss": 2.3014,
"step": 133500
},
{
"epoch": 1.31,
"learning_rate": 6.882571998467178e-06,
"loss": 2.3001,
"step": 134000
},
{
"epoch": 1.32,
"learning_rate": 6.833443054641213e-06,
"loss": 2.3122,
"step": 134500
},
{
"epoch": 1.32,
"learning_rate": 6.7843141108152465e-06,
"loss": 2.3031,
"step": 135000
},
{
"epoch": 1.33,
"learning_rate": 6.735185166989281e-06,
"loss": 2.3426,
"step": 135500
},
{
"epoch": 1.33,
"learning_rate": 6.686056223163315e-06,
"loss": 2.3242,
"step": 136000
},
{
"epoch": 1.34,
"learning_rate": 6.63692727933735e-06,
"loss": 2.3312,
"step": 136500
},
{
"epoch": 1.34,
"learning_rate": 6.587798335511384e-06,
"loss": 2.3265,
"step": 137000
},
{
"epoch": 1.35,
"learning_rate": 6.5386693916854184e-06,
"loss": 2.3226,
"step": 137500
},
{
"epoch": 1.35,
"learning_rate": 6.489540447859452e-06,
"loss": 2.2682,
"step": 138000
},
{
"epoch": 1.36,
"learning_rate": 6.440411504033487e-06,
"loss": 2.2961,
"step": 138500
},
{
"epoch": 1.36,
"learning_rate": 6.391282560207521e-06,
"loss": 2.2935,
"step": 139000
},
{
"epoch": 1.37,
"learning_rate": 6.342153616381556e-06,
"loss": 2.324,
"step": 139500
},
{
"epoch": 1.37,
"learning_rate": 6.2930246725555895e-06,
"loss": 2.2955,
"step": 140000
},
{
"epoch": 1.38,
"learning_rate": 6.243895728729624e-06,
"loss": 2.292,
"step": 140500
},
{
"epoch": 1.38,
"learning_rate": 6.194766784903659e-06,
"loss": 2.3076,
"step": 141000
},
{
"epoch": 1.39,
"learning_rate": 6.145637841077693e-06,
"loss": 2.3151,
"step": 141500
},
{
"epoch": 1.39,
"learning_rate": 6.0965088972517276e-06,
"loss": 2.3051,
"step": 142000
},
{
"epoch": 1.4,
"learning_rate": 6.0473799534257615e-06,
"loss": 2.3247,
"step": 142500
},
{
"epoch": 1.4,
"learning_rate": 5.998251009599796e-06,
"loss": 2.2741,
"step": 143000
},
{
"epoch": 1.41,
"learning_rate": 5.94912206577383e-06,
"loss": 2.2937,
"step": 143500
},
{
"epoch": 1.41,
"learning_rate": 5.899993121947865e-06,
"loss": 2.2883,
"step": 144000
},
{
"epoch": 1.42,
"learning_rate": 5.850864178121899e-06,
"loss": 2.2985,
"step": 144500
},
{
"epoch": 1.42,
"learning_rate": 5.801735234295933e-06,
"loss": 2.3006,
"step": 145000
},
{
"epoch": 1.43,
"learning_rate": 5.752606290469967e-06,
"loss": 2.2663,
"step": 145500
},
{
"epoch": 1.43,
"learning_rate": 5.703477346644002e-06,
"loss": 2.3,
"step": 146000
},
{
"epoch": 1.44,
"learning_rate": 5.654348402818036e-06,
"loss": 2.2688,
"step": 146500
},
{
"epoch": 1.44,
"learning_rate": 5.605219458992071e-06,
"loss": 2.2915,
"step": 147000
},
{
"epoch": 1.45,
"learning_rate": 5.5560905151661045e-06,
"loss": 2.2897,
"step": 147500
},
{
"epoch": 1.45,
"learning_rate": 5.50696157134014e-06,
"loss": 2.3316,
"step": 148000
},
{
"epoch": 1.46,
"learning_rate": 5.457832627514175e-06,
"loss": 2.3386,
"step": 148500
},
{
"epoch": 1.46,
"learning_rate": 5.408703683688209e-06,
"loss": 2.3215,
"step": 149000
},
{
"epoch": 1.47,
"learning_rate": 5.359574739862243e-06,
"loss": 2.3012,
"step": 149500
},
{
"epoch": 1.47,
"learning_rate": 5.310445796036277e-06,
"loss": 2.2939,
"step": 150000
},
{
"epoch": 1.48,
"learning_rate": 5.261316852210312e-06,
"loss": 2.2933,
"step": 150500
},
{
"epoch": 1.48,
"learning_rate": 5.212187908384346e-06,
"loss": 2.3057,
"step": 151000
},
{
"epoch": 1.48,
"learning_rate": 5.163058964558381e-06,
"loss": 2.2883,
"step": 151500
},
{
"epoch": 1.49,
"learning_rate": 5.113930020732415e-06,
"loss": 2.3136,
"step": 152000
},
{
"epoch": 1.49,
"learning_rate": 5.064801076906449e-06,
"loss": 2.2841,
"step": 152500
},
{
"epoch": 1.5,
"learning_rate": 5.015672133080484e-06,
"loss": 2.2684,
"step": 153000
},
{
"epoch": 1.5,
"learning_rate": 4.966543189254518e-06,
"loss": 2.2535,
"step": 153500
},
{
"epoch": 1.51,
"learning_rate": 4.9174142454285525e-06,
"loss": 2.2899,
"step": 154000
},
{
"epoch": 1.51,
"learning_rate": 4.868285301602586e-06,
"loss": 2.308,
"step": 154500
},
{
"epoch": 1.52,
"learning_rate": 4.819156357776621e-06,
"loss": 2.297,
"step": 155000
},
{
"epoch": 1.52,
"learning_rate": 4.770027413950655e-06,
"loss": 2.3058,
"step": 155500
},
{
"epoch": 1.53,
"learning_rate": 4.72089847012469e-06,
"loss": 2.2709,
"step": 156000
},
{
"epoch": 1.53,
"learning_rate": 4.671769526298724e-06,
"loss": 2.2958,
"step": 156500
},
{
"epoch": 1.54,
"learning_rate": 4.622640582472758e-06,
"loss": 2.3003,
"step": 157000
},
{
"epoch": 1.54,
"learning_rate": 4.573511638646792e-06,
"loss": 2.2724,
"step": 157500
},
{
"epoch": 1.55,
"learning_rate": 4.524382694820827e-06,
"loss": 2.2806,
"step": 158000
},
{
"epoch": 1.55,
"learning_rate": 4.475253750994861e-06,
"loss": 2.2622,
"step": 158500
},
{
"epoch": 1.56,
"learning_rate": 4.4261248071688956e-06,
"loss": 2.2915,
"step": 159000
},
{
"epoch": 1.56,
"learning_rate": 4.37699586334293e-06,
"loss": 2.3153,
"step": 159500
},
{
"epoch": 1.57,
"learning_rate": 4.327866919516965e-06,
"loss": 2.2784,
"step": 160000
},
{
"epoch": 1.57,
"learning_rate": 4.278737975690999e-06,
"loss": 2.2704,
"step": 160500
},
{
"epoch": 1.58,
"learning_rate": 4.229609031865034e-06,
"loss": 2.2792,
"step": 161000
},
{
"epoch": 1.58,
"learning_rate": 4.1804800880390675e-06,
"loss": 2.2642,
"step": 161500
},
{
"epoch": 1.59,
"learning_rate": 4.131351144213102e-06,
"loss": 2.2858,
"step": 162000
},
{
"epoch": 1.59,
"learning_rate": 4.082222200387136e-06,
"loss": 2.2784,
"step": 162500
},
{
"epoch": 1.6,
"learning_rate": 4.033093256561171e-06,
"loss": 2.2851,
"step": 163000
},
{
"epoch": 1.6,
"learning_rate": 3.983964312735205e-06,
"loss": 2.2762,
"step": 163500
},
{
"epoch": 1.61,
"learning_rate": 3.9348353689092394e-06,
"loss": 2.3028,
"step": 164000
},
{
"epoch": 1.61,
"learning_rate": 3.885706425083274e-06,
"loss": 2.2735,
"step": 164500
},
{
"epoch": 1.62,
"learning_rate": 3.836577481257308e-06,
"loss": 2.2511,
"step": 165000
},
{
"epoch": 1.62,
"learning_rate": 3.7874485374313424e-06,
"loss": 2.2856,
"step": 165500
},
{
"epoch": 1.63,
"learning_rate": 3.7383195936053767e-06,
"loss": 2.2979,
"step": 166000
},
{
"epoch": 1.63,
"learning_rate": 3.689190649779411e-06,
"loss": 2.2537,
"step": 166500
},
{
"epoch": 1.64,
"learning_rate": 3.6400617059534453e-06,
"loss": 2.2746,
"step": 167000
},
{
"epoch": 1.64,
"learning_rate": 3.5909327621274804e-06,
"loss": 2.3058,
"step": 167500
},
{
"epoch": 1.65,
"learning_rate": 3.5418038183015147e-06,
"loss": 2.3015,
"step": 168000
},
{
"epoch": 1.65,
"learning_rate": 3.492674874475549e-06,
"loss": 2.2863,
"step": 168500
},
{
"epoch": 1.66,
"learning_rate": 3.4435459306495833e-06,
"loss": 2.2733,
"step": 169000
},
{
"epoch": 1.66,
"learning_rate": 3.3944169868236176e-06,
"loss": 2.2806,
"step": 169500
},
{
"epoch": 1.67,
"learning_rate": 3.345288042997652e-06,
"loss": 2.259,
"step": 170000
},
{
"epoch": 1.67,
"learning_rate": 3.2961590991716862e-06,
"loss": 2.3225,
"step": 170500
},
{
"epoch": 1.68,
"learning_rate": 3.2470301553457205e-06,
"loss": 2.2887,
"step": 171000
},
{
"epoch": 1.68,
"learning_rate": 3.197901211519755e-06,
"loss": 2.2957,
"step": 171500
},
{
"epoch": 1.69,
"learning_rate": 3.148772267693789e-06,
"loss": 2.294,
"step": 172000
},
{
"epoch": 1.69,
"learning_rate": 3.0996433238678234e-06,
"loss": 2.2976,
"step": 172500
},
{
"epoch": 1.7,
"learning_rate": 3.050514380041858e-06,
"loss": 2.3028,
"step": 173000
},
{
"epoch": 1.7,
"learning_rate": 3.0013854362158925e-06,
"loss": 2.2875,
"step": 173500
},
{
"epoch": 1.71,
"learning_rate": 2.9522564923899268e-06,
"loss": 2.3208,
"step": 174000
},
{
"epoch": 1.71,
"learning_rate": 2.903127548563961e-06,
"loss": 2.3068,
"step": 174500
},
{
"epoch": 1.72,
"learning_rate": 2.853998604737996e-06,
"loss": 2.28,
"step": 175000
},
{
"epoch": 1.72,
"learning_rate": 2.80486966091203e-06,
"loss": 2.301,
"step": 175500
},
{
"epoch": 1.73,
"learning_rate": 2.7557407170860644e-06,
"loss": 2.2428,
"step": 176000
},
{
"epoch": 1.73,
"learning_rate": 2.7066117732600987e-06,
"loss": 2.2843,
"step": 176500
},
{
"epoch": 1.73,
"learning_rate": 2.657482829434133e-06,
"loss": 2.2756,
"step": 177000
},
{
"epoch": 1.74,
"learning_rate": 2.6083538856081673e-06,
"loss": 2.289,
"step": 177500
},
{
"epoch": 1.74,
"learning_rate": 2.5592249417822016e-06,
"loss": 2.2902,
"step": 178000
},
{
"epoch": 1.75,
"learning_rate": 2.5100959979562363e-06,
"loss": 2.298,
"step": 178500
},
{
"epoch": 1.75,
"learning_rate": 2.4609670541302707e-06,
"loss": 2.2764,
"step": 179000
},
{
"epoch": 1.76,
"learning_rate": 2.411838110304305e-06,
"loss": 2.2487,
"step": 179500
},
{
"epoch": 1.76,
"learning_rate": 2.3627091664783393e-06,
"loss": 2.323,
"step": 180000
},
{
"epoch": 1.77,
"learning_rate": 2.3135802226523736e-06,
"loss": 2.3174,
"step": 180500
},
{
"epoch": 1.77,
"learning_rate": 2.2644512788264083e-06,
"loss": 2.3205,
"step": 181000
},
{
"epoch": 1.78,
"learning_rate": 2.2153223350004426e-06,
"loss": 2.313,
"step": 181500
},
{
"epoch": 1.78,
"learning_rate": 2.166193391174477e-06,
"loss": 2.3015,
"step": 182000
},
{
"epoch": 1.79,
"learning_rate": 2.117064447348511e-06,
"loss": 2.2836,
"step": 182500
},
{
"epoch": 1.79,
"learning_rate": 2.0679355035225455e-06,
"loss": 2.2737,
"step": 183000
},
{
"epoch": 1.8,
"learning_rate": 2.01880655969658e-06,
"loss": 2.2824,
"step": 183500
},
{
"epoch": 1.8,
"learning_rate": 1.969677615870614e-06,
"loss": 2.2577,
"step": 184000
},
{
"epoch": 1.81,
"learning_rate": 1.920548672044649e-06,
"loss": 2.2563,
"step": 184500
},
{
"epoch": 1.81,
"learning_rate": 1.871419728218683e-06,
"loss": 2.3253,
"step": 185000
},
{
"epoch": 1.82,
"learning_rate": 1.8222907843927174e-06,
"loss": 2.2504,
"step": 185500
},
{
"epoch": 1.82,
"learning_rate": 1.7731618405667517e-06,
"loss": 2.3124,
"step": 186000
},
{
"epoch": 1.83,
"learning_rate": 1.724032896740786e-06,
"loss": 2.2784,
"step": 186500
},
{
"epoch": 1.83,
"learning_rate": 1.6749039529148203e-06,
"loss": 2.2832,
"step": 187000
},
{
"epoch": 1.84,
"learning_rate": 1.6257750090888546e-06,
"loss": 2.2924,
"step": 187500
},
{
"epoch": 1.84,
"learning_rate": 1.576646065262889e-06,
"loss": 2.2843,
"step": 188000
},
{
"epoch": 1.85,
"learning_rate": 1.5275171214369237e-06,
"loss": 2.2724,
"step": 188500
},
{
"epoch": 1.85,
"learning_rate": 1.478388177610958e-06,
"loss": 2.3194,
"step": 189000
},
{
"epoch": 1.86,
"learning_rate": 1.4292592337849923e-06,
"loss": 2.2532,
"step": 189500
},
{
"epoch": 1.86,
"learning_rate": 1.3801302899590266e-06,
"loss": 2.2773,
"step": 190000
},
{
"epoch": 1.87,
"learning_rate": 1.3310013461330609e-06,
"loss": 2.2869,
"step": 190500
},
{
"epoch": 1.87,
"learning_rate": 1.2818724023070952e-06,
"loss": 2.2751,
"step": 191000
},
{
"epoch": 1.88,
"learning_rate": 1.2327434584811297e-06,
"loss": 2.3004,
"step": 191500
},
{
"epoch": 1.88,
"learning_rate": 1.183614514655164e-06,
"loss": 2.3015,
"step": 192000
},
{
"epoch": 1.89,
"learning_rate": 1.1344855708291983e-06,
"loss": 2.2566,
"step": 192500
},
{
"epoch": 1.89,
"learning_rate": 1.0853566270032326e-06,
"loss": 2.2978,
"step": 193000
},
{
"epoch": 1.9,
"learning_rate": 1.0362276831772671e-06,
"loss": 2.2898,
"step": 193500
},
{
"epoch": 1.9,
"learning_rate": 9.870987393513014e-07,
"loss": 2.2806,
"step": 194000
},
{
"epoch": 1.91,
"learning_rate": 9.379697955253358e-07,
"loss": 2.2777,
"step": 194500
},
{
"epoch": 1.91,
"learning_rate": 8.888408516993701e-07,
"loss": 2.261,
"step": 195000
},
{
"epoch": 1.92,
"learning_rate": 8.397119078734047e-07,
"loss": 2.252,
"step": 195500
},
{
"epoch": 1.92,
"learning_rate": 7.90582964047439e-07,
"loss": 2.2906,
"step": 196000
},
{
"epoch": 1.93,
"learning_rate": 7.414540202214733e-07,
"loss": 2.2968,
"step": 196500
},
{
"epoch": 1.93,
"learning_rate": 6.923250763955077e-07,
"loss": 2.2463,
"step": 197000
},
{
"epoch": 1.94,
"learning_rate": 6.431961325695421e-07,
"loss": 2.2681,
"step": 197500
},
{
"epoch": 1.94,
"learning_rate": 5.940671887435765e-07,
"loss": 2.2965,
"step": 198000
},
{
"epoch": 1.95,
"learning_rate": 5.449382449176108e-07,
"loss": 2.2705,
"step": 198500
},
{
"epoch": 1.95,
"learning_rate": 4.958093010916452e-07,
"loss": 2.2669,
"step": 199000
},
{
"epoch": 1.96,
"learning_rate": 4.4668035726567956e-07,
"loss": 2.2478,
"step": 199500
},
{
"epoch": 1.96,
"learning_rate": 3.975514134397139e-07,
"loss": 2.3053,
"step": 200000
},
{
"epoch": 1.97,
"learning_rate": 3.484224696137483e-07,
"loss": 2.2731,
"step": 200500
},
{
"epoch": 1.97,
"learning_rate": 2.992935257877826e-07,
"loss": 2.2518,
"step": 201000
},
{
"epoch": 1.98,
"learning_rate": 2.50164581961817e-07,
"loss": 2.2657,
"step": 201500
},
{
"epoch": 1.98,
"learning_rate": 2.0103563813585136e-07,
"loss": 2.2967,
"step": 202000
},
{
"epoch": 1.98,
"learning_rate": 1.5190669430988575e-07,
"loss": 2.2874,
"step": 202500
},
{
"epoch": 1.99,
"learning_rate": 1.0277775048392012e-07,
"loss": 2.2764,
"step": 203000
},
{
"epoch": 1.99,
"learning_rate": 5.364880665795447e-08,
"loss": 2.2628,
"step": 203500
},
{
"epoch": 2.0,
"learning_rate": 4.519862831988839e-09,
"loss": 2.2711,
"step": 204000
}
],
"max_steps": 204046,
"num_train_epochs": 2,
"total_flos": 6.966781556932454e+17,
"trial_name": null,
"trial_params": null
}