m2m100_418M_tsn_en_rel / trainer_state.json
Davlan's picture
add MT model
42bc944
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 260010,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.990384985192877e-05,
"loss": 2.7022,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.980769970385754e-05,
"loss": 2.1685,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 4.971154955578632e-05,
"loss": 2.011,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 4.961539940771509e-05,
"loss": 1.8707,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 4.951924925964386e-05,
"loss": 1.8268,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 4.942309911157263e-05,
"loss": 1.7562,
"step": 3000
},
{
"epoch": 0.04,
"learning_rate": 4.932694896350141e-05,
"loss": 1.7447,
"step": 3500
},
{
"epoch": 0.05,
"learning_rate": 4.923079881543018e-05,
"loss": 1.7022,
"step": 4000
},
{
"epoch": 0.05,
"learning_rate": 4.913464866735895e-05,
"loss": 1.6657,
"step": 4500
},
{
"epoch": 0.06,
"learning_rate": 4.903849851928772e-05,
"loss": 1.6436,
"step": 5000
},
{
"epoch": 0.06,
"learning_rate": 4.89423483712165e-05,
"loss": 1.6203,
"step": 5500
},
{
"epoch": 0.07,
"learning_rate": 4.884619822314527e-05,
"loss": 1.5773,
"step": 6000
},
{
"epoch": 0.07,
"learning_rate": 4.875004807507404e-05,
"loss": 1.5829,
"step": 6500
},
{
"epoch": 0.08,
"learning_rate": 4.865389792700281e-05,
"loss": 1.5611,
"step": 7000
},
{
"epoch": 0.09,
"learning_rate": 4.8557747778931586e-05,
"loss": 1.5456,
"step": 7500
},
{
"epoch": 0.09,
"learning_rate": 4.8461597630860356e-05,
"loss": 1.5471,
"step": 8000
},
{
"epoch": 0.1,
"learning_rate": 4.836544748278913e-05,
"loss": 1.5208,
"step": 8500
},
{
"epoch": 0.1,
"learning_rate": 4.82692973347179e-05,
"loss": 1.5099,
"step": 9000
},
{
"epoch": 0.11,
"learning_rate": 4.8173147186646674e-05,
"loss": 1.4846,
"step": 9500
},
{
"epoch": 0.12,
"learning_rate": 4.8076997038575445e-05,
"loss": 1.4845,
"step": 10000
},
{
"epoch": 0.12,
"learning_rate": 4.7980846890504215e-05,
"loss": 1.4877,
"step": 10500
},
{
"epoch": 0.13,
"learning_rate": 4.7884696742432986e-05,
"loss": 1.4947,
"step": 11000
},
{
"epoch": 0.13,
"learning_rate": 4.7788546594361756e-05,
"loss": 1.4707,
"step": 11500
},
{
"epoch": 0.14,
"learning_rate": 4.7692396446290527e-05,
"loss": 1.4568,
"step": 12000
},
{
"epoch": 0.14,
"learning_rate": 4.75962462982193e-05,
"loss": 1.4249,
"step": 12500
},
{
"epoch": 0.15,
"learning_rate": 4.7500096150148074e-05,
"loss": 1.4197,
"step": 13000
},
{
"epoch": 0.16,
"learning_rate": 4.7403946002076845e-05,
"loss": 1.4406,
"step": 13500
},
{
"epoch": 0.16,
"learning_rate": 4.7307795854005615e-05,
"loss": 1.4452,
"step": 14000
},
{
"epoch": 0.17,
"learning_rate": 4.7211645705934385e-05,
"loss": 1.3891,
"step": 14500
},
{
"epoch": 0.17,
"learning_rate": 4.711549555786316e-05,
"loss": 1.4029,
"step": 15000
},
{
"epoch": 0.18,
"learning_rate": 4.701934540979193e-05,
"loss": 1.3949,
"step": 15500
},
{
"epoch": 0.18,
"learning_rate": 4.6923195261720703e-05,
"loss": 1.4012,
"step": 16000
},
{
"epoch": 0.19,
"learning_rate": 4.6827045113649474e-05,
"loss": 1.3649,
"step": 16500
},
{
"epoch": 0.2,
"learning_rate": 4.673089496557825e-05,
"loss": 1.3831,
"step": 17000
},
{
"epoch": 0.2,
"learning_rate": 4.663474481750702e-05,
"loss": 1.3716,
"step": 17500
},
{
"epoch": 0.21,
"learning_rate": 4.653859466943579e-05,
"loss": 1.3672,
"step": 18000
},
{
"epoch": 0.21,
"learning_rate": 4.644244452136456e-05,
"loss": 1.348,
"step": 18500
},
{
"epoch": 0.22,
"learning_rate": 4.634629437329334e-05,
"loss": 1.3364,
"step": 19000
},
{
"epoch": 0.22,
"learning_rate": 4.625014422522211e-05,
"loss": 1.3271,
"step": 19500
},
{
"epoch": 0.23,
"learning_rate": 4.615399407715088e-05,
"loss": 1.3145,
"step": 20000
},
{
"epoch": 0.24,
"learning_rate": 4.605784392907965e-05,
"loss": 1.3291,
"step": 20500
},
{
"epoch": 0.24,
"learning_rate": 4.596169378100843e-05,
"loss": 1.3227,
"step": 21000
},
{
"epoch": 0.25,
"learning_rate": 4.58655436329372e-05,
"loss": 1.335,
"step": 21500
},
{
"epoch": 0.25,
"learning_rate": 4.576939348486597e-05,
"loss": 1.3266,
"step": 22000
},
{
"epoch": 0.26,
"learning_rate": 4.567324333679474e-05,
"loss": 1.3244,
"step": 22500
},
{
"epoch": 0.27,
"learning_rate": 4.557709318872351e-05,
"loss": 1.302,
"step": 23000
},
{
"epoch": 0.27,
"learning_rate": 4.548094304065229e-05,
"loss": 1.303,
"step": 23500
},
{
"epoch": 0.28,
"learning_rate": 4.538479289258106e-05,
"loss": 1.3059,
"step": 24000
},
{
"epoch": 0.28,
"learning_rate": 4.528864274450983e-05,
"loss": 1.3215,
"step": 24500
},
{
"epoch": 0.29,
"learning_rate": 4.51924925964386e-05,
"loss": 1.3002,
"step": 25000
},
{
"epoch": 0.29,
"learning_rate": 4.5096342448367375e-05,
"loss": 1.2681,
"step": 25500
},
{
"epoch": 0.3,
"learning_rate": 4.5000192300296146e-05,
"loss": 1.2959,
"step": 26000
},
{
"epoch": 0.31,
"learning_rate": 4.4904042152224916e-05,
"loss": 1.2797,
"step": 26500
},
{
"epoch": 0.31,
"learning_rate": 4.480789200415369e-05,
"loss": 1.308,
"step": 27000
},
{
"epoch": 0.32,
"learning_rate": 4.4711741856082464e-05,
"loss": 1.2873,
"step": 27500
},
{
"epoch": 0.32,
"learning_rate": 4.4615591708011234e-05,
"loss": 1.2932,
"step": 28000
},
{
"epoch": 0.33,
"learning_rate": 4.4519441559940005e-05,
"loss": 1.2749,
"step": 28500
},
{
"epoch": 0.33,
"learning_rate": 4.4423291411868775e-05,
"loss": 1.2497,
"step": 29000
},
{
"epoch": 0.34,
"learning_rate": 4.432714126379755e-05,
"loss": 1.2735,
"step": 29500
},
{
"epoch": 0.35,
"learning_rate": 4.423099111572632e-05,
"loss": 1.2777,
"step": 30000
},
{
"epoch": 0.35,
"learning_rate": 4.413484096765509e-05,
"loss": 1.2641,
"step": 30500
},
{
"epoch": 0.36,
"learning_rate": 4.4038690819583864e-05,
"loss": 1.2625,
"step": 31000
},
{
"epoch": 0.36,
"learning_rate": 4.394254067151264e-05,
"loss": 1.2414,
"step": 31500
},
{
"epoch": 0.37,
"learning_rate": 4.384639052344141e-05,
"loss": 1.2534,
"step": 32000
},
{
"epoch": 0.37,
"learning_rate": 4.375024037537018e-05,
"loss": 1.24,
"step": 32500
},
{
"epoch": 0.38,
"learning_rate": 4.365409022729895e-05,
"loss": 1.2473,
"step": 33000
},
{
"epoch": 0.39,
"learning_rate": 4.355794007922772e-05,
"loss": 1.2415,
"step": 33500
},
{
"epoch": 0.39,
"learning_rate": 4.346178993115649e-05,
"loss": 1.2263,
"step": 34000
},
{
"epoch": 0.4,
"learning_rate": 4.3365639783085264e-05,
"loss": 1.235,
"step": 34500
},
{
"epoch": 0.4,
"learning_rate": 4.326948963501404e-05,
"loss": 1.2291,
"step": 35000
},
{
"epoch": 0.41,
"learning_rate": 4.317333948694281e-05,
"loss": 1.2161,
"step": 35500
},
{
"epoch": 0.42,
"learning_rate": 4.307718933887158e-05,
"loss": 1.2292,
"step": 36000
},
{
"epoch": 0.42,
"learning_rate": 4.298103919080035e-05,
"loss": 1.2329,
"step": 36500
},
{
"epoch": 0.43,
"learning_rate": 4.288488904272913e-05,
"loss": 1.2205,
"step": 37000
},
{
"epoch": 0.43,
"learning_rate": 4.27887388946579e-05,
"loss": 1.2117,
"step": 37500
},
{
"epoch": 0.44,
"learning_rate": 4.269258874658667e-05,
"loss": 1.2276,
"step": 38000
},
{
"epoch": 0.44,
"learning_rate": 4.259643859851544e-05,
"loss": 1.2138,
"step": 38500
},
{
"epoch": 0.45,
"learning_rate": 4.250028845044422e-05,
"loss": 1.2131,
"step": 39000
},
{
"epoch": 0.46,
"learning_rate": 4.240413830237299e-05,
"loss": 1.2088,
"step": 39500
},
{
"epoch": 0.46,
"learning_rate": 4.230798815430176e-05,
"loss": 1.2029,
"step": 40000
},
{
"epoch": 0.47,
"learning_rate": 4.221183800623053e-05,
"loss": 1.1927,
"step": 40500
},
{
"epoch": 0.47,
"learning_rate": 4.2115687858159306e-05,
"loss": 1.2093,
"step": 41000
},
{
"epoch": 0.48,
"learning_rate": 4.2019537710088077e-05,
"loss": 1.19,
"step": 41500
},
{
"epoch": 0.48,
"learning_rate": 4.192338756201685e-05,
"loss": 1.2042,
"step": 42000
},
{
"epoch": 0.49,
"learning_rate": 4.182723741394562e-05,
"loss": 1.201,
"step": 42500
},
{
"epoch": 0.5,
"learning_rate": 4.173108726587439e-05,
"loss": 1.1916,
"step": 43000
},
{
"epoch": 0.5,
"learning_rate": 4.1634937117803165e-05,
"loss": 1.1834,
"step": 43500
},
{
"epoch": 0.51,
"learning_rate": 4.1538786969731935e-05,
"loss": 1.1919,
"step": 44000
},
{
"epoch": 0.51,
"learning_rate": 4.1442636821660706e-05,
"loss": 1.1969,
"step": 44500
},
{
"epoch": 0.52,
"learning_rate": 4.1346486673589476e-05,
"loss": 1.1765,
"step": 45000
},
{
"epoch": 0.52,
"learning_rate": 4.1250336525518254e-05,
"loss": 1.2079,
"step": 45500
},
{
"epoch": 0.53,
"learning_rate": 4.1154186377447024e-05,
"loss": 1.1711,
"step": 46000
},
{
"epoch": 0.54,
"learning_rate": 4.1058036229375794e-05,
"loss": 1.1819,
"step": 46500
},
{
"epoch": 0.54,
"learning_rate": 4.0961886081304565e-05,
"loss": 1.1569,
"step": 47000
},
{
"epoch": 0.55,
"learning_rate": 4.086573593323334e-05,
"loss": 1.1762,
"step": 47500
},
{
"epoch": 0.55,
"learning_rate": 4.076958578516211e-05,
"loss": 1.1818,
"step": 48000
},
{
"epoch": 0.56,
"learning_rate": 4.067343563709088e-05,
"loss": 1.1843,
"step": 48500
},
{
"epoch": 0.57,
"learning_rate": 4.057728548901965e-05,
"loss": 1.1776,
"step": 49000
},
{
"epoch": 0.57,
"learning_rate": 4.048113534094843e-05,
"loss": 1.1853,
"step": 49500
},
{
"epoch": 0.58,
"learning_rate": 4.03849851928772e-05,
"loss": 1.1608,
"step": 50000
},
{
"epoch": 0.58,
"learning_rate": 4.028883504480597e-05,
"loss": 1.1748,
"step": 50500
},
{
"epoch": 0.59,
"learning_rate": 4.019268489673474e-05,
"loss": 1.1615,
"step": 51000
},
{
"epoch": 0.59,
"learning_rate": 4.009653474866352e-05,
"loss": 1.153,
"step": 51500
},
{
"epoch": 0.6,
"learning_rate": 4.000038460059229e-05,
"loss": 1.1643,
"step": 52000
},
{
"epoch": 0.61,
"learning_rate": 3.990423445252106e-05,
"loss": 1.154,
"step": 52500
},
{
"epoch": 0.61,
"learning_rate": 3.980808430444983e-05,
"loss": 1.1675,
"step": 53000
},
{
"epoch": 0.62,
"learning_rate": 3.971193415637861e-05,
"loss": 1.1612,
"step": 53500
},
{
"epoch": 0.62,
"learning_rate": 3.961578400830738e-05,
"loss": 1.1754,
"step": 54000
},
{
"epoch": 0.63,
"learning_rate": 3.951963386023615e-05,
"loss": 1.1577,
"step": 54500
},
{
"epoch": 0.63,
"learning_rate": 3.942348371216492e-05,
"loss": 1.1551,
"step": 55000
},
{
"epoch": 0.64,
"learning_rate": 3.932733356409369e-05,
"loss": 1.1584,
"step": 55500
},
{
"epoch": 0.65,
"learning_rate": 3.923118341602246e-05,
"loss": 1.1387,
"step": 56000
},
{
"epoch": 0.65,
"learning_rate": 3.913503326795123e-05,
"loss": 1.1469,
"step": 56500
},
{
"epoch": 0.66,
"learning_rate": 3.903888311988001e-05,
"loss": 1.1597,
"step": 57000
},
{
"epoch": 0.66,
"learning_rate": 3.894273297180878e-05,
"loss": 1.1478,
"step": 57500
},
{
"epoch": 0.67,
"learning_rate": 3.884658282373755e-05,
"loss": 1.1452,
"step": 58000
},
{
"epoch": 0.67,
"learning_rate": 3.875043267566632e-05,
"loss": 1.1199,
"step": 58500
},
{
"epoch": 0.68,
"learning_rate": 3.8654282527595096e-05,
"loss": 1.1151,
"step": 59000
},
{
"epoch": 0.69,
"learning_rate": 3.8558132379523866e-05,
"loss": 1.1492,
"step": 59500
},
{
"epoch": 0.69,
"learning_rate": 3.8461982231452637e-05,
"loss": 1.1368,
"step": 60000
},
{
"epoch": 0.7,
"learning_rate": 3.836583208338141e-05,
"loss": 1.1406,
"step": 60500
},
{
"epoch": 0.7,
"learning_rate": 3.8269681935310184e-05,
"loss": 1.124,
"step": 61000
},
{
"epoch": 0.71,
"learning_rate": 3.8173531787238955e-05,
"loss": 1.1445,
"step": 61500
},
{
"epoch": 0.72,
"learning_rate": 3.8077381639167725e-05,
"loss": 1.13,
"step": 62000
},
{
"epoch": 0.72,
"learning_rate": 3.7981231491096496e-05,
"loss": 1.1114,
"step": 62500
},
{
"epoch": 0.73,
"learning_rate": 3.788508134302527e-05,
"loss": 1.1394,
"step": 63000
},
{
"epoch": 0.73,
"learning_rate": 3.778893119495404e-05,
"loss": 1.1319,
"step": 63500
},
{
"epoch": 0.74,
"learning_rate": 3.7692781046882814e-05,
"loss": 1.1191,
"step": 64000
},
{
"epoch": 0.74,
"learning_rate": 3.7596630898811584e-05,
"loss": 1.1146,
"step": 64500
},
{
"epoch": 0.75,
"learning_rate": 3.7500480750740354e-05,
"loss": 1.1106,
"step": 65000
},
{
"epoch": 0.76,
"learning_rate": 3.740433060266913e-05,
"loss": 1.1113,
"step": 65500
},
{
"epoch": 0.76,
"learning_rate": 3.73081804545979e-05,
"loss": 1.0962,
"step": 66000
},
{
"epoch": 0.77,
"learning_rate": 3.721203030652667e-05,
"loss": 1.1239,
"step": 66500
},
{
"epoch": 0.77,
"learning_rate": 3.711588015845544e-05,
"loss": 1.1105,
"step": 67000
},
{
"epoch": 0.78,
"learning_rate": 3.701973001038422e-05,
"loss": 1.1102,
"step": 67500
},
{
"epoch": 0.78,
"learning_rate": 3.692357986231299e-05,
"loss": 1.1035,
"step": 68000
},
{
"epoch": 0.79,
"learning_rate": 3.682742971424176e-05,
"loss": 1.1265,
"step": 68500
},
{
"epoch": 0.8,
"learning_rate": 3.673127956617053e-05,
"loss": 1.1136,
"step": 69000
},
{
"epoch": 0.8,
"learning_rate": 3.663512941809931e-05,
"loss": 1.1042,
"step": 69500
},
{
"epoch": 0.81,
"learning_rate": 3.653897927002808e-05,
"loss": 1.0983,
"step": 70000
},
{
"epoch": 0.81,
"learning_rate": 3.644282912195685e-05,
"loss": 1.0823,
"step": 70500
},
{
"epoch": 0.82,
"learning_rate": 3.634667897388562e-05,
"loss": 1.1024,
"step": 71000
},
{
"epoch": 0.82,
"learning_rate": 3.62505288258144e-05,
"loss": 1.1067,
"step": 71500
},
{
"epoch": 0.83,
"learning_rate": 3.615437867774317e-05,
"loss": 1.093,
"step": 72000
},
{
"epoch": 0.84,
"learning_rate": 3.605822852967194e-05,
"loss": 1.0896,
"step": 72500
},
{
"epoch": 0.84,
"learning_rate": 3.596207838160071e-05,
"loss": 1.1115,
"step": 73000
},
{
"epoch": 0.85,
"learning_rate": 3.5865928233529486e-05,
"loss": 1.1029,
"step": 73500
},
{
"epoch": 0.85,
"learning_rate": 3.5769778085458256e-05,
"loss": 1.0828,
"step": 74000
},
{
"epoch": 0.86,
"learning_rate": 3.5673627937387026e-05,
"loss": 1.0969,
"step": 74500
},
{
"epoch": 0.87,
"learning_rate": 3.55774777893158e-05,
"loss": 1.0817,
"step": 75000
},
{
"epoch": 0.87,
"learning_rate": 3.5481327641244574e-05,
"loss": 1.0967,
"step": 75500
},
{
"epoch": 0.88,
"learning_rate": 3.5385177493173344e-05,
"loss": 1.1131,
"step": 76000
},
{
"epoch": 0.88,
"learning_rate": 3.5289027345102115e-05,
"loss": 1.0754,
"step": 76500
},
{
"epoch": 0.89,
"learning_rate": 3.5192877197030885e-05,
"loss": 1.0707,
"step": 77000
},
{
"epoch": 0.89,
"learning_rate": 3.5096727048959656e-05,
"loss": 1.0947,
"step": 77500
},
{
"epoch": 0.9,
"learning_rate": 3.5000576900888426e-05,
"loss": 1.104,
"step": 78000
},
{
"epoch": 0.91,
"learning_rate": 3.4904426752817197e-05,
"loss": 1.0834,
"step": 78500
},
{
"epoch": 0.91,
"learning_rate": 3.4808276604745974e-05,
"loss": 1.0953,
"step": 79000
},
{
"epoch": 0.92,
"learning_rate": 3.4712126456674744e-05,
"loss": 1.0776,
"step": 79500
},
{
"epoch": 0.92,
"learning_rate": 3.4615976308603515e-05,
"loss": 1.0832,
"step": 80000
},
{
"epoch": 0.93,
"learning_rate": 3.4519826160532285e-05,
"loss": 1.0796,
"step": 80500
},
{
"epoch": 0.93,
"learning_rate": 3.442367601246106e-05,
"loss": 1.0981,
"step": 81000
},
{
"epoch": 0.94,
"learning_rate": 3.432752586438983e-05,
"loss": 1.0704,
"step": 81500
},
{
"epoch": 0.95,
"learning_rate": 3.42313757163186e-05,
"loss": 1.0678,
"step": 82000
},
{
"epoch": 0.95,
"learning_rate": 3.4135225568247374e-05,
"loss": 1.0775,
"step": 82500
},
{
"epoch": 0.96,
"learning_rate": 3.403907542017615e-05,
"loss": 1.0857,
"step": 83000
},
{
"epoch": 0.96,
"learning_rate": 3.394292527210492e-05,
"loss": 1.0775,
"step": 83500
},
{
"epoch": 0.97,
"learning_rate": 3.384677512403369e-05,
"loss": 1.0674,
"step": 84000
},
{
"epoch": 0.97,
"learning_rate": 3.375062497596246e-05,
"loss": 1.0601,
"step": 84500
},
{
"epoch": 0.98,
"learning_rate": 3.365447482789123e-05,
"loss": 1.0549,
"step": 85000
},
{
"epoch": 0.99,
"learning_rate": 3.355832467982001e-05,
"loss": 1.0586,
"step": 85500
},
{
"epoch": 0.99,
"learning_rate": 3.346217453174878e-05,
"loss": 1.0681,
"step": 86000
},
{
"epoch": 1.0,
"learning_rate": 3.336602438367755e-05,
"loss": 1.0644,
"step": 86500
},
{
"epoch": 1.0,
"learning_rate": 3.326987423560632e-05,
"loss": 1.0063,
"step": 87000
},
{
"epoch": 1.01,
"learning_rate": 3.31737240875351e-05,
"loss": 0.9661,
"step": 87500
},
{
"epoch": 1.02,
"learning_rate": 3.307757393946387e-05,
"loss": 0.956,
"step": 88000
},
{
"epoch": 1.02,
"learning_rate": 3.298142379139264e-05,
"loss": 0.9734,
"step": 88500
},
{
"epoch": 1.03,
"learning_rate": 3.288527364332141e-05,
"loss": 0.9486,
"step": 89000
},
{
"epoch": 1.03,
"learning_rate": 3.278912349525019e-05,
"loss": 0.9358,
"step": 89500
},
{
"epoch": 1.04,
"learning_rate": 3.269297334717896e-05,
"loss": 0.9559,
"step": 90000
},
{
"epoch": 1.04,
"learning_rate": 3.259682319910773e-05,
"loss": 0.9637,
"step": 90500
},
{
"epoch": 1.05,
"learning_rate": 3.25006730510365e-05,
"loss": 0.9583,
"step": 91000
},
{
"epoch": 1.06,
"learning_rate": 3.2404522902965275e-05,
"loss": 0.9432,
"step": 91500
},
{
"epoch": 1.06,
"learning_rate": 3.2308372754894046e-05,
"loss": 0.9727,
"step": 92000
},
{
"epoch": 1.07,
"learning_rate": 3.2212222606822816e-05,
"loss": 0.9463,
"step": 92500
},
{
"epoch": 1.07,
"learning_rate": 3.2116072458751586e-05,
"loss": 0.9658,
"step": 93000
},
{
"epoch": 1.08,
"learning_rate": 3.2019922310680364e-05,
"loss": 0.974,
"step": 93500
},
{
"epoch": 1.08,
"learning_rate": 3.1923772162609134e-05,
"loss": 0.9577,
"step": 94000
},
{
"epoch": 1.09,
"learning_rate": 3.1827622014537904e-05,
"loss": 0.9679,
"step": 94500
},
{
"epoch": 1.1,
"learning_rate": 3.1731471866466675e-05,
"loss": 0.9531,
"step": 95000
},
{
"epoch": 1.1,
"learning_rate": 3.163532171839545e-05,
"loss": 0.9511,
"step": 95500
},
{
"epoch": 1.11,
"learning_rate": 3.153917157032422e-05,
"loss": 0.9703,
"step": 96000
},
{
"epoch": 1.11,
"learning_rate": 3.144302142225299e-05,
"loss": 0.9489,
"step": 96500
},
{
"epoch": 1.12,
"learning_rate": 3.1346871274181763e-05,
"loss": 0.9691,
"step": 97000
},
{
"epoch": 1.12,
"learning_rate": 3.125072112611054e-05,
"loss": 0.9512,
"step": 97500
},
{
"epoch": 1.13,
"learning_rate": 3.115457097803931e-05,
"loss": 0.9622,
"step": 98000
},
{
"epoch": 1.14,
"learning_rate": 3.105842082996808e-05,
"loss": 0.9493,
"step": 98500
},
{
"epoch": 1.14,
"learning_rate": 3.096227068189685e-05,
"loss": 0.9485,
"step": 99000
},
{
"epoch": 1.15,
"learning_rate": 3.086612053382562e-05,
"loss": 0.9555,
"step": 99500
},
{
"epoch": 1.15,
"learning_rate": 3.076997038575439e-05,
"loss": 0.9462,
"step": 100000
},
{
"epoch": 1.16,
"learning_rate": 3.067382023768316e-05,
"loss": 0.9637,
"step": 100500
},
{
"epoch": 1.17,
"learning_rate": 3.057767008961194e-05,
"loss": 0.9474,
"step": 101000
},
{
"epoch": 1.17,
"learning_rate": 3.048151994154071e-05,
"loss": 0.963,
"step": 101500
},
{
"epoch": 1.18,
"learning_rate": 3.038536979346948e-05,
"loss": 0.9462,
"step": 102000
},
{
"epoch": 1.18,
"learning_rate": 3.0289219645398255e-05,
"loss": 0.9531,
"step": 102500
},
{
"epoch": 1.19,
"learning_rate": 3.0193069497327025e-05,
"loss": 0.9338,
"step": 103000
},
{
"epoch": 1.19,
"learning_rate": 3.00969193492558e-05,
"loss": 0.9576,
"step": 103500
},
{
"epoch": 1.2,
"learning_rate": 3.000076920118457e-05,
"loss": 0.9486,
"step": 104000
},
{
"epoch": 1.21,
"learning_rate": 2.9904619053113344e-05,
"loss": 0.9636,
"step": 104500
},
{
"epoch": 1.21,
"learning_rate": 2.9808468905042114e-05,
"loss": 0.9499,
"step": 105000
},
{
"epoch": 1.22,
"learning_rate": 2.9712318756970888e-05,
"loss": 0.9605,
"step": 105500
},
{
"epoch": 1.22,
"learning_rate": 2.9616168608899658e-05,
"loss": 0.937,
"step": 106000
},
{
"epoch": 1.23,
"learning_rate": 2.952001846082843e-05,
"loss": 0.9469,
"step": 106500
},
{
"epoch": 1.23,
"learning_rate": 2.9423868312757202e-05,
"loss": 0.9519,
"step": 107000
},
{
"epoch": 1.24,
"learning_rate": 2.9327718164685973e-05,
"loss": 0.9548,
"step": 107500
},
{
"epoch": 1.25,
"learning_rate": 2.9231568016614747e-05,
"loss": 0.9411,
"step": 108000
},
{
"epoch": 1.25,
"learning_rate": 2.9135417868543517e-05,
"loss": 0.9549,
"step": 108500
},
{
"epoch": 1.26,
"learning_rate": 2.903926772047229e-05,
"loss": 0.9353,
"step": 109000
},
{
"epoch": 1.26,
"learning_rate": 2.894311757240106e-05,
"loss": 0.9543,
"step": 109500
},
{
"epoch": 1.27,
"learning_rate": 2.8846967424329835e-05,
"loss": 0.9484,
"step": 110000
},
{
"epoch": 1.27,
"learning_rate": 2.8750817276258606e-05,
"loss": 0.9491,
"step": 110500
},
{
"epoch": 1.28,
"learning_rate": 2.865466712818738e-05,
"loss": 0.9495,
"step": 111000
},
{
"epoch": 1.29,
"learning_rate": 2.855851698011615e-05,
"loss": 0.9383,
"step": 111500
},
{
"epoch": 1.29,
"learning_rate": 2.8462366832044924e-05,
"loss": 0.9243,
"step": 112000
},
{
"epoch": 1.3,
"learning_rate": 2.8366216683973694e-05,
"loss": 0.9468,
"step": 112500
},
{
"epoch": 1.3,
"learning_rate": 2.8270066535902468e-05,
"loss": 0.946,
"step": 113000
},
{
"epoch": 1.31,
"learning_rate": 2.817391638783124e-05,
"loss": 0.9511,
"step": 113500
},
{
"epoch": 1.32,
"learning_rate": 2.8077766239760012e-05,
"loss": 0.9264,
"step": 114000
},
{
"epoch": 1.32,
"learning_rate": 2.7981616091688783e-05,
"loss": 0.9368,
"step": 114500
},
{
"epoch": 1.33,
"learning_rate": 2.7885465943617556e-05,
"loss": 0.9461,
"step": 115000
},
{
"epoch": 1.33,
"learning_rate": 2.7789315795546327e-05,
"loss": 0.9422,
"step": 115500
},
{
"epoch": 1.34,
"learning_rate": 2.76931656474751e-05,
"loss": 0.9478,
"step": 116000
},
{
"epoch": 1.34,
"learning_rate": 2.759701549940387e-05,
"loss": 0.9536,
"step": 116500
},
{
"epoch": 1.35,
"learning_rate": 2.7500865351332645e-05,
"loss": 0.9419,
"step": 117000
},
{
"epoch": 1.36,
"learning_rate": 2.7404715203261415e-05,
"loss": 0.9361,
"step": 117500
},
{
"epoch": 1.36,
"learning_rate": 2.730856505519019e-05,
"loss": 0.9397,
"step": 118000
},
{
"epoch": 1.37,
"learning_rate": 2.721241490711896e-05,
"loss": 0.9427,
"step": 118500
},
{
"epoch": 1.37,
"learning_rate": 2.7116264759047733e-05,
"loss": 0.9353,
"step": 119000
},
{
"epoch": 1.38,
"learning_rate": 2.7020114610976504e-05,
"loss": 0.9294,
"step": 119500
},
{
"epoch": 1.38,
"learning_rate": 2.6923964462905278e-05,
"loss": 0.9401,
"step": 120000
},
{
"epoch": 1.39,
"learning_rate": 2.6827814314834048e-05,
"loss": 0.9443,
"step": 120500
},
{
"epoch": 1.4,
"learning_rate": 2.6731664166762815e-05,
"loss": 0.9357,
"step": 121000
},
{
"epoch": 1.4,
"learning_rate": 2.663551401869159e-05,
"loss": 0.9508,
"step": 121500
},
{
"epoch": 1.41,
"learning_rate": 2.653936387062036e-05,
"loss": 0.9391,
"step": 122000
},
{
"epoch": 1.41,
"learning_rate": 2.6443213722549133e-05,
"loss": 0.9471,
"step": 122500
},
{
"epoch": 1.42,
"learning_rate": 2.6347063574477904e-05,
"loss": 0.9323,
"step": 123000
},
{
"epoch": 1.42,
"learning_rate": 2.6250913426406677e-05,
"loss": 0.9275,
"step": 123500
},
{
"epoch": 1.43,
"learning_rate": 2.6154763278335448e-05,
"loss": 0.9461,
"step": 124000
},
{
"epoch": 1.44,
"learning_rate": 2.605861313026422e-05,
"loss": 0.9252,
"step": 124500
},
{
"epoch": 1.44,
"learning_rate": 2.5962462982192992e-05,
"loss": 0.9392,
"step": 125000
},
{
"epoch": 1.45,
"learning_rate": 2.5866312834121766e-05,
"loss": 0.9449,
"step": 125500
},
{
"epoch": 1.45,
"learning_rate": 2.5770162686050536e-05,
"loss": 0.9354,
"step": 126000
},
{
"epoch": 1.46,
"learning_rate": 2.567401253797931e-05,
"loss": 0.9291,
"step": 126500
},
{
"epoch": 1.47,
"learning_rate": 2.557786238990808e-05,
"loss": 0.9141,
"step": 127000
},
{
"epoch": 1.47,
"learning_rate": 2.548171224183685e-05,
"loss": 0.9358,
"step": 127500
},
{
"epoch": 1.48,
"learning_rate": 2.5385562093765625e-05,
"loss": 0.9286,
"step": 128000
},
{
"epoch": 1.48,
"learning_rate": 2.5289411945694395e-05,
"loss": 0.9194,
"step": 128500
},
{
"epoch": 1.49,
"learning_rate": 2.519326179762317e-05,
"loss": 0.9386,
"step": 129000
},
{
"epoch": 1.49,
"learning_rate": 2.509711164955194e-05,
"loss": 0.9239,
"step": 129500
},
{
"epoch": 1.5,
"learning_rate": 2.5000961501480713e-05,
"loss": 0.9418,
"step": 130000
},
{
"epoch": 1.51,
"learning_rate": 2.4904811353409484e-05,
"loss": 0.9197,
"step": 130500
},
{
"epoch": 1.51,
"learning_rate": 2.4808661205338257e-05,
"loss": 0.9265,
"step": 131000
},
{
"epoch": 1.52,
"learning_rate": 2.4712511057267028e-05,
"loss": 0.9294,
"step": 131500
},
{
"epoch": 1.52,
"learning_rate": 2.4616360909195802e-05,
"loss": 0.9153,
"step": 132000
},
{
"epoch": 1.53,
"learning_rate": 2.4520210761124572e-05,
"loss": 0.9201,
"step": 132500
},
{
"epoch": 1.53,
"learning_rate": 2.4424060613053346e-05,
"loss": 0.9222,
"step": 133000
},
{
"epoch": 1.54,
"learning_rate": 2.4327910464982116e-05,
"loss": 0.928,
"step": 133500
},
{
"epoch": 1.55,
"learning_rate": 2.423176031691089e-05,
"loss": 0.9388,
"step": 134000
},
{
"epoch": 1.55,
"learning_rate": 2.413561016883966e-05,
"loss": 0.9244,
"step": 134500
},
{
"epoch": 1.56,
"learning_rate": 2.4039460020768434e-05,
"loss": 0.9152,
"step": 135000
},
{
"epoch": 1.56,
"learning_rate": 2.3943309872697205e-05,
"loss": 0.927,
"step": 135500
},
{
"epoch": 1.57,
"learning_rate": 2.384715972462598e-05,
"loss": 0.9179,
"step": 136000
},
{
"epoch": 1.57,
"learning_rate": 2.375100957655475e-05,
"loss": 0.9242,
"step": 136500
},
{
"epoch": 1.58,
"learning_rate": 2.3654859428483523e-05,
"loss": 0.9155,
"step": 137000
},
{
"epoch": 1.59,
"learning_rate": 2.355870928041229e-05,
"loss": 0.925,
"step": 137500
},
{
"epoch": 1.59,
"learning_rate": 2.3462559132341064e-05,
"loss": 0.9251,
"step": 138000
},
{
"epoch": 1.6,
"learning_rate": 2.3366408984269834e-05,
"loss": 0.929,
"step": 138500
},
{
"epoch": 1.6,
"learning_rate": 2.3270258836198608e-05,
"loss": 0.923,
"step": 139000
},
{
"epoch": 1.61,
"learning_rate": 2.317410868812738e-05,
"loss": 0.9419,
"step": 139500
},
{
"epoch": 1.62,
"learning_rate": 2.3077958540056152e-05,
"loss": 0.9305,
"step": 140000
},
{
"epoch": 1.62,
"learning_rate": 2.2981808391984923e-05,
"loss": 0.9138,
"step": 140500
},
{
"epoch": 1.63,
"learning_rate": 2.2885658243913696e-05,
"loss": 0.9151,
"step": 141000
},
{
"epoch": 1.63,
"learning_rate": 2.2789508095842467e-05,
"loss": 0.9246,
"step": 141500
},
{
"epoch": 1.64,
"learning_rate": 2.269335794777124e-05,
"loss": 0.9279,
"step": 142000
},
{
"epoch": 1.64,
"learning_rate": 2.259720779970001e-05,
"loss": 0.9066,
"step": 142500
},
{
"epoch": 1.65,
"learning_rate": 2.2501057651628785e-05,
"loss": 0.906,
"step": 143000
},
{
"epoch": 1.66,
"learning_rate": 2.2404907503557555e-05,
"loss": 0.9079,
"step": 143500
},
{
"epoch": 1.66,
"learning_rate": 2.230875735548633e-05,
"loss": 0.9161,
"step": 144000
},
{
"epoch": 1.67,
"learning_rate": 2.22126072074151e-05,
"loss": 0.9151,
"step": 144500
},
{
"epoch": 1.67,
"learning_rate": 2.2116457059343873e-05,
"loss": 0.9143,
"step": 145000
},
{
"epoch": 1.68,
"learning_rate": 2.2020306911272644e-05,
"loss": 0.9329,
"step": 145500
},
{
"epoch": 1.68,
"learning_rate": 2.1924156763201418e-05,
"loss": 0.9118,
"step": 146000
},
{
"epoch": 1.69,
"learning_rate": 2.1828006615130188e-05,
"loss": 0.9103,
"step": 146500
},
{
"epoch": 1.7,
"learning_rate": 2.1731856467058962e-05,
"loss": 0.9225,
"step": 147000
},
{
"epoch": 1.7,
"learning_rate": 2.1635706318987732e-05,
"loss": 0.9325,
"step": 147500
},
{
"epoch": 1.71,
"learning_rate": 2.1539556170916506e-05,
"loss": 0.9267,
"step": 148000
},
{
"epoch": 1.71,
"learning_rate": 2.1443406022845273e-05,
"loss": 0.916,
"step": 148500
},
{
"epoch": 1.72,
"learning_rate": 2.1347255874774047e-05,
"loss": 0.904,
"step": 149000
},
{
"epoch": 1.72,
"learning_rate": 2.1251105726702817e-05,
"loss": 0.9097,
"step": 149500
},
{
"epoch": 1.73,
"learning_rate": 2.115495557863159e-05,
"loss": 0.8927,
"step": 150000
},
{
"epoch": 1.74,
"learning_rate": 2.1058805430560362e-05,
"loss": 0.8965,
"step": 150500
},
{
"epoch": 1.74,
"learning_rate": 2.0962655282489136e-05,
"loss": 0.9112,
"step": 151000
},
{
"epoch": 1.75,
"learning_rate": 2.0866505134417906e-05,
"loss": 0.9087,
"step": 151500
},
{
"epoch": 1.75,
"learning_rate": 2.077035498634668e-05,
"loss": 0.8964,
"step": 152000
},
{
"epoch": 1.76,
"learning_rate": 2.067420483827545e-05,
"loss": 0.905,
"step": 152500
},
{
"epoch": 1.77,
"learning_rate": 2.0578054690204224e-05,
"loss": 0.9104,
"step": 153000
},
{
"epoch": 1.77,
"learning_rate": 2.0481904542132994e-05,
"loss": 0.9009,
"step": 153500
},
{
"epoch": 1.78,
"learning_rate": 2.0385754394061768e-05,
"loss": 0.9134,
"step": 154000
},
{
"epoch": 1.78,
"learning_rate": 2.028960424599054e-05,
"loss": 0.8799,
"step": 154500
},
{
"epoch": 1.79,
"learning_rate": 2.0193454097919313e-05,
"loss": 0.8913,
"step": 155000
},
{
"epoch": 1.79,
"learning_rate": 2.0097303949848083e-05,
"loss": 0.8978,
"step": 155500
},
{
"epoch": 1.8,
"learning_rate": 2.0001153801776857e-05,
"loss": 0.9004,
"step": 156000
},
{
"epoch": 1.81,
"learning_rate": 1.9905003653705627e-05,
"loss": 0.9072,
"step": 156500
},
{
"epoch": 1.81,
"learning_rate": 1.98088535056344e-05,
"loss": 0.8987,
"step": 157000
},
{
"epoch": 1.82,
"learning_rate": 1.971270335756317e-05,
"loss": 0.8858,
"step": 157500
},
{
"epoch": 1.82,
"learning_rate": 1.9616553209491945e-05,
"loss": 0.898,
"step": 158000
},
{
"epoch": 1.83,
"learning_rate": 1.9520403061420716e-05,
"loss": 0.897,
"step": 158500
},
{
"epoch": 1.83,
"learning_rate": 1.942425291334949e-05,
"loss": 0.9073,
"step": 159000
},
{
"epoch": 1.84,
"learning_rate": 1.9328102765278257e-05,
"loss": 0.8768,
"step": 159500
},
{
"epoch": 1.85,
"learning_rate": 1.923195261720703e-05,
"loss": 0.8915,
"step": 160000
},
{
"epoch": 1.85,
"learning_rate": 1.91358024691358e-05,
"loss": 0.882,
"step": 160500
},
{
"epoch": 1.86,
"learning_rate": 1.9039652321064575e-05,
"loss": 0.9006,
"step": 161000
},
{
"epoch": 1.86,
"learning_rate": 1.8943502172993345e-05,
"loss": 0.8819,
"step": 161500
},
{
"epoch": 1.87,
"learning_rate": 1.884735202492212e-05,
"loss": 0.9125,
"step": 162000
},
{
"epoch": 1.87,
"learning_rate": 1.875120187685089e-05,
"loss": 0.8786,
"step": 162500
},
{
"epoch": 1.88,
"learning_rate": 1.8655051728779663e-05,
"loss": 0.8794,
"step": 163000
},
{
"epoch": 1.89,
"learning_rate": 1.8558901580708433e-05,
"loss": 0.8776,
"step": 163500
},
{
"epoch": 1.89,
"learning_rate": 1.8462751432637207e-05,
"loss": 0.898,
"step": 164000
},
{
"epoch": 1.9,
"learning_rate": 1.8366601284565978e-05,
"loss": 0.8917,
"step": 164500
},
{
"epoch": 1.9,
"learning_rate": 1.827045113649475e-05,
"loss": 0.9,
"step": 165000
},
{
"epoch": 1.91,
"learning_rate": 1.8174300988423522e-05,
"loss": 0.8923,
"step": 165500
},
{
"epoch": 1.92,
"learning_rate": 1.8078150840352296e-05,
"loss": 0.9107,
"step": 166000
},
{
"epoch": 1.92,
"learning_rate": 1.7982000692281066e-05,
"loss": 0.8976,
"step": 166500
},
{
"epoch": 1.93,
"learning_rate": 1.788585054420984e-05,
"loss": 0.9077,
"step": 167000
},
{
"epoch": 1.93,
"learning_rate": 1.778970039613861e-05,
"loss": 0.8855,
"step": 167500
},
{
"epoch": 1.94,
"learning_rate": 1.7693550248067384e-05,
"loss": 0.89,
"step": 168000
},
{
"epoch": 1.94,
"learning_rate": 1.7597400099996155e-05,
"loss": 0.9004,
"step": 168500
},
{
"epoch": 1.95,
"learning_rate": 1.750124995192493e-05,
"loss": 0.8804,
"step": 169000
},
{
"epoch": 1.96,
"learning_rate": 1.74050998038537e-05,
"loss": 0.9081,
"step": 169500
},
{
"epoch": 1.96,
"learning_rate": 1.7308949655782473e-05,
"loss": 0.9013,
"step": 170000
},
{
"epoch": 1.97,
"learning_rate": 1.721279950771124e-05,
"loss": 0.8874,
"step": 170500
},
{
"epoch": 1.97,
"learning_rate": 1.7116649359640014e-05,
"loss": 0.9167,
"step": 171000
},
{
"epoch": 1.98,
"learning_rate": 1.7020499211568784e-05,
"loss": 0.8841,
"step": 171500
},
{
"epoch": 1.98,
"learning_rate": 1.6924349063497558e-05,
"loss": 0.8982,
"step": 172000
},
{
"epoch": 1.99,
"learning_rate": 1.6828198915426328e-05,
"loss": 0.8789,
"step": 172500
},
{
"epoch": 2.0,
"learning_rate": 1.6732048767355102e-05,
"loss": 0.8933,
"step": 173000
},
{
"epoch": 2.0,
"learning_rate": 1.6635898619283873e-05,
"loss": 0.8458,
"step": 173500
},
{
"epoch": 2.01,
"learning_rate": 1.6539748471212646e-05,
"loss": 0.7751,
"step": 174000
},
{
"epoch": 2.01,
"learning_rate": 1.6443598323141417e-05,
"loss": 0.7772,
"step": 174500
},
{
"epoch": 2.02,
"learning_rate": 1.634744817507019e-05,
"loss": 0.7611,
"step": 175000
},
{
"epoch": 2.02,
"learning_rate": 1.625129802699896e-05,
"loss": 0.7745,
"step": 175500
},
{
"epoch": 2.03,
"learning_rate": 1.6155147878927735e-05,
"loss": 0.7699,
"step": 176000
},
{
"epoch": 2.04,
"learning_rate": 1.6058997730856505e-05,
"loss": 0.7727,
"step": 176500
},
{
"epoch": 2.04,
"learning_rate": 1.596284758278528e-05,
"loss": 0.7907,
"step": 177000
},
{
"epoch": 2.05,
"learning_rate": 1.586669743471405e-05,
"loss": 0.7675,
"step": 177500
},
{
"epoch": 2.05,
"learning_rate": 1.5770547286642823e-05,
"loss": 0.7707,
"step": 178000
},
{
"epoch": 2.06,
"learning_rate": 1.5674397138571594e-05,
"loss": 0.7717,
"step": 178500
},
{
"epoch": 2.07,
"learning_rate": 1.5578246990500368e-05,
"loss": 0.75,
"step": 179000
},
{
"epoch": 2.07,
"learning_rate": 1.5482096842429138e-05,
"loss": 0.7788,
"step": 179500
},
{
"epoch": 2.08,
"learning_rate": 1.5385946694357912e-05,
"loss": 0.7711,
"step": 180000
},
{
"epoch": 2.08,
"learning_rate": 1.5289796546286682e-05,
"loss": 0.768,
"step": 180500
},
{
"epoch": 2.09,
"learning_rate": 1.5193646398215456e-05,
"loss": 0.7748,
"step": 181000
},
{
"epoch": 2.09,
"learning_rate": 1.5097496250144225e-05,
"loss": 0.7892,
"step": 181500
},
{
"epoch": 2.1,
"learning_rate": 1.5001346102072997e-05,
"loss": 0.7742,
"step": 182000
},
{
"epoch": 2.11,
"learning_rate": 1.4905195954001769e-05,
"loss": 0.7751,
"step": 182500
},
{
"epoch": 2.11,
"learning_rate": 1.4809045805930541e-05,
"loss": 0.7782,
"step": 183000
},
{
"epoch": 2.12,
"learning_rate": 1.4712895657859313e-05,
"loss": 0.7731,
"step": 183500
},
{
"epoch": 2.12,
"learning_rate": 1.4616745509788085e-05,
"loss": 0.7739,
"step": 184000
},
{
"epoch": 2.13,
"learning_rate": 1.4520595361716857e-05,
"loss": 0.7677,
"step": 184500
},
{
"epoch": 2.13,
"learning_rate": 1.442444521364563e-05,
"loss": 0.7643,
"step": 185000
},
{
"epoch": 2.14,
"learning_rate": 1.4328295065574402e-05,
"loss": 0.7814,
"step": 185500
},
{
"epoch": 2.15,
"learning_rate": 1.4232144917503174e-05,
"loss": 0.7742,
"step": 186000
},
{
"epoch": 2.15,
"learning_rate": 1.4135994769431946e-05,
"loss": 0.7682,
"step": 186500
},
{
"epoch": 2.16,
"learning_rate": 1.4039844621360718e-05,
"loss": 0.7803,
"step": 187000
},
{
"epoch": 2.16,
"learning_rate": 1.394369447328949e-05,
"loss": 0.7869,
"step": 187500
},
{
"epoch": 2.17,
"learning_rate": 1.3847544325218262e-05,
"loss": 0.7779,
"step": 188000
},
{
"epoch": 2.17,
"learning_rate": 1.3751394177147034e-05,
"loss": 0.7697,
"step": 188500
},
{
"epoch": 2.18,
"learning_rate": 1.3655244029075807e-05,
"loss": 0.7836,
"step": 189000
},
{
"epoch": 2.19,
"learning_rate": 1.3559093881004579e-05,
"loss": 0.7681,
"step": 189500
},
{
"epoch": 2.19,
"learning_rate": 1.346294373293335e-05,
"loss": 0.7792,
"step": 190000
},
{
"epoch": 2.2,
"learning_rate": 1.3366793584862123e-05,
"loss": 0.7801,
"step": 190500
},
{
"epoch": 2.2,
"learning_rate": 1.3270643436790895e-05,
"loss": 0.7693,
"step": 191000
},
{
"epoch": 2.21,
"learning_rate": 1.3174493288719667e-05,
"loss": 0.7708,
"step": 191500
},
{
"epoch": 2.22,
"learning_rate": 1.307834314064844e-05,
"loss": 0.7735,
"step": 192000
},
{
"epoch": 2.22,
"learning_rate": 1.2982192992577208e-05,
"loss": 0.7638,
"step": 192500
},
{
"epoch": 2.23,
"learning_rate": 1.288604284450598e-05,
"loss": 0.766,
"step": 193000
},
{
"epoch": 2.23,
"learning_rate": 1.2789892696434752e-05,
"loss": 0.7737,
"step": 193500
},
{
"epoch": 2.24,
"learning_rate": 1.2693742548363524e-05,
"loss": 0.7823,
"step": 194000
},
{
"epoch": 2.24,
"learning_rate": 1.2597592400292297e-05,
"loss": 0.7772,
"step": 194500
},
{
"epoch": 2.25,
"learning_rate": 1.2501442252221069e-05,
"loss": 0.7648,
"step": 195000
},
{
"epoch": 2.26,
"learning_rate": 1.240529210414984e-05,
"loss": 0.7803,
"step": 195500
},
{
"epoch": 2.26,
"learning_rate": 1.2309141956078613e-05,
"loss": 0.7615,
"step": 196000
},
{
"epoch": 2.27,
"learning_rate": 1.2212991808007385e-05,
"loss": 0.7523,
"step": 196500
},
{
"epoch": 2.27,
"learning_rate": 1.2116841659936157e-05,
"loss": 0.772,
"step": 197000
},
{
"epoch": 2.28,
"learning_rate": 1.202069151186493e-05,
"loss": 0.7659,
"step": 197500
},
{
"epoch": 2.28,
"learning_rate": 1.1924541363793701e-05,
"loss": 0.7674,
"step": 198000
},
{
"epoch": 2.29,
"learning_rate": 1.1828391215722473e-05,
"loss": 0.7698,
"step": 198500
},
{
"epoch": 2.3,
"learning_rate": 1.1732241067651246e-05,
"loss": 0.7626,
"step": 199000
},
{
"epoch": 2.3,
"learning_rate": 1.1636090919580018e-05,
"loss": 0.7697,
"step": 199500
},
{
"epoch": 2.31,
"learning_rate": 1.153994077150879e-05,
"loss": 0.7707,
"step": 200000
},
{
"epoch": 2.31,
"learning_rate": 1.144379062343756e-05,
"loss": 0.7798,
"step": 200500
},
{
"epoch": 2.32,
"learning_rate": 1.1347640475366332e-05,
"loss": 0.7816,
"step": 201000
},
{
"epoch": 2.32,
"learning_rate": 1.1251490327295105e-05,
"loss": 0.771,
"step": 201500
},
{
"epoch": 2.33,
"learning_rate": 1.1155340179223877e-05,
"loss": 0.7622,
"step": 202000
},
{
"epoch": 2.34,
"learning_rate": 1.1059190031152649e-05,
"loss": 0.7726,
"step": 202500
},
{
"epoch": 2.34,
"learning_rate": 1.0963039883081421e-05,
"loss": 0.7529,
"step": 203000
},
{
"epoch": 2.35,
"learning_rate": 1.0866889735010193e-05,
"loss": 0.7717,
"step": 203500
},
{
"epoch": 2.35,
"learning_rate": 1.0770739586938965e-05,
"loss": 0.7672,
"step": 204000
},
{
"epoch": 2.36,
"learning_rate": 1.0674589438867737e-05,
"loss": 0.7554,
"step": 204500
},
{
"epoch": 2.37,
"learning_rate": 1.057843929079651e-05,
"loss": 0.764,
"step": 205000
},
{
"epoch": 2.37,
"learning_rate": 1.0482289142725281e-05,
"loss": 0.7489,
"step": 205500
},
{
"epoch": 2.38,
"learning_rate": 1.0386138994654052e-05,
"loss": 0.7656,
"step": 206000
},
{
"epoch": 2.38,
"learning_rate": 1.0289988846582824e-05,
"loss": 0.7638,
"step": 206500
},
{
"epoch": 2.39,
"learning_rate": 1.0193838698511596e-05,
"loss": 0.7664,
"step": 207000
},
{
"epoch": 2.39,
"learning_rate": 1.0097688550440368e-05,
"loss": 0.7607,
"step": 207500
},
{
"epoch": 2.4,
"learning_rate": 1.000153840236914e-05,
"loss": 0.7649,
"step": 208000
},
{
"epoch": 2.41,
"learning_rate": 9.905388254297913e-06,
"loss": 0.7673,
"step": 208500
},
{
"epoch": 2.41,
"learning_rate": 9.809238106226685e-06,
"loss": 0.7509,
"step": 209000
},
{
"epoch": 2.42,
"learning_rate": 9.713087958155457e-06,
"loss": 0.7552,
"step": 209500
},
{
"epoch": 2.42,
"learning_rate": 9.616937810084229e-06,
"loss": 0.7632,
"step": 210000
},
{
"epoch": 2.43,
"learning_rate": 9.520787662013001e-06,
"loss": 0.7499,
"step": 210500
},
{
"epoch": 2.43,
"learning_rate": 9.424637513941773e-06,
"loss": 0.7441,
"step": 211000
},
{
"epoch": 2.44,
"learning_rate": 9.328487365870544e-06,
"loss": 0.7666,
"step": 211500
},
{
"epoch": 2.45,
"learning_rate": 9.232337217799316e-06,
"loss": 0.7629,
"step": 212000
},
{
"epoch": 2.45,
"learning_rate": 9.136187069728088e-06,
"loss": 0.76,
"step": 212500
},
{
"epoch": 2.46,
"learning_rate": 9.04003692165686e-06,
"loss": 0.7484,
"step": 213000
},
{
"epoch": 2.46,
"learning_rate": 8.943886773585632e-06,
"loss": 0.7486,
"step": 213500
},
{
"epoch": 2.47,
"learning_rate": 8.847736625514404e-06,
"loss": 0.769,
"step": 214000
},
{
"epoch": 2.47,
"learning_rate": 8.751586477443176e-06,
"loss": 0.7488,
"step": 214500
},
{
"epoch": 2.48,
"learning_rate": 8.655436329371948e-06,
"loss": 0.7639,
"step": 215000
},
{
"epoch": 2.49,
"learning_rate": 8.55928618130072e-06,
"loss": 0.7822,
"step": 215500
},
{
"epoch": 2.49,
"learning_rate": 8.463136033229493e-06,
"loss": 0.7591,
"step": 216000
},
{
"epoch": 2.5,
"learning_rate": 8.366985885158265e-06,
"loss": 0.7733,
"step": 216500
},
{
"epoch": 2.5,
"learning_rate": 8.270835737087035e-06,
"loss": 0.7594,
"step": 217000
},
{
"epoch": 2.51,
"learning_rate": 8.174685589015807e-06,
"loss": 0.7451,
"step": 217500
},
{
"epoch": 2.52,
"learning_rate": 8.07853544094458e-06,
"loss": 0.7477,
"step": 218000
},
{
"epoch": 2.52,
"learning_rate": 7.982385292873352e-06,
"loss": 0.7584,
"step": 218500
},
{
"epoch": 2.53,
"learning_rate": 7.886235144802124e-06,
"loss": 0.7598,
"step": 219000
},
{
"epoch": 2.53,
"learning_rate": 7.790084996730896e-06,
"loss": 0.7593,
"step": 219500
},
{
"epoch": 2.54,
"learning_rate": 7.693934848659668e-06,
"loss": 0.7528,
"step": 220000
},
{
"epoch": 2.54,
"learning_rate": 7.597784700588439e-06,
"loss": 0.7416,
"step": 220500
},
{
"epoch": 2.55,
"learning_rate": 7.501634552517211e-06,
"loss": 0.7163,
"step": 221000
},
{
"epoch": 2.56,
"learning_rate": 7.4054844044459834e-06,
"loss": 0.7674,
"step": 221500
},
{
"epoch": 2.56,
"learning_rate": 7.3093342563747556e-06,
"loss": 0.7475,
"step": 222000
},
{
"epoch": 2.57,
"learning_rate": 7.213184108303527e-06,
"loss": 0.7462,
"step": 222500
},
{
"epoch": 2.57,
"learning_rate": 7.117033960232299e-06,
"loss": 0.7629,
"step": 223000
},
{
"epoch": 2.58,
"learning_rate": 7.020883812161071e-06,
"loss": 0.7414,
"step": 223500
},
{
"epoch": 2.58,
"learning_rate": 6.924733664089842e-06,
"loss": 0.7661,
"step": 224000
},
{
"epoch": 2.59,
"learning_rate": 6.8285835160186145e-06,
"loss": 0.7772,
"step": 224500
},
{
"epoch": 2.6,
"learning_rate": 6.732433367947387e-06,
"loss": 0.7511,
"step": 225000
},
{
"epoch": 2.6,
"learning_rate": 6.636283219876159e-06,
"loss": 0.7655,
"step": 225500
},
{
"epoch": 2.61,
"learning_rate": 6.540133071804931e-06,
"loss": 0.7523,
"step": 226000
},
{
"epoch": 2.61,
"learning_rate": 6.443982923733703e-06,
"loss": 0.7469,
"step": 226500
},
{
"epoch": 2.62,
"learning_rate": 6.347832775662475e-06,
"loss": 0.7464,
"step": 227000
},
{
"epoch": 2.62,
"learning_rate": 6.251682627591247e-06,
"loss": 0.7576,
"step": 227500
},
{
"epoch": 2.63,
"learning_rate": 6.1555324795200185e-06,
"loss": 0.749,
"step": 228000
},
{
"epoch": 2.64,
"learning_rate": 6.059382331448791e-06,
"loss": 0.738,
"step": 228500
},
{
"epoch": 2.64,
"learning_rate": 5.963232183377563e-06,
"loss": 0.7549,
"step": 229000
},
{
"epoch": 2.65,
"learning_rate": 5.867082035306334e-06,
"loss": 0.7563,
"step": 229500
},
{
"epoch": 2.65,
"learning_rate": 5.770931887235106e-06,
"loss": 0.7334,
"step": 230000
},
{
"epoch": 2.66,
"learning_rate": 5.674781739163878e-06,
"loss": 0.7488,
"step": 230500
},
{
"epoch": 2.67,
"learning_rate": 5.57863159109265e-06,
"loss": 0.7462,
"step": 231000
},
{
"epoch": 2.67,
"learning_rate": 5.4824814430214225e-06,
"loss": 0.7757,
"step": 231500
},
{
"epoch": 2.68,
"learning_rate": 5.386331294950194e-06,
"loss": 0.7534,
"step": 232000
},
{
"epoch": 2.68,
"learning_rate": 5.290181146878966e-06,
"loss": 0.7527,
"step": 232500
},
{
"epoch": 2.69,
"learning_rate": 5.194030998807738e-06,
"loss": 0.7371,
"step": 233000
},
{
"epoch": 2.69,
"learning_rate": 5.09788085073651e-06,
"loss": 0.7477,
"step": 233500
},
{
"epoch": 2.7,
"learning_rate": 5.001730702665282e-06,
"loss": 0.7467,
"step": 234000
},
{
"epoch": 2.71,
"learning_rate": 4.905580554594054e-06,
"loss": 0.7448,
"step": 234500
},
{
"epoch": 2.71,
"learning_rate": 4.809430406522826e-06,
"loss": 0.7473,
"step": 235000
},
{
"epoch": 2.72,
"learning_rate": 4.713280258451598e-06,
"loss": 0.7498,
"step": 235500
},
{
"epoch": 2.72,
"learning_rate": 4.61713011038037e-06,
"loss": 0.73,
"step": 236000
},
{
"epoch": 2.73,
"learning_rate": 4.520979962309142e-06,
"loss": 0.7356,
"step": 236500
},
{
"epoch": 2.73,
"learning_rate": 4.424829814237914e-06,
"loss": 0.7358,
"step": 237000
},
{
"epoch": 2.74,
"learning_rate": 4.328679666166685e-06,
"loss": 0.7377,
"step": 237500
},
{
"epoch": 2.75,
"learning_rate": 4.2325295180954575e-06,
"loss": 0.7381,
"step": 238000
},
{
"epoch": 2.75,
"learning_rate": 4.13637937002423e-06,
"loss": 0.7542,
"step": 238500
},
{
"epoch": 2.76,
"learning_rate": 4.040229221953002e-06,
"loss": 0.7445,
"step": 239000
},
{
"epoch": 2.76,
"learning_rate": 3.944079073881774e-06,
"loss": 0.7493,
"step": 239500
},
{
"epoch": 2.77,
"learning_rate": 3.847928925810546e-06,
"loss": 0.7251,
"step": 240000
},
{
"epoch": 2.77,
"learning_rate": 3.7517787777393177e-06,
"loss": 0.7538,
"step": 240500
},
{
"epoch": 2.78,
"learning_rate": 3.65562862966809e-06,
"loss": 0.7415,
"step": 241000
},
{
"epoch": 2.79,
"learning_rate": 3.559478481596862e-06,
"loss": 0.7514,
"step": 241500
},
{
"epoch": 2.79,
"learning_rate": 3.463328333525634e-06,
"loss": 0.7354,
"step": 242000
},
{
"epoch": 2.8,
"learning_rate": 3.367178185454406e-06,
"loss": 0.7417,
"step": 242500
},
{
"epoch": 2.8,
"learning_rate": 3.2710280373831774e-06,
"loss": 0.7465,
"step": 243000
},
{
"epoch": 2.81,
"learning_rate": 3.1748778893119496e-06,
"loss": 0.7365,
"step": 243500
},
{
"epoch": 2.82,
"learning_rate": 3.0787277412407217e-06,
"loss": 0.7406,
"step": 244000
},
{
"epoch": 2.82,
"learning_rate": 2.982577593169494e-06,
"loss": 0.7478,
"step": 244500
},
{
"epoch": 2.83,
"learning_rate": 2.8864274450982655e-06,
"loss": 0.7535,
"step": 245000
},
{
"epoch": 2.83,
"learning_rate": 2.7902772970270376e-06,
"loss": 0.7387,
"step": 245500
},
{
"epoch": 2.84,
"learning_rate": 2.6941271489558097e-06,
"loss": 0.7416,
"step": 246000
},
{
"epoch": 2.84,
"learning_rate": 2.5979770008845814e-06,
"loss": 0.7439,
"step": 246500
},
{
"epoch": 2.85,
"learning_rate": 2.5018268528133536e-06,
"loss": 0.7606,
"step": 247000
},
{
"epoch": 2.86,
"learning_rate": 2.4056767047421257e-06,
"loss": 0.7316,
"step": 247500
},
{
"epoch": 2.86,
"learning_rate": 2.3095265566708974e-06,
"loss": 0.7409,
"step": 248000
},
{
"epoch": 2.87,
"learning_rate": 2.2133764085996695e-06,
"loss": 0.7414,
"step": 248500
},
{
"epoch": 2.87,
"learning_rate": 2.1172262605284416e-06,
"loss": 0.7496,
"step": 249000
},
{
"epoch": 2.88,
"learning_rate": 2.0210761124572133e-06,
"loss": 0.7389,
"step": 249500
},
{
"epoch": 2.88,
"learning_rate": 1.9249259643859854e-06,
"loss": 0.7486,
"step": 250000
},
{
"epoch": 2.89,
"learning_rate": 1.8287758163147571e-06,
"loss": 0.7439,
"step": 250500
},
{
"epoch": 2.9,
"learning_rate": 1.732625668243529e-06,
"loss": 0.7463,
"step": 251000
},
{
"epoch": 2.9,
"learning_rate": 1.6364755201723012e-06,
"loss": 0.739,
"step": 251500
},
{
"epoch": 2.91,
"learning_rate": 1.540325372101073e-06,
"loss": 0.7424,
"step": 252000
},
{
"epoch": 2.91,
"learning_rate": 1.444175224029845e-06,
"loss": 0.7398,
"step": 252500
},
{
"epoch": 2.92,
"learning_rate": 1.348025075958617e-06,
"loss": 0.7307,
"step": 253000
},
{
"epoch": 2.92,
"learning_rate": 1.251874927887389e-06,
"loss": 0.7204,
"step": 253500
},
{
"epoch": 2.93,
"learning_rate": 1.155724779816161e-06,
"loss": 0.7299,
"step": 254000
},
{
"epoch": 2.94,
"learning_rate": 1.0595746317449328e-06,
"loss": 0.7319,
"step": 254500
},
{
"epoch": 2.94,
"learning_rate": 9.634244836737047e-07,
"loss": 0.7451,
"step": 255000
},
{
"epoch": 2.95,
"learning_rate": 8.67274335602477e-07,
"loss": 0.7379,
"step": 255500
},
{
"epoch": 2.95,
"learning_rate": 7.711241875312489e-07,
"loss": 0.7209,
"step": 256000
},
{
"epoch": 2.96,
"learning_rate": 6.749740394600208e-07,
"loss": 0.7311,
"step": 256500
},
{
"epoch": 2.97,
"learning_rate": 5.788238913887928e-07,
"loss": 0.7405,
"step": 257000
},
{
"epoch": 2.97,
"learning_rate": 4.826737433175648e-07,
"loss": 0.7318,
"step": 257500
},
{
"epoch": 2.98,
"learning_rate": 3.865235952463367e-07,
"loss": 0.7411,
"step": 258000
},
{
"epoch": 2.98,
"learning_rate": 2.9037344717510864e-07,
"loss": 0.7262,
"step": 258500
},
{
"epoch": 2.99,
"learning_rate": 1.9422329910388063e-07,
"loss": 0.7422,
"step": 259000
},
{
"epoch": 2.99,
"learning_rate": 9.80731510326526e-08,
"loss": 0.7411,
"step": 259500
},
{
"epoch": 3.0,
"learning_rate": 1.9230029614245606e-09,
"loss": 0.7414,
"step": 260000
},
{
"epoch": 3.0,
"step": 260010,
"total_flos": 4.586582510270546e+17,
"train_loss": 0.9812617931965292,
"train_runtime": 72588.7542,
"train_samples_per_second": 35.82,
"train_steps_per_second": 3.582
}
],
"max_steps": 260010,
"num_train_epochs": 3,
"total_flos": 4.586582510270546e+17,
"trial_name": null,
"trial_params": null
}