|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9646153910676615, |
|
"global_step": 690000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.998839665544078e-05, |
|
"loss": 1.4934, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9976746711104215e-05, |
|
"loss": 1.3318, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.996509676676765e-05, |
|
"loss": 1.2797, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.995344682243108e-05, |
|
"loss": 1.244, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.994186677776054e-05, |
|
"loss": 1.2432, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.993026343320132e-05, |
|
"loss": 1.2192, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.991863678875343e-05, |
|
"loss": 1.4231, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.990701014430553e-05, |
|
"loss": 1.6854, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.989536019996897e-05, |
|
"loss": 1.3492, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.98837102556324e-05, |
|
"loss": 1.3758, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.987206031129584e-05, |
|
"loss": 1.2596, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.986041036695927e-05, |
|
"loss": 1.5168, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9848760422622705e-05, |
|
"loss": 1.465, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.983713377817481e-05, |
|
"loss": 1.408, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.982550713372692e-05, |
|
"loss": 1.6209, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.981385718939035e-05, |
|
"loss": 1.681, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.980220724505378e-05, |
|
"loss": 1.6519, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.979055730071722e-05, |
|
"loss": 1.8553, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9778907356380654e-05, |
|
"loss": 1.7466, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.976728071193276e-05, |
|
"loss": 1.7394, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9755630767596196e-05, |
|
"loss": 1.648, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.974398082325963e-05, |
|
"loss": 1.788, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.973233087892306e-05, |
|
"loss": 2.0314, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.972068093458649e-05, |
|
"loss": 1.7554, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.97090542901386e-05, |
|
"loss": 1.66, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.969740434580204e-05, |
|
"loss": 1.5643, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9685777701354144e-05, |
|
"loss": 1.4357, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.967412775701758e-05, |
|
"loss": 1.4221, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9662477812681016e-05, |
|
"loss": 1.3905, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.965082786834445e-05, |
|
"loss": 1.5023, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.963917792400788e-05, |
|
"loss": 1.298, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.962752797967132e-05, |
|
"loss": 1.1518, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.961587803533475e-05, |
|
"loss": 1.1188, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9604228090998185e-05, |
|
"loss": 1.1016, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.959257814666162e-05, |
|
"loss": 1.0946, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.958092820232505e-05, |
|
"loss": 1.0894, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.956927825798849e-05, |
|
"loss": 1.07, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.955762831365192e-05, |
|
"loss": 1.0765, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.954597836931535e-05, |
|
"loss": 1.1306, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9534328424978786e-05, |
|
"loss": 1.0957, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.952267848064222e-05, |
|
"loss": 1.0914, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.951102853630565e-05, |
|
"loss": 1.0757, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.949937859196909e-05, |
|
"loss": 1.0464, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.948772864763253e-05, |
|
"loss": 1.0437, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.947610200318463e-05, |
|
"loss": 1.0503, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9464452058848064e-05, |
|
"loss": 1.0416, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9452802114511496e-05, |
|
"loss": 1.0405, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.944115217017493e-05, |
|
"loss": 1.0279, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.942950222583837e-05, |
|
"loss": 1.0252, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.94178522815018e-05, |
|
"loss": 1.0149, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.940620233716524e-05, |
|
"loss": 1.0163, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.939455239282867e-05, |
|
"loss": 1.0117, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9382902448492104e-05, |
|
"loss": 1.0097, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9371252504155536e-05, |
|
"loss": 1.0097, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9359602559818975e-05, |
|
"loss": 1.0041, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.934795261548241e-05, |
|
"loss": 1.008, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.933630267114584e-05, |
|
"loss": 1.0131, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.932465272680927e-05, |
|
"loss": 1.0003, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.931302608236138e-05, |
|
"loss": 1.0064, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9301376138024815e-05, |
|
"loss": 1.0111, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.928974949357692e-05, |
|
"loss": 1.0034, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9278099549240357e-05, |
|
"loss": 1.0013, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.926644960490379e-05, |
|
"loss": 0.9905, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.92548229604559e-05, |
|
"loss": 0.992, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.924317301611933e-05, |
|
"loss": 0.9861, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.923152307178276e-05, |
|
"loss": 0.9846, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9219873127446196e-05, |
|
"loss": 0.9831, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.920822318310963e-05, |
|
"loss": 0.985, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.919657323877307e-05, |
|
"loss": 0.9822, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.91849232944365e-05, |
|
"loss": 0.9923, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.917327335009994e-05, |
|
"loss": 0.9875, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.916162340576337e-05, |
|
"loss": 0.9812, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9149973461426804e-05, |
|
"loss": 0.9751, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.913832351709024e-05, |
|
"loss": 0.9709, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9126673572753675e-05, |
|
"loss": 0.984, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.911502362841711e-05, |
|
"loss": 0.9851, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.910337368408054e-05, |
|
"loss": 0.9652, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.909172373974397e-05, |
|
"loss": 0.9695, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9080073795407405e-05, |
|
"loss": 0.9655, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9068423851070844e-05, |
|
"loss": 0.9629, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9056773906734276e-05, |
|
"loss": 0.956, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.904512396239771e-05, |
|
"loss": 0.9642, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.903347401806114e-05, |
|
"loss": 0.9581, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.902182407372457e-05, |
|
"loss": 0.9582, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.901017412938801e-05, |
|
"loss": 0.9511, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8998524185051445e-05, |
|
"loss": 0.9475, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8986944140380895e-05, |
|
"loss": 0.9716, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8975317495933005e-05, |
|
"loss": 0.9773, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8963667551596444e-05, |
|
"loss": 0.9565, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8952017607259876e-05, |
|
"loss": 0.9489, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8940390962811986e-05, |
|
"loss": 0.9542, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.892874101847542e-05, |
|
"loss": 0.9575, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.891711437402752e-05, |
|
"loss": 0.954, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8905464429690954e-05, |
|
"loss": 0.9449, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8893814485354386e-05, |
|
"loss": 0.9468, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8882164541017825e-05, |
|
"loss": 0.9457, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.887051459668126e-05, |
|
"loss": 0.9456, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.88588646523447e-05, |
|
"loss": 0.9636, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.884721470800813e-05, |
|
"loss": 0.9583, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.883558806356023e-05, |
|
"loss": 0.9474, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.882396141911234e-05, |
|
"loss": 1.0342, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8812311474775774e-05, |
|
"loss": 1.0187, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.880066153043921e-05, |
|
"loss": 0.9742, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8789011586102645e-05, |
|
"loss": 0.9483, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.877736164176608e-05, |
|
"loss": 0.9584, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.876571169742951e-05, |
|
"loss": 0.959, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.875406175309294e-05, |
|
"loss": 0.9821, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8742411808756375e-05, |
|
"loss": 0.9683, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8730785164308485e-05, |
|
"loss": 0.9494, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8719135219971924e-05, |
|
"loss": 0.9441, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8707485275635356e-05, |
|
"loss": 0.9676, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.869583533129879e-05, |
|
"loss": 0.9357, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.868418538696222e-05, |
|
"loss": 0.9492, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.867253544262565e-05, |
|
"loss": 0.9378, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.866088549828909e-05, |
|
"loss": 0.9318, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8649235553952525e-05, |
|
"loss": 0.9271, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.863758560961596e-05, |
|
"loss": 0.9432, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8625935665279396e-05, |
|
"loss": 0.931, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.861428572094283e-05, |
|
"loss": 0.9315, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.860263577660626e-05, |
|
"loss": 0.9427, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.859100913215837e-05, |
|
"loss": 0.9496, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.85793591878218e-05, |
|
"loss": 0.9518, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8567709243485235e-05, |
|
"loss": 0.933, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.855605929914867e-05, |
|
"loss": 0.9394, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.854443265470078e-05, |
|
"loss": 0.9464, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.853278271036421e-05, |
|
"loss": 0.9254, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.852113276602764e-05, |
|
"loss": 0.9263, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.850948282169108e-05, |
|
"loss": 0.9255, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8497832877354514e-05, |
|
"loss": 0.9209, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8486182933017946e-05, |
|
"loss": 0.9194, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.847453298868138e-05, |
|
"loss": 0.9198, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.846288304434482e-05, |
|
"loss": 0.9218, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.845123310000825e-05, |
|
"loss": 0.9217, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.843958315567169e-05, |
|
"loss": 0.9279, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.842793321133512e-05, |
|
"loss": 0.9383, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8416283266998554e-05, |
|
"loss": 0.9232, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8404633322661986e-05, |
|
"loss": 0.9182, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.839300667821409e-05, |
|
"loss": 0.9315, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.838135673387753e-05, |
|
"loss": 0.9224, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.836973008942963e-05, |
|
"loss": 0.9153, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.835808014509307e-05, |
|
"loss": 0.9168, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.83464302007565e-05, |
|
"loss": 0.9121, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8334780256419935e-05, |
|
"loss": 0.9196, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.832313031208337e-05, |
|
"loss": 0.9037, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8311480367746806e-05, |
|
"loss": 0.9065, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.829983042341024e-05, |
|
"loss": 0.9086, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.828818047907368e-05, |
|
"loss": 0.9017, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.827653053473711e-05, |
|
"loss": 0.9111, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.826488059040054e-05, |
|
"loss": 0.9074, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8253230646063975e-05, |
|
"loss": 0.9048, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.824158070172741e-05, |
|
"loss": 0.9062, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8229930757390847e-05, |
|
"loss": 0.9056, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.821830411294295e-05, |
|
"loss": 0.9076, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.820665416860639e-05, |
|
"loss": 0.9077, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.819502752415849e-05, |
|
"loss": 0.9056, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8183377579821924e-05, |
|
"loss": 0.9144, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8171727635485356e-05, |
|
"loss": 0.9062, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.816007769114879e-05, |
|
"loss": 0.9229, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.814842774681223e-05, |
|
"loss": 0.9129, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.813677780247566e-05, |
|
"loss": 0.9158, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.81251278581391e-05, |
|
"loss": 0.9036, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.81135012136912e-05, |
|
"loss": 0.9322, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8101851269354634e-05, |
|
"loss": 0.9122, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.809020132501807e-05, |
|
"loss": 0.9021, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8078551380681506e-05, |
|
"loss": 0.8997, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.806690143634494e-05, |
|
"loss": 0.901, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.805525149200837e-05, |
|
"loss": 0.8983, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.804360154767181e-05, |
|
"loss": 0.898, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.803195160333524e-05, |
|
"loss": 0.8959, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.802034825877602e-05, |
|
"loss": 0.9191, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8008698314439455e-05, |
|
"loss": 0.9007, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.799704837010289e-05, |
|
"loss": 0.8908, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7985398425766326e-05, |
|
"loss": 0.9045, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.797374848142976e-05, |
|
"loss": 0.8994, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.796209853709319e-05, |
|
"loss": 0.8981, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.795044859275662e-05, |
|
"loss": 0.8943, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7938798648420056e-05, |
|
"loss": 0.9004, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7927148704083495e-05, |
|
"loss": 0.8906, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.791549875974693e-05, |
|
"loss": 0.8881, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.790384881541036e-05, |
|
"loss": 0.889, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.78921988710738e-05, |
|
"loss": 0.8866, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.788054892673723e-05, |
|
"loss": 0.8868, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7868922282289334e-05, |
|
"loss": 0.8909, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.785727233795277e-05, |
|
"loss": 0.8825, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7845622393616205e-05, |
|
"loss": 0.884, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.783397244927964e-05, |
|
"loss": 0.8906, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.782232250494307e-05, |
|
"loss": 0.8841, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.781067256060651e-05, |
|
"loss": 0.8847, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.779902261626994e-05, |
|
"loss": 0.8936, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7787395971822045e-05, |
|
"loss": 0.8905, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7775769327374154e-05, |
|
"loss": 0.8817, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.776411938303759e-05, |
|
"loss": 0.8854, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7752492738589696e-05, |
|
"loss": 0.8862, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7740842794253135e-05, |
|
"loss": 0.8958, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.772919284991657e-05, |
|
"loss": 0.9013, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.771754290558e-05, |
|
"loss": 0.8853, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.770589296124343e-05, |
|
"loss": 0.8809, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.769424301690687e-05, |
|
"loss": 0.8817, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7682593072570304e-05, |
|
"loss": 0.8817, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7670943128233736e-05, |
|
"loss": 0.881, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.765929318389717e-05, |
|
"loss": 0.9004, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.76476432395606e-05, |
|
"loss": 0.8794, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7635993295224033e-05, |
|
"loss": 0.8715, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.762434335088747e-05, |
|
"loss": 0.8809, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7612693406550905e-05, |
|
"loss": 0.8814, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.760104346221434e-05, |
|
"loss": 0.8783, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.758939351787777e-05, |
|
"loss": 0.8761, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.757776687342988e-05, |
|
"loss": 0.8793, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.756611692909331e-05, |
|
"loss": 0.8768, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.755446698475675e-05, |
|
"loss": 0.8768, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.754281704042018e-05, |
|
"loss": 0.8745, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7531167096083616e-05, |
|
"loss": 0.8727, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7519563751524396e-05, |
|
"loss": 0.8746, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7507913807187835e-05, |
|
"loss": 0.8743, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.749626386285127e-05, |
|
"loss": 0.8775, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.74846139185147e-05, |
|
"loss": 0.8712, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.747296397417814e-05, |
|
"loss": 0.8725, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.746133732973024e-05, |
|
"loss": 0.8777, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7449687385393674e-05, |
|
"loss": 0.8891, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7438037441057106e-05, |
|
"loss": 0.8747, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7426387496720546e-05, |
|
"loss": 0.8706, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.741476085227265e-05, |
|
"loss": 0.8772, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.740311090793608e-05, |
|
"loss": 0.8666, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.739146096359951e-05, |
|
"loss": 0.8743, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.737981101926295e-05, |
|
"loss": 0.8679, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.736818437481506e-05, |
|
"loss": 0.8814, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7356534430478494e-05, |
|
"loss": 0.8725, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.734488448614193e-05, |
|
"loss": 0.8693, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.733323454180536e-05, |
|
"loss": 0.8713, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.732158459746879e-05, |
|
"loss": 0.8712, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.73099579530209e-05, |
|
"loss": 0.8651, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.729830800868434e-05, |
|
"loss": 0.8671, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.728665806434777e-05, |
|
"loss": 0.8665, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7275008120011205e-05, |
|
"loss": 0.8701, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.726335817567464e-05, |
|
"loss": 0.8673, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.725170823133807e-05, |
|
"loss": 0.8647, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.724005828700151e-05, |
|
"loss": 0.8629, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.722840834266494e-05, |
|
"loss": 0.866, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7216758398328374e-05, |
|
"loss": 0.8644, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7205108453991806e-05, |
|
"loss": 0.8627, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7193481809543916e-05, |
|
"loss": 0.8642, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.718183186520735e-05, |
|
"loss": 0.8631, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.717018192087078e-05, |
|
"loss": 0.8719, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.715853197653422e-05, |
|
"loss": 0.876, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.714688203219765e-05, |
|
"loss": 0.8672, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.713525538774976e-05, |
|
"loss": 0.871, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7123605443413194e-05, |
|
"loss": 0.8581, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7111978798965303e-05, |
|
"loss": 0.867, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7100328854628736e-05, |
|
"loss": 0.87, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.708870221018084e-05, |
|
"loss": 0.8602, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.707705226584427e-05, |
|
"loss": 0.8628, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.706540232150771e-05, |
|
"loss": 0.8648, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.705375237717114e-05, |
|
"loss": 0.8581, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.704210243283458e-05, |
|
"loss": 0.861, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7030452488498014e-05, |
|
"loss": 0.8733, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7018802544161446e-05, |
|
"loss": 0.8674, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7007152599824886e-05, |
|
"loss": 0.86, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.699550265548832e-05, |
|
"loss": 0.8552, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.698385271115175e-05, |
|
"loss": 0.8596, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.697220276681518e-05, |
|
"loss": 0.8566, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6960552822478615e-05, |
|
"loss": 0.8625, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6948926178030725e-05, |
|
"loss": 0.8611, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.693727623369416e-05, |
|
"loss": 0.8611, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6925626289357596e-05, |
|
"loss": 0.8582, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.691397634502103e-05, |
|
"loss": 0.8596, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.690232640068446e-05, |
|
"loss": 0.8542, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.689067645634789e-05, |
|
"loss": 0.8568, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.68790498119e-05, |
|
"loss": 0.8594, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6867399867563435e-05, |
|
"loss": 0.8529, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6855749923226874e-05, |
|
"loss": 0.8501, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.684409997889031e-05, |
|
"loss": 0.8544, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.683245003455374e-05, |
|
"loss": 0.8526, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.682080009021717e-05, |
|
"loss": 0.8649, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6809150145880604e-05, |
|
"loss": 0.852, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6797500201544036e-05, |
|
"loss": 0.8574, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6785850257207476e-05, |
|
"loss": 0.8558, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.677420031287091e-05, |
|
"loss": 0.8509, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.676255036853434e-05, |
|
"loss": 0.8587, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.675090042419777e-05, |
|
"loss": 0.8534, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6739250479861205e-05, |
|
"loss": 0.8504, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6727600535524644e-05, |
|
"loss": 0.8564, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6715973891076754e-05, |
|
"loss": 0.8495, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6704323946740186e-05, |
|
"loss": 0.8519, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.669267400240362e-05, |
|
"loss": 0.8472, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.668102405806705e-05, |
|
"loss": 0.8479, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.666937411373048e-05, |
|
"loss": 0.8503, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6657724169393916e-05, |
|
"loss": 0.8516, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.664609752494603e-05, |
|
"loss": 0.849, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6634470880498135e-05, |
|
"loss": 0.8481, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6622844236050244e-05, |
|
"loss": 0.8456, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.661119429171368e-05, |
|
"loss": 0.8506, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.659954434737711e-05, |
|
"loss": 0.8504, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.658789440304054e-05, |
|
"loss": 0.848, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6576244458703974e-05, |
|
"loss": 0.8492, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.656459451436741e-05, |
|
"loss": 0.849, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6552944570030846e-05, |
|
"loss": 0.8546, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6541294625694285e-05, |
|
"loss": 0.8427, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.652964468135772e-05, |
|
"loss": 0.8508, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.651799473702115e-05, |
|
"loss": 0.8483, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.650634479268458e-05, |
|
"loss": 0.848, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.649469484834802e-05, |
|
"loss": 0.8455, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6483091503788794e-05, |
|
"loss": 0.8497, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6471441559452233e-05, |
|
"loss": 0.852, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6459791615115666e-05, |
|
"loss": 0.8497, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.64481416707791e-05, |
|
"loss": 0.8444, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.643649172644253e-05, |
|
"loss": 0.8655, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.642484178210596e-05, |
|
"loss": 0.843, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6413191837769395e-05, |
|
"loss": 0.8427, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6401541893432834e-05, |
|
"loss": 0.8416, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6389891949096274e-05, |
|
"loss": 0.8401, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6378242004759706e-05, |
|
"loss": 0.8429, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.636659206042314e-05, |
|
"loss": 0.8458, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.635494211608657e-05, |
|
"loss": 0.8574, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.634329217175001e-05, |
|
"loss": 0.845, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.633168882719078e-05, |
|
"loss": 0.8415, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.632003888285422e-05, |
|
"loss": 0.8424, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6308388938517655e-05, |
|
"loss": 0.8431, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.629673899418109e-05, |
|
"loss": 0.8437, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.628508904984452e-05, |
|
"loss": 0.8367, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.627343910550795e-05, |
|
"loss": 0.8404, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.626181246106006e-05, |
|
"loss": 0.8385, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.62501625167235e-05, |
|
"loss": 0.8422, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.623851257238693e-05, |
|
"loss": 0.84, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6226862628050365e-05, |
|
"loss": 0.8419, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.62152126837138e-05, |
|
"loss": 0.8398, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.620356273937723e-05, |
|
"loss": 0.8415, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.619191279504066e-05, |
|
"loss": 0.8439, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.61802628507041e-05, |
|
"loss": 0.8414, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6168612906367534e-05, |
|
"loss": 0.8402, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6156962962030966e-05, |
|
"loss": 0.8433, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6145313017694405e-05, |
|
"loss": 0.8403, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.613368637324651e-05, |
|
"loss": 0.8376, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.612203642890994e-05, |
|
"loss": 0.8375, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.611038648457338e-05, |
|
"loss": 0.8396, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.609873654023681e-05, |
|
"loss": 0.8345, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6087086595900245e-05, |
|
"loss": 0.8361, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6075436651563684e-05, |
|
"loss": 0.8351, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6063786707227116e-05, |
|
"loss": 0.8379, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.605213676289055e-05, |
|
"loss": 0.8309, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.604048681855399e-05, |
|
"loss": 0.8381, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.602883687421742e-05, |
|
"loss": 0.832, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.601718692988085e-05, |
|
"loss": 0.8333, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6005536985544285e-05, |
|
"loss": 0.8338, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.599388704120772e-05, |
|
"loss": 0.8335, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5982237096871156e-05, |
|
"loss": 0.8351, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.597058715253459e-05, |
|
"loss": 0.8336, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.595893720819802e-05, |
|
"loss": 0.8344, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5947287263861453e-05, |
|
"loss": 0.8373, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.593570721919091e-05, |
|
"loss": 0.8386, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.592405727485434e-05, |
|
"loss": 0.8302, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5912407330517775e-05, |
|
"loss": 0.8354, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.590075738618121e-05, |
|
"loss": 0.8339, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.588913074173332e-05, |
|
"loss": 0.8419, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.587750409728542e-05, |
|
"loss": 0.8406, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.586585415294886e-05, |
|
"loss": 0.835, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.585420420861229e-05, |
|
"loss": 0.8351, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.584255426427573e-05, |
|
"loss": 0.8365, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5830927619827834e-05, |
|
"loss": 0.8316, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5819277675491266e-05, |
|
"loss": 0.8244, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.58076277311547e-05, |
|
"loss": 0.8276, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.579597778681814e-05, |
|
"loss": 0.8323, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.578432784248157e-05, |
|
"loss": 0.8283, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5772677898145e-05, |
|
"loss": 0.829, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.576102795380844e-05, |
|
"loss": 0.832, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5749378009471874e-05, |
|
"loss": 0.8349, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5737728065135306e-05, |
|
"loss": 0.8335, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5726078120798746e-05, |
|
"loss": 0.8316, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.571442817646218e-05, |
|
"loss": 0.8307, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.570277823212561e-05, |
|
"loss": 0.8273, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.569112828778904e-05, |
|
"loss": 0.8277, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5679478343452475e-05, |
|
"loss": 0.8278, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5667828399115914e-05, |
|
"loss": 0.8298, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5656178454779347e-05, |
|
"loss": 0.8277, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5644551810331456e-05, |
|
"loss": 0.8248, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.563290186599489e-05, |
|
"loss": 0.8233, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.562125192165832e-05, |
|
"loss": 0.8258, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.560960197732175e-05, |
|
"loss": 0.8208, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5597952032985186e-05, |
|
"loss": 0.823, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5586302088648625e-05, |
|
"loss": 0.8265, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.557465214431206e-05, |
|
"loss": 0.8266, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.556300219997549e-05, |
|
"loss": 0.8288, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.55513755555276e-05, |
|
"loss": 0.8261, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.553972561119103e-05, |
|
"loss": 0.827, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5528075666854464e-05, |
|
"loss": 0.8253, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5516449022406574e-05, |
|
"loss": 0.824, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.550479907807001e-05, |
|
"loss": 0.8229, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5493149133733445e-05, |
|
"loss": 0.8278, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.548149918939688e-05, |
|
"loss": 0.823, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.546984924506031e-05, |
|
"loss": 0.8241, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.545819930072374e-05, |
|
"loss": 0.8194, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5446549356387175e-05, |
|
"loss": 0.8217, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5434899412050614e-05, |
|
"loss": 0.8224, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.542327276760272e-05, |
|
"loss": 0.8192, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5411646123154826e-05, |
|
"loss": 0.8228, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.539999617881826e-05, |
|
"loss": 0.8213, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.538836953437037e-05, |
|
"loss": 0.8341, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.53767195900338e-05, |
|
"loss": 0.8326, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536506964569723e-05, |
|
"loss": 0.8581, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535344300124934e-05, |
|
"loss": 0.8591, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5341793056912775e-05, |
|
"loss": 0.8319, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5330143112576214e-05, |
|
"loss": 0.8294, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5318493168239646e-05, |
|
"loss": 0.8245, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.530684322390308e-05, |
|
"loss": 0.8229, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.529519327956651e-05, |
|
"loss": 0.8261, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5283543335229944e-05, |
|
"loss": 0.8417, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.527189339089338e-05, |
|
"loss": 0.8273, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5260243446556815e-05, |
|
"loss": 0.825, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5248616802108925e-05, |
|
"loss": 0.8281, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.523699015766103e-05, |
|
"loss": 0.8238, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.522534021332446e-05, |
|
"loss": 0.8215, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.52136902689879e-05, |
|
"loss": 0.8256, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.520204032465133e-05, |
|
"loss": 0.8219, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.519039038031477e-05, |
|
"loss": 0.8211, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.51787404359782e-05, |
|
"loss": 0.8247, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5167090491641635e-05, |
|
"loss": 0.8256, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.515546384719374e-05, |
|
"loss": 0.8391, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.514381390285717e-05, |
|
"loss": 0.8362, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.513216395852061e-05, |
|
"loss": 0.8285, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.512051401418404e-05, |
|
"loss": 0.8232, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.510886406984748e-05, |
|
"loss": 0.8222, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5097214125510914e-05, |
|
"loss": 0.8231, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5085564181174346e-05, |
|
"loss": 0.8229, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.507391423683778e-05, |
|
"loss": 0.8194, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.506226429250121e-05, |
|
"loss": 0.8189, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.505061434816465e-05, |
|
"loss": 0.8164, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.503896440382808e-05, |
|
"loss": 0.822, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5027314459491515e-05, |
|
"loss": 0.8151, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.501566451515495e-05, |
|
"loss": 0.8195, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.500403787070706e-05, |
|
"loss": 0.8306, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.499238792637049e-05, |
|
"loss": 0.8205, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.49807612819226e-05, |
|
"loss": 0.8271, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.496911133758603e-05, |
|
"loss": 0.8212, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.495746139324947e-05, |
|
"loss": 0.825, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.49458114489129e-05, |
|
"loss": 0.817, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4934161504576335e-05, |
|
"loss": 0.8181, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.492251156023977e-05, |
|
"loss": 0.8214, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.49108616159032e-05, |
|
"loss": 0.8148, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.489921167156664e-05, |
|
"loss": 0.8189, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.488756172723007e-05, |
|
"loss": 0.8179, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4875911782893504e-05, |
|
"loss": 0.8149, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4864261838556936e-05, |
|
"loss": 0.82, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4852635194109046e-05, |
|
"loss": 0.8152, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.484098524977248e-05, |
|
"loss": 0.8125, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.482933530543592e-05, |
|
"loss": 0.8136, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.481768536109935e-05, |
|
"loss": 0.8138, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.480603541676278e-05, |
|
"loss": 0.8107, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4794385472426214e-05, |
|
"loss": 0.8158, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4782735528089647e-05, |
|
"loss": 0.8135, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4771108883641756e-05, |
|
"loss": 0.816, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.475945893930519e-05, |
|
"loss": 0.8199, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.474780899496863e-05, |
|
"loss": 0.8142, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.473615905063206e-05, |
|
"loss": 0.8129, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.472450910629549e-05, |
|
"loss": 0.8195, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4712859161958925e-05, |
|
"loss": 0.8131, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.470120921762236e-05, |
|
"loss": 0.8128, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4689559273285796e-05, |
|
"loss": 0.8113, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4677932628837906e-05, |
|
"loss": 0.8127, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.466628268450134e-05, |
|
"loss": 0.814, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.465463274016477e-05, |
|
"loss": 0.8139, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.464300609571688e-05, |
|
"loss": 0.8151, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.463135615138031e-05, |
|
"loss": 0.8121, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4619706207043745e-05, |
|
"loss": 0.8115, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.460805626270718e-05, |
|
"loss": 0.8128, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4596406318370617e-05, |
|
"loss": 0.8125, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.458475637403405e-05, |
|
"loss": 0.8092, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.457310642969748e-05, |
|
"loss": 0.8085, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4561456485360914e-05, |
|
"loss": 0.8093, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4549806541024346e-05, |
|
"loss": 0.816, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4538203196465126e-05, |
|
"loss": 0.8086, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.452655325212856e-05, |
|
"loss": 0.8163, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4514903307792e-05, |
|
"loss": 0.8117, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.450325336345543e-05, |
|
"loss": 0.8066, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.449160341911886e-05, |
|
"loss": 0.815, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.44799534747823e-05, |
|
"loss": 0.8116, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4468326830334404e-05, |
|
"loss": 0.8091, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.445667688599784e-05, |
|
"loss": 0.8184, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4445026941661276e-05, |
|
"loss": 0.8122, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4433400297213386e-05, |
|
"loss": 0.8081, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.442175035287682e-05, |
|
"loss": 0.8166, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.441012370842893e-05, |
|
"loss": 0.8095, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.439847376409236e-05, |
|
"loss": 0.8067, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.438682381975579e-05, |
|
"loss": 0.8133, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4375173875419225e-05, |
|
"loss": 0.8031, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.436352393108266e-05, |
|
"loss": 0.8135, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4351873986746096e-05, |
|
"loss": 0.8104, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.434022404240953e-05, |
|
"loss": 0.8069, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.432857409807296e-05, |
|
"loss": 0.8095, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.431692415373639e-05, |
|
"loss": 0.8065, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4305274209399826e-05, |
|
"loss": 0.8072, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4293647564951935e-05, |
|
"loss": 0.8085, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4281997620615374e-05, |
|
"loss": 0.8125, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.427034767627881e-05, |
|
"loss": 0.8063, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.425869773194224e-05, |
|
"loss": 0.8096, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.424704778760567e-05, |
|
"loss": 0.8092, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4235397843269104e-05, |
|
"loss": 0.8079, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4223771198821214e-05, |
|
"loss": 0.8065, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.421212125448465e-05, |
|
"loss": 0.8095, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4200471310148085e-05, |
|
"loss": 0.8067, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.418882136581152e-05, |
|
"loss": 0.8038, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.417719472136363e-05, |
|
"loss": 0.8113, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.416554477702706e-05, |
|
"loss": 0.8055, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.415389483269049e-05, |
|
"loss": 0.803, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4142244888353924e-05, |
|
"loss": 0.8099, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4130594944017363e-05, |
|
"loss": 0.8102, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4118968299569466e-05, |
|
"loss": 0.8033, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4107341655121576e-05, |
|
"loss": 0.8094, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.409569171078501e-05, |
|
"loss": 0.8063, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.408404176644844e-05, |
|
"loss": 0.8095, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.407239182211187e-05, |
|
"loss": 0.8048, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4060741877775305e-05, |
|
"loss": 0.8051, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4049091933438745e-05, |
|
"loss": 0.8058, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.403744198910218e-05, |
|
"loss": 0.8087, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.402579204476561e-05, |
|
"loss": 0.8039, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.401414210042905e-05, |
|
"loss": 0.7978, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.400249215609248e-05, |
|
"loss": 0.8056, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.399084221175591e-05, |
|
"loss": 0.7997, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.397919226741935e-05, |
|
"loss": 0.803, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.3967542323082785e-05, |
|
"loss": 0.802, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.395589237874622e-05, |
|
"loss": 0.8048, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.394424243440965e-05, |
|
"loss": 0.801, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.393259249007308e-05, |
|
"loss": 0.8046, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.392094254573652e-05, |
|
"loss": 0.8051, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.390931590128863e-05, |
|
"loss": 0.8089, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.389766595695206e-05, |
|
"loss": 0.8077, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3886016012615495e-05, |
|
"loss": 0.803, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.387436606827893e-05, |
|
"loss": 0.8043, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.386271612394236e-05, |
|
"loss": 0.8027, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.38510661796058e-05, |
|
"loss": 0.8022, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.383941623526923e-05, |
|
"loss": 0.8056, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.382778959082134e-05, |
|
"loss": 0.8055, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3816139646484774e-05, |
|
"loss": 0.802, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3804489702148206e-05, |
|
"loss": 0.7967, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.379283975781164e-05, |
|
"loss": 0.8052, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.378118981347507e-05, |
|
"loss": 0.7993, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.376953986913851e-05, |
|
"loss": 0.7974, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.375788992480194e-05, |
|
"loss": 0.8015, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.374626328035405e-05, |
|
"loss": 0.8023, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3734613336017484e-05, |
|
"loss": 0.7947, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3722963391680917e-05, |
|
"loss": 0.8031, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.371131344734435e-05, |
|
"loss": 0.801, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.369966350300779e-05, |
|
"loss": 0.7999, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.368803685855989e-05, |
|
"loss": 0.8036, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.367638691422333e-05, |
|
"loss": 0.8005, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.366473696988676e-05, |
|
"loss": 0.7988, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3653087025550195e-05, |
|
"loss": 0.7989, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.364143708121363e-05, |
|
"loss": 0.7995, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.362978713687706e-05, |
|
"loss": 0.8034, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.36181371925405e-05, |
|
"loss": 0.8013, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.360648724820393e-05, |
|
"loss": 0.7954, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.359486060375604e-05, |
|
"loss": 0.8002, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.358321065941947e-05, |
|
"loss": 0.8041, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3571560715082905e-05, |
|
"loss": 0.7984, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.355991077074634e-05, |
|
"loss": 0.7967, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.354826082640978e-05, |
|
"loss": 0.7998, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.353661088207321e-05, |
|
"loss": 0.7988, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.352496093773664e-05, |
|
"loss": 0.7952, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3513310993400074e-05, |
|
"loss": 0.7984, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.350173094872953e-05, |
|
"loss": 0.7991, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3490081004392964e-05, |
|
"loss": 0.7961, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3478431060056396e-05, |
|
"loss": 0.8015, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.346678111571983e-05, |
|
"loss": 0.7988, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.345513117138327e-05, |
|
"loss": 0.7987, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.34434812270467e-05, |
|
"loss": 0.7979, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.343183128271013e-05, |
|
"loss": 0.7958, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3420181338373565e-05, |
|
"loss": 0.7923, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3408531394037e-05, |
|
"loss": 0.8003, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.339688144970043e-05, |
|
"loss": 0.7944, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.338523150536387e-05, |
|
"loss": 0.7981, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.33735815610273e-05, |
|
"loss": 0.7953, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.336195491657941e-05, |
|
"loss": 0.7931, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.335030497224284e-05, |
|
"loss": 0.7919, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3338655027906276e-05, |
|
"loss": 0.796, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.332700508356971e-05, |
|
"loss": 0.7923, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.331537843912182e-05, |
|
"loss": 0.7909, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.330372849478526e-05, |
|
"loss": 0.8002, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.329207855044869e-05, |
|
"loss": 0.7961, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.328042860611212e-05, |
|
"loss": 0.7997, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3268778661775554e-05, |
|
"loss": 0.7937, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3257175317216334e-05, |
|
"loss": 0.7928, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3245525372879766e-05, |
|
"loss": 0.7977, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3233875428543205e-05, |
|
"loss": 0.7929, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.322222548420664e-05, |
|
"loss": 0.7884, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.321057553987008e-05, |
|
"loss": 0.7985, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.319894889542218e-05, |
|
"loss": 0.7942, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.318732225097429e-05, |
|
"loss": 0.7968, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.317567230663772e-05, |
|
"loss": 0.797, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3164022362301154e-05, |
|
"loss": 0.7997, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3152372417964586e-05, |
|
"loss": 0.809, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3140745773516696e-05, |
|
"loss": 0.7955, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3129095829180135e-05, |
|
"loss": 0.7933, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.311744588484357e-05, |
|
"loss": 0.7966, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3105795940507e-05, |
|
"loss": 0.7945, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.309414599617043e-05, |
|
"loss": 0.7947, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3082496051833865e-05, |
|
"loss": 0.797, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3070869407385974e-05, |
|
"loss": 0.7984, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.305921946304941e-05, |
|
"loss": 0.821, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3047569518712846e-05, |
|
"loss": 0.8131, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.303591957437628e-05, |
|
"loss": 0.806, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.302426963003971e-05, |
|
"loss": 0.8243, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.301261968570314e-05, |
|
"loss": 0.8011, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.300099304125525e-05, |
|
"loss": 0.7979, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2989343096918685e-05, |
|
"loss": 0.8148, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2977693152582124e-05, |
|
"loss": 0.8092, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.296606650813423e-05, |
|
"loss": 0.8071, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.295441656379766e-05, |
|
"loss": 0.8033, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.294276661946109e-05, |
|
"loss": 0.8039, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.293111667512453e-05, |
|
"loss": 0.7987, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.291946673078796e-05, |
|
"loss": 0.7926, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.29078167864514e-05, |
|
"loss": 0.7963, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2896166842114835e-05, |
|
"loss": 0.8042, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.288451689777827e-05, |
|
"loss": 0.8003, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.28728669534417e-05, |
|
"loss": 0.7982, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.286121700910513e-05, |
|
"loss": 0.7946, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.284959036465724e-05, |
|
"loss": 0.7949, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2837940420320674e-05, |
|
"loss": 0.7978, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.282631377587278e-05, |
|
"loss": 0.7875, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2814663831536216e-05, |
|
"loss": 0.7979, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.280301388719965e-05, |
|
"loss": 0.8013, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.279136394286308e-05, |
|
"loss": 0.8133, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.277971399852651e-05, |
|
"loss": 0.8316, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.276808735407862e-05, |
|
"loss": 0.8268, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2756437409742055e-05, |
|
"loss": 0.8176, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2744787465405494e-05, |
|
"loss": 0.8166, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2733137521068927e-05, |
|
"loss": 0.8111, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.272148757673236e-05, |
|
"loss": 0.8183, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.270983763239579e-05, |
|
"loss": 0.8156, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2698187688059224e-05, |
|
"loss": 0.8149, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.268653774372266e-05, |
|
"loss": 0.8104, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2674887799386095e-05, |
|
"loss": 0.8024, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2663237855049534e-05, |
|
"loss": 0.8, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.265158791071297e-05, |
|
"loss": 0.8098, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.263996126626507e-05, |
|
"loss": 0.7983, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.26283113219285e-05, |
|
"loss": 0.8053, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2616661377591934e-05, |
|
"loss": 0.8107, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2605034733144044e-05, |
|
"loss": 0.8356, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.259338478880748e-05, |
|
"loss": 0.8317, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2581734844470915e-05, |
|
"loss": 0.8162, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.257008490013435e-05, |
|
"loss": 0.8457, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.255845825568646e-05, |
|
"loss": 0.8166, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.254680831134989e-05, |
|
"loss": 0.8169, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.253515836701332e-05, |
|
"loss": 0.8308, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.252353172256543e-05, |
|
"loss": 0.8338, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.251188177822887e-05, |
|
"loss": 0.8385, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.25002318338923e-05, |
|
"loss": 0.8332, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2488581889555736e-05, |
|
"loss": 0.8459, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.247693194521917e-05, |
|
"loss": 0.7985, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.24652820008826e-05, |
|
"loss": 0.7977, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.245363205654603e-05, |
|
"loss": 0.8423, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.244198211220947e-05, |
|
"loss": 0.8291, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2430332167872904e-05, |
|
"loss": 0.8161, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2418705523425014e-05, |
|
"loss": 0.8181, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2407055579088446e-05, |
|
"loss": 0.8091, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.239540563475188e-05, |
|
"loss": 0.839, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.238375569041531e-05, |
|
"loss": 0.8544, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.237210574607875e-05, |
|
"loss": 0.8582, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.236047910163086e-05, |
|
"loss": 0.8648, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.234882915729429e-05, |
|
"loss": 0.8724, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2337179212957725e-05, |
|
"loss": 0.8575, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.232552926862116e-05, |
|
"loss": 0.8607, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.231387932428459e-05, |
|
"loss": 0.8543, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.230222937994803e-05, |
|
"loss": 0.8287, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.229057943561146e-05, |
|
"loss": 0.8257, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.227895279116357e-05, |
|
"loss": 0.8382, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2267302846827e-05, |
|
"loss": 0.8311, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2255652902490435e-05, |
|
"loss": 0.8336, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.224400295815387e-05, |
|
"loss": 0.826, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.22323530138173e-05, |
|
"loss": 0.8268, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.222072636936941e-05, |
|
"loss": 0.829, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.220907642503284e-05, |
|
"loss": 0.829, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.219742648069628e-05, |
|
"loss": 0.8276, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2185776536359714e-05, |
|
"loss": 0.8348, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2174126592023146e-05, |
|
"loss": 0.8374, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.216247664768658e-05, |
|
"loss": 0.8373, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.215082670335002e-05, |
|
"loss": 0.8385, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.213917675901345e-05, |
|
"loss": 0.8631, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.212755011456555e-05, |
|
"loss": 0.8468, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.211590017022899e-05, |
|
"loss": 0.8287, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2104250225892424e-05, |
|
"loss": 0.8369, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2092600281555857e-05, |
|
"loss": 0.8269, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.208095033721929e-05, |
|
"loss": 0.8374, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.206930039288273e-05, |
|
"loss": 0.8268, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.205767374843483e-05, |
|
"loss": 0.8253, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.204602380409827e-05, |
|
"loss": 0.823, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.203439715965037e-05, |
|
"loss": 0.8634, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2022747215313805e-05, |
|
"loss": 0.8651, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2011120570865915e-05, |
|
"loss": 0.8343, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.199949392641802e-05, |
|
"loss": 0.8262, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.198784398208146e-05, |
|
"loss": 0.8061, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.197619403774489e-05, |
|
"loss": 0.8069, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.196454409340833e-05, |
|
"loss": 0.8068, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.195289414907176e-05, |
|
"loss": 0.8182, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.194124420473519e-05, |
|
"loss": 0.8081, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1929594260398626e-05, |
|
"loss": 0.8062, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.191794431606206e-05, |
|
"loss": 0.8123, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.19062943717255e-05, |
|
"loss": 0.8168, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.189464442738893e-05, |
|
"loss": 0.8224, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.188299448305236e-05, |
|
"loss": 0.8218, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1871344538715794e-05, |
|
"loss": 0.8066, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1859694594379227e-05, |
|
"loss": 0.8019, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.184804465004266e-05, |
|
"loss": 0.8052, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.18363947057061e-05, |
|
"loss": 0.7851, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.182474476136953e-05, |
|
"loss": 0.7908, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.181309481703296e-05, |
|
"loss": 0.7895, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.18014448726964e-05, |
|
"loss": 0.786, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1789818228248505e-05, |
|
"loss": 0.7863, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.177816828391194e-05, |
|
"loss": 0.7956, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.176654163946405e-05, |
|
"loss": 0.7947, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1754891695127486e-05, |
|
"loss": 0.7881, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.174324175079092e-05, |
|
"loss": 0.7873, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.173159180645435e-05, |
|
"loss": 0.7989, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.171994186211778e-05, |
|
"loss": 0.7978, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1708291917781215e-05, |
|
"loss": 0.7814, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1696641973444655e-05, |
|
"loss": 0.7898, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1685015328996764e-05, |
|
"loss": 0.7904, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1673365384660197e-05, |
|
"loss": 0.7859, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.166171544032363e-05, |
|
"loss": 0.79, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.165006549598706e-05, |
|
"loss": 0.7886, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1638415551650494e-05, |
|
"loss": 0.7877, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1626765607313926e-05, |
|
"loss": 0.7856, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1615115662977365e-05, |
|
"loss": 0.7807, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.16034657186408e-05, |
|
"loss": 0.7847, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.159181577430423e-05, |
|
"loss": 0.787, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.158016582996766e-05, |
|
"loss": 0.7851, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.15685158856311e-05, |
|
"loss": 0.787, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1556865941294534e-05, |
|
"loss": 0.7851, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.154521599695797e-05, |
|
"loss": 0.7885, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1533566052621405e-05, |
|
"loss": 0.7823, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.152191610828484e-05, |
|
"loss": 0.7876, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.151026616394827e-05, |
|
"loss": 0.7874, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.14986162196117e-05, |
|
"loss": 0.7853, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.148698957516381e-05, |
|
"loss": 0.7946, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.147533963082725e-05, |
|
"loss": 0.7879, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1463689686490684e-05, |
|
"loss": 0.788, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1452063042042787e-05, |
|
"loss": 0.7834, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.144041309770622e-05, |
|
"loss": 0.7832, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.142876315336965e-05, |
|
"loss": 0.7851, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.141711320903309e-05, |
|
"loss": 0.7814, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.140546326469652e-05, |
|
"loss": 0.7828, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.139381332035996e-05, |
|
"loss": 0.7884, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1382186675912065e-05, |
|
"loss": 0.787, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.13705367315755e-05, |
|
"loss": 0.7932, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.135888678723893e-05, |
|
"loss": 0.788, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.134723684290236e-05, |
|
"loss": 0.7815, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.13355868985658e-05, |
|
"loss": 0.7907, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.132396025411791e-05, |
|
"loss": 0.7947, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.131231030978134e-05, |
|
"loss": 0.7902, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1300660365444775e-05, |
|
"loss": 0.7888, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.128901042110821e-05, |
|
"loss": 0.786, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.127738377666032e-05, |
|
"loss": 0.7844, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.126573383232375e-05, |
|
"loss": 0.7838, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.125408388798718e-05, |
|
"loss": 0.7809, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.124243394365062e-05, |
|
"loss": 0.7837, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.123080729920273e-05, |
|
"loss": 0.7841, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.121915735486616e-05, |
|
"loss": 0.7862, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1207507410529596e-05, |
|
"loss": 0.7818, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.119585746619303e-05, |
|
"loss": 0.7807, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.118420752185646e-05, |
|
"loss": 0.7823, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.11725575775199e-05, |
|
"loss": 0.7818, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.116090763318333e-05, |
|
"loss": 0.7872, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.114928098873544e-05, |
|
"loss": 0.7896, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1137631044398874e-05, |
|
"loss": 0.7799, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1125981100062306e-05, |
|
"loss": 0.7825, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.111433115572574e-05, |
|
"loss": 0.803, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.110268121138917e-05, |
|
"loss": 0.8095, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.109103126705261e-05, |
|
"loss": 0.8068, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.107938132271604e-05, |
|
"loss": 0.7902, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1067731378379475e-05, |
|
"loss": 0.7808, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.105608143404291e-05, |
|
"loss": 0.7997, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.104445478959502e-05, |
|
"loss": 0.8161, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.103280484525845e-05, |
|
"loss": 0.7839, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.102115490092189e-05, |
|
"loss": 0.7872, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.100950495658532e-05, |
|
"loss": 0.7847, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.099785501224875e-05, |
|
"loss": 0.78, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.098622836780086e-05, |
|
"loss": 0.7824, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0974578423464295e-05, |
|
"loss": 0.7872, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.09629517790164e-05, |
|
"loss": 0.7835, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.095130183467984e-05, |
|
"loss": 0.7871, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.093965189034327e-05, |
|
"loss": 0.7863, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.092800194600671e-05, |
|
"loss": 0.7843, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.091635200167014e-05, |
|
"loss": 0.7778, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0904702057333573e-05, |
|
"loss": 0.7853, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0893052112997006e-05, |
|
"loss": 0.7824, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.088140216866044e-05, |
|
"loss": 0.7862, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.086975222432388e-05, |
|
"loss": 0.7814, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.085810227998731e-05, |
|
"loss": 0.7866, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.084645233565074e-05, |
|
"loss": 0.7844, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0834802391314174e-05, |
|
"loss": 0.7828, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.082315244697761e-05, |
|
"loss": 0.7886, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0811525802529716e-05, |
|
"loss": 0.7858, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0799875858193156e-05, |
|
"loss": 0.7856, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.078822591385659e-05, |
|
"loss": 0.786, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.077657596952002e-05, |
|
"loss": 0.7984, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.076492602518345e-05, |
|
"loss": 0.7928, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0753276080846885e-05, |
|
"loss": 0.806, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0741649436398995e-05, |
|
"loss": 0.7992, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.072999949206243e-05, |
|
"loss": 0.8015, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0718349547725866e-05, |
|
"loss": 0.8038, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.07066996033893e-05, |
|
"loss": 0.7918, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.069504965905273e-05, |
|
"loss": 0.7883, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0683399714716163e-05, |
|
"loss": 0.7783, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0671749770379596e-05, |
|
"loss": 0.7752, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0660099826043035e-05, |
|
"loss": 0.7721, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.064844988170647e-05, |
|
"loss": 0.7773, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.06367999373699e-05, |
|
"loss": 0.7706, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.062514999303333e-05, |
|
"loss": 0.7756, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.061352334858544e-05, |
|
"loss": 0.7757, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0601873404248874e-05, |
|
"loss": 0.7725, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.059022345991231e-05, |
|
"loss": 0.7725, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0578573515575746e-05, |
|
"loss": 0.7754, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.056692357123918e-05, |
|
"loss": 0.7733, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.055529692679129e-05, |
|
"loss": 0.7706, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.054367028234339e-05, |
|
"loss": 0.7704, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.053202033800683e-05, |
|
"loss": 0.7765, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.052037039367026e-05, |
|
"loss": 0.7811, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0508743749222365e-05, |
|
"loss": 0.7806, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.04970938048858e-05, |
|
"loss": 0.7753, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0485443860549236e-05, |
|
"loss": 0.7761, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0473840515990016e-05, |
|
"loss": 0.8088, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0462190571653456e-05, |
|
"loss": 0.7892, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.045054062731689e-05, |
|
"loss": 0.7851, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.043889068298032e-05, |
|
"loss": 0.7802, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.042724073864375e-05, |
|
"loss": 0.7767, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0415590794307185e-05, |
|
"loss": 0.7801, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0403940849970624e-05, |
|
"loss": 0.7739, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.039231420552273e-05, |
|
"loss": 0.776, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0380664261186166e-05, |
|
"loss": 0.7782, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.03690143168496e-05, |
|
"loss": 0.7742, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.035736437251303e-05, |
|
"loss": 0.781, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.034571442817646e-05, |
|
"loss": 0.7775, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.03340644838399e-05, |
|
"loss": 0.7733, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0322414539503335e-05, |
|
"loss": 0.7693, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.031076459516677e-05, |
|
"loss": 0.7832, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.02991146508302e-05, |
|
"loss": 0.7757, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.028746470649363e-05, |
|
"loss": 0.7746, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0275814762157064e-05, |
|
"loss": 0.7743, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0264164817820503e-05, |
|
"loss": 0.7696, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0252514873483936e-05, |
|
"loss": 0.7748, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0240888229036045e-05, |
|
"loss": 0.7715, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.022926158458815e-05, |
|
"loss": 0.776, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.021761164025159e-05, |
|
"loss": 0.7788, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.020596169591502e-05, |
|
"loss": 0.8024, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.019431175157845e-05, |
|
"loss": 0.7791, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.018266180724189e-05, |
|
"loss": 0.7831, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0171011862905324e-05, |
|
"loss": 0.7785, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0159385218457427e-05, |
|
"loss": 0.7784, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0147735274120866e-05, |
|
"loss": 0.7714, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.01360853297843e-05, |
|
"loss": 0.7827, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.01244586853364e-05, |
|
"loss": 0.7722, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.011280874099983e-05, |
|
"loss": 0.7777, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.010115879666327e-05, |
|
"loss": 0.7688, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0089508852326705e-05, |
|
"loss": 0.7771, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.007785890799014e-05, |
|
"loss": 0.7729, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0066208963653576e-05, |
|
"loss": 0.7732, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.005455901931701e-05, |
|
"loss": 0.7754, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.004290907498044e-05, |
|
"loss": 0.7773, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.003125913064388e-05, |
|
"loss": 0.7773, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.001963248619598e-05, |
|
"loss": 0.7772, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0007982541859415e-05, |
|
"loss": 0.7723, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9996355897411525e-05, |
|
"loss": 0.7829, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.998470595307496e-05, |
|
"loss": 0.7803, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.997305600873839e-05, |
|
"loss": 0.7911, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.996140606440182e-05, |
|
"loss": 0.7775, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.994975612006526e-05, |
|
"loss": 0.7801, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9938106175728694e-05, |
|
"loss": 0.7775, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9926456231392126e-05, |
|
"loss": 0.7758, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.991480628705556e-05, |
|
"loss": 0.7754, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9903156342719e-05, |
|
"loss": 0.7784, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.989150639838243e-05, |
|
"loss": 0.7754, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.987987975393454e-05, |
|
"loss": 0.7753, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.986822980959797e-05, |
|
"loss": 0.7779, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9856579865261404e-05, |
|
"loss": 0.7766, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.984492992092484e-05, |
|
"loss": 0.7806, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.983327997658827e-05, |
|
"loss": 0.7814, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.982163003225171e-05, |
|
"loss": 0.7784, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.980998008791515e-05, |
|
"loss": 0.7692, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.979833014357858e-05, |
|
"loss": 0.7721, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.978668019924201e-05, |
|
"loss": 0.7697, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9775030254905445e-05, |
|
"loss": 0.7706, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.976338031056888e-05, |
|
"loss": 0.7728, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.975173036623231e-05, |
|
"loss": 0.774, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.974008042189575e-05, |
|
"loss": 0.7686, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.972847707733653e-05, |
|
"loss": 0.7732, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.971682713299996e-05, |
|
"loss": 0.7715, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.970517718866339e-05, |
|
"loss": 0.773, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9693527244326826e-05, |
|
"loss": 0.7728, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.968187729999026e-05, |
|
"loss": 0.7694, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.96702273556537e-05, |
|
"loss": 0.773, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.965857741131713e-05, |
|
"loss": 0.7704, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.964692746698057e-05, |
|
"loss": 0.776, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9635277522644e-05, |
|
"loss": 0.7741, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9623627578307433e-05, |
|
"loss": 0.7656, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9611977633970866e-05, |
|
"loss": 0.7709, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9600374289411646e-05, |
|
"loss": 0.7688, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.958872434507508e-05, |
|
"loss": 0.7756, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.957707440073852e-05, |
|
"loss": 0.7668, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.956542445640195e-05, |
|
"loss": 0.7665, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.955377451206538e-05, |
|
"loss": 0.7719, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9542124567728815e-05, |
|
"loss": 0.7662, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.953047462339225e-05, |
|
"loss": 0.7666, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9518847978944357e-05, |
|
"loss": 0.7697, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9507198034607796e-05, |
|
"loss": 0.769, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.949554809027123e-05, |
|
"loss": 0.7706, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.948389814593466e-05, |
|
"loss": 0.7684, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.947224820159809e-05, |
|
"loss": 0.7676, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9460598257261525e-05, |
|
"loss": 0.771, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.944894831292496e-05, |
|
"loss": 0.7712, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.94372983685884e-05, |
|
"loss": 0.7688, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.942564842425183e-05, |
|
"loss": 0.771, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.941399847991527e-05, |
|
"loss": 0.7656, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.940237183546737e-05, |
|
"loss": 0.7647, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9390721891130803e-05, |
|
"loss": 0.7624, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9379071946794236e-05, |
|
"loss": 0.7673, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9367422002457675e-05, |
|
"loss": 0.7634, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.935577205812111e-05, |
|
"loss": 0.7618, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.934412211378454e-05, |
|
"loss": 0.7607, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.933247216944798e-05, |
|
"loss": 0.7593, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.932082222511141e-05, |
|
"loss": 0.7635, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9309172280774844e-05, |
|
"loss": 0.757, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.929752233643828e-05, |
|
"loss": 0.7646, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9285895691990386e-05, |
|
"loss": 0.761, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.927424574765382e-05, |
|
"loss": 0.7659, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.926261910320593e-05, |
|
"loss": 0.7607, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.925096915886936e-05, |
|
"loss": 0.766, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.923931921453279e-05, |
|
"loss": 0.7602, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9227669270196225e-05, |
|
"loss": 0.766, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9216042625748334e-05, |
|
"loss": 0.7637, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9204392681411773e-05, |
|
"loss": 0.7652, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9192742737075206e-05, |
|
"loss": 0.7648, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.918109279273864e-05, |
|
"loss": 0.7639, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.916944284840207e-05, |
|
"loss": 0.766, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.91577929040655e-05, |
|
"loss": 0.769, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.914616625961761e-05, |
|
"loss": 0.7665, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.913451631528105e-05, |
|
"loss": 0.7663, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9122866370944484e-05, |
|
"loss": 0.7672, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9111216426607916e-05, |
|
"loss": 0.7622, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9099613082048697e-05, |
|
"loss": 0.7655, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.908796313771213e-05, |
|
"loss": 0.7534, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.907631319337556e-05, |
|
"loss": 0.7596, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9064663249038994e-05, |
|
"loss": 0.7588, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9053013304702426e-05, |
|
"loss": 0.7607, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9041363360365865e-05, |
|
"loss": 0.765, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.90297134160293e-05, |
|
"loss": 0.7609, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.901806347169274e-05, |
|
"loss": 0.7629, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.900641352735617e-05, |
|
"loss": 0.7656, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.89947635830196e-05, |
|
"loss": 0.7621, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.898311363868304e-05, |
|
"loss": 0.7606, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.897146369434647e-05, |
|
"loss": 0.7645, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.8959813750009905e-05, |
|
"loss": 0.7626, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.894816380567334e-05, |
|
"loss": 0.758, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.893651386133677e-05, |
|
"loss": 0.762, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.892488721688888e-05, |
|
"loss": 0.7597, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.891323727255231e-05, |
|
"loss": 0.7611, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.890158732821575e-05, |
|
"loss": 0.7588, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8889937383879184e-05, |
|
"loss": 0.7607, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8878287439542616e-05, |
|
"loss": 0.7602, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.886663749520605e-05, |
|
"loss": 0.7598, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.885501085075816e-05, |
|
"loss": 0.7653, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.884338420631026e-05, |
|
"loss": 0.7645, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.883175756186237e-05, |
|
"loss": 0.7676, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.88201076175258e-05, |
|
"loss": 0.768, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.880845767318924e-05, |
|
"loss": 0.7636, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8796807728852674e-05, |
|
"loss": 0.7617, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.878515778451611e-05, |
|
"loss": 0.7571, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.877350784017954e-05, |
|
"loss": 0.7628, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.876185789584297e-05, |
|
"loss": 0.7676, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.875020795150641e-05, |
|
"loss": 0.7659, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.873858130705852e-05, |
|
"loss": 0.7647, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.872695466261062e-05, |
|
"loss": 0.779, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.871532801816273e-05, |
|
"loss": 0.7699, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.8703678073826165e-05, |
|
"loss": 0.7635, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.86920281294896e-05, |
|
"loss": 0.76, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.868037818515303e-05, |
|
"loss": 0.7626, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.866875154070514e-05, |
|
"loss": 0.7594, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.865710159636857e-05, |
|
"loss": 0.7585, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.864545165203201e-05, |
|
"loss": 0.7597, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.863380170769544e-05, |
|
"loss": 0.7558, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.8622151763358876e-05, |
|
"loss": 0.7594, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.861050181902231e-05, |
|
"loss": 0.7607, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.859885187468574e-05, |
|
"loss": 0.7572, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.858720193034917e-05, |
|
"loss": 0.7626, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.857555198601261e-05, |
|
"loss": 0.7603, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8563902041676044e-05, |
|
"loss": 0.7659, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8552252097339484e-05, |
|
"loss": 0.7642, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8540625452891586e-05, |
|
"loss": 0.7595, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.852897550855502e-05, |
|
"loss": 0.7585, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.851732556421845e-05, |
|
"loss": 0.7628, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.850567561988189e-05, |
|
"loss": 0.7599, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.849402567554532e-05, |
|
"loss": 0.7542, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.848237573120876e-05, |
|
"loss": 0.7566, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8470725786872194e-05, |
|
"loss": 0.7602, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8459075842535627e-05, |
|
"loss": 0.7634, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.844742589819906e-05, |
|
"loss": 0.7639, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.843579925375117e-05, |
|
"loss": 0.7817, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.84241493094146e-05, |
|
"loss": 0.765, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.841249936507803e-05, |
|
"loss": 0.7583, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.840084942074147e-05, |
|
"loss": 0.7596, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8389199476404905e-05, |
|
"loss": 0.7565, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.837757283195701e-05, |
|
"loss": 0.7573, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.836592288762044e-05, |
|
"loss": 0.7626, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.835427294328388e-05, |
|
"loss": 0.7613, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.834262299894731e-05, |
|
"loss": 0.7626, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.833099635449942e-05, |
|
"loss": 0.7662, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8319346410162854e-05, |
|
"loss": 0.7616, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8307696465826286e-05, |
|
"loss": 0.7705, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8296069821378396e-05, |
|
"loss": 0.789, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.828441987704183e-05, |
|
"loss": 0.7674, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.827279323259394e-05, |
|
"loss": 0.7605, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.826114328825737e-05, |
|
"loss": 0.7618, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.824949334392081e-05, |
|
"loss": 0.7926, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.823784339958424e-05, |
|
"loss": 0.7731, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8226193455247674e-05, |
|
"loss": 0.7662, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8214543510911106e-05, |
|
"loss": 0.7591, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.820289356657454e-05, |
|
"loss": 0.7589, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.819124362223798e-05, |
|
"loss": 0.7617, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.817959367790141e-05, |
|
"loss": 0.7713, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.816796703345352e-05, |
|
"loss": 0.7646, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.815631708911695e-05, |
|
"loss": 0.7692, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8144690444669055e-05, |
|
"loss": 0.7763, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.813304050033249e-05, |
|
"loss": 0.7661, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.812139055599592e-05, |
|
"loss": 0.7688, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.810974061165936e-05, |
|
"loss": 0.7744, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.809809066732279e-05, |
|
"loss": 0.8432, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.808644072298623e-05, |
|
"loss": 0.7715, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.807479077864966e-05, |
|
"loss": 0.77, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.8063140834313095e-05, |
|
"loss": 0.7677, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.80515141898652e-05, |
|
"loss": 0.7692, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.803986424552864e-05, |
|
"loss": 0.7634, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.802821430119207e-05, |
|
"loss": 0.7664, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.80165643568555e-05, |
|
"loss": 0.7629, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.800491441251894e-05, |
|
"loss": 0.761, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.799326446818237e-05, |
|
"loss": 0.7586, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7981614523845806e-05, |
|
"loss": 0.7567, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7969964579509245e-05, |
|
"loss": 0.7585, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.795831463517268e-05, |
|
"loss": 0.7553, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.794666469083611e-05, |
|
"loss": 0.7562, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.793501474649954e-05, |
|
"loss": 0.7585, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.792338810205165e-05, |
|
"loss": 0.7655, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7911738157715084e-05, |
|
"loss": 0.7572, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.790008821337852e-05, |
|
"loss": 0.7553, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7888438269041956e-05, |
|
"loss": 0.7555, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.787678832470539e-05, |
|
"loss": 0.7544, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.786513838036882e-05, |
|
"loss": 0.7523, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.785351173592093e-05, |
|
"loss": 0.7588, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.784188509147303e-05, |
|
"loss": 0.7585, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7830235147136465e-05, |
|
"loss": 0.762, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7818585202799904e-05, |
|
"loss": 0.7634, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7806981858240684e-05, |
|
"loss": 0.809, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.779533191390412e-05, |
|
"loss": 0.7722, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7783681969567556e-05, |
|
"loss": 0.7643, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.777203202523099e-05, |
|
"loss": 0.7642, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.776038208089442e-05, |
|
"loss": 0.7601, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.774873213655785e-05, |
|
"loss": 0.7588, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7737105492109956e-05, |
|
"loss": 0.7585, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7725455547773395e-05, |
|
"loss": 0.7676, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.771380560343683e-05, |
|
"loss": 0.7628, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7702155659100266e-05, |
|
"loss": 0.7688, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.76905057147637e-05, |
|
"loss": 0.7663, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.767885577042713e-05, |
|
"loss": 0.7603, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7667205826090564e-05, |
|
"loss": 0.7616, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7655555881754e-05, |
|
"loss": 0.7589, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7643905937417435e-05, |
|
"loss": 0.7646, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.763227929296954e-05, |
|
"loss": 0.768, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.762062934863298e-05, |
|
"loss": 0.7853, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.760897940429641e-05, |
|
"loss": 0.8162, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.759732945995984e-05, |
|
"loss": 0.7718, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.758567951562328e-05, |
|
"loss": 0.7669, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7574029571286713e-05, |
|
"loss": 0.7724, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7562402926838816e-05, |
|
"loss": 0.7688, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.755075298250225e-05, |
|
"loss": 0.7658, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.753910303816569e-05, |
|
"loss": 0.759, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.752745309382912e-05, |
|
"loss": 0.7589, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.751580314949255e-05, |
|
"loss": 0.7532, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.750415320515599e-05, |
|
"loss": 0.7542, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7492526560708095e-05, |
|
"loss": 0.7556, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.748087661637153e-05, |
|
"loss": 0.7538, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7469226672034966e-05, |
|
"loss": 0.7576, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.74575767276984e-05, |
|
"loss": 0.7719, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.74459500832505e-05, |
|
"loss": 0.7708, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.743432343880261e-05, |
|
"loss": 0.7702, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.742267349446604e-05, |
|
"loss": 0.7647, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.741102355012948e-05, |
|
"loss": 0.7661, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7399373605792915e-05, |
|
"loss": 0.7664, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.738772366145635e-05, |
|
"loss": 0.7601, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.737607371711978e-05, |
|
"loss": 0.7611, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.736442377278321e-05, |
|
"loss": 0.7577, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.735277382844665e-05, |
|
"loss": 0.7568, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7341123884110083e-05, |
|
"loss": 0.7585, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7329473939773516e-05, |
|
"loss": 0.7563, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.731782399543695e-05, |
|
"loss": 0.7664, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.730617405110039e-05, |
|
"loss": 0.761, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.729452410676382e-05, |
|
"loss": 0.7524, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.728287416242726e-05, |
|
"loss": 0.7572, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.727122421809069e-05, |
|
"loss": 0.7625, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7259574273754124e-05, |
|
"loss": 0.7609, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7247924329417556e-05, |
|
"loss": 0.7564, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.723627438508099e-05, |
|
"loss": 0.7585, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.722462444074443e-05, |
|
"loss": 0.7576, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.721297449640786e-05, |
|
"loss": 0.7591, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.720132455207129e-05, |
|
"loss": 0.7579, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.718972120751207e-05, |
|
"loss": 0.7559, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.717809456306418e-05, |
|
"loss": 0.7588, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7166444618727614e-05, |
|
"loss": 0.755, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7154817974279724e-05, |
|
"loss": 0.7565, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7143168029943156e-05, |
|
"loss": 0.7546, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.713151808560659e-05, |
|
"loss": 0.7571, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.711986814127003e-05, |
|
"loss": 0.7535, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.710821819693346e-05, |
|
"loss": 0.7545, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.709659155248556e-05, |
|
"loss": 0.7536, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7084941608148995e-05, |
|
"loss": 0.7565, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7073291663812435e-05, |
|
"loss": 0.7509, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.706164171947587e-05, |
|
"loss": 0.7554, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.70499917751393e-05, |
|
"loss": 0.7624, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.703834183080274e-05, |
|
"loss": 0.7503, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.702669188646617e-05, |
|
"loss": 0.7528, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.70150419421296e-05, |
|
"loss": 0.7537, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7003391997793036e-05, |
|
"loss": 0.7563, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.699174205345647e-05, |
|
"loss": 0.7533, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.698011540900858e-05, |
|
"loss": 0.7503, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.696846546467202e-05, |
|
"loss": 0.7522, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.695681552033545e-05, |
|
"loss": 0.7426, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.694516557599888e-05, |
|
"loss": 0.7523, |
|
"step": 560500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6933538931550984e-05, |
|
"loss": 0.7557, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6921912287103094e-05, |
|
"loss": 0.7583, |
|
"step": 561500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6910262342766526e-05, |
|
"loss": 0.7595, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.689861239842996e-05, |
|
"loss": 0.7562, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.688696245409339e-05, |
|
"loss": 0.7552, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.687531250975683e-05, |
|
"loss": 0.7538, |
|
"step": 563500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.686366256542026e-05, |
|
"loss": 0.7509, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6852012621083695e-05, |
|
"loss": 0.7508, |
|
"step": 564500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6840362676747134e-05, |
|
"loss": 0.7509, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6828712732410566e-05, |
|
"loss": 0.7525, |
|
"step": 565500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6817062788074006e-05, |
|
"loss": 0.75, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.680541284373744e-05, |
|
"loss": 0.7504, |
|
"step": 566500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.679376289940087e-05, |
|
"loss": 0.7468, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.67821129550643e-05, |
|
"loss": 0.7523, |
|
"step": 567500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6770463010727735e-05, |
|
"loss": 0.7502, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.675881306639117e-05, |
|
"loss": 0.7525, |
|
"step": 568500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.674716312205461e-05, |
|
"loss": 0.7487, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.673551317771804e-05, |
|
"loss": 0.7443, |
|
"step": 569500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.672388653327015e-05, |
|
"loss": 0.7508, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.671223658893358e-05, |
|
"loss": 0.7477, |
|
"step": 570500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6700586644597013e-05, |
|
"loss": 0.7492, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6688936700260446e-05, |
|
"loss": 0.7507, |
|
"step": 571500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6677286755923885e-05, |
|
"loss": 0.7468, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.666563681158732e-05, |
|
"loss": 0.7429, |
|
"step": 572500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.665398686725075e-05, |
|
"loss": 0.7482, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.664233692291418e-05, |
|
"loss": 0.7509, |
|
"step": 573500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6630686978577614e-05, |
|
"loss": 0.748, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6619060334129724e-05, |
|
"loss": 0.7517, |
|
"step": 574500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6607433689681834e-05, |
|
"loss": 0.7454, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6595783745345266e-05, |
|
"loss": 0.7448, |
|
"step": 575500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6584133801008705e-05, |
|
"loss": 0.7461, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.657248385667214e-05, |
|
"loss": 0.7464, |
|
"step": 576500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.656083391233557e-05, |
|
"loss": 0.7469, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6549183967999e-05, |
|
"loss": 0.743, |
|
"step": 577500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6537534023662435e-05, |
|
"loss": 0.7531, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6525884079325874e-05, |
|
"loss": 0.7455, |
|
"step": 578500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6514234134989306e-05, |
|
"loss": 0.746, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.650258419065274e-05, |
|
"loss": 0.743, |
|
"step": 579500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.649093424631617e-05, |
|
"loss": 0.7446, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.64792843019796e-05, |
|
"loss": 0.7436, |
|
"step": 580500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.646765765753171e-05, |
|
"loss": 0.7461, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.645600771319515e-05, |
|
"loss": 0.7433, |
|
"step": 581500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6444357768858584e-05, |
|
"loss": 0.7447, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.643270782452202e-05, |
|
"loss": 0.7462, |
|
"step": 582500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.642105788018545e-05, |
|
"loss": 0.7442, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.640943123573756e-05, |
|
"loss": 0.7412, |
|
"step": 583500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.639778129140099e-05, |
|
"loss": 0.742, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6386131347064424e-05, |
|
"loss": 0.7478, |
|
"step": 584500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.637450470261653e-05, |
|
"loss": 0.7434, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.636287805816864e-05, |
|
"loss": 0.7487, |
|
"step": 585500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6351228113832075e-05, |
|
"loss": 0.744, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6339601469384185e-05, |
|
"loss": 0.7445, |
|
"step": 586500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.632795152504762e-05, |
|
"loss": 0.7438, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.631630158071105e-05, |
|
"loss": 0.7505, |
|
"step": 587500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.630465163637448e-05, |
|
"loss": 0.7444, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6293001692037914e-05, |
|
"loss": 0.7484, |
|
"step": 588500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6281351747701353e-05, |
|
"loss": 0.75, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6269701803364786e-05, |
|
"loss": 0.7474, |
|
"step": 589500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6258075158916895e-05, |
|
"loss": 0.7424, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6246448514469e-05, |
|
"loss": 0.7483, |
|
"step": 590500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.623479857013243e-05, |
|
"loss": 0.7471, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.622314862579586e-05, |
|
"loss": 0.7421, |
|
"step": 591500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.621152198134797e-05, |
|
"loss": 0.7468, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6199872037011405e-05, |
|
"loss": 0.7415, |
|
"step": 592500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6188222092674844e-05, |
|
"loss": 0.7481, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6176572148338277e-05, |
|
"loss": 0.7499, |
|
"step": 593500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.616492220400171e-05, |
|
"loss": 0.7443, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.615327225966514e-05, |
|
"loss": 0.7481, |
|
"step": 594500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.614164561521725e-05, |
|
"loss": 0.7456, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.612999567088068e-05, |
|
"loss": 0.7415, |
|
"step": 595500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.611834572654412e-05, |
|
"loss": 0.743, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6106695782207555e-05, |
|
"loss": 0.7469, |
|
"step": 596500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.609504583787099e-05, |
|
"loss": 0.7473, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.608339589353442e-05, |
|
"loss": 0.741, |
|
"step": 597500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.607174594919785e-05, |
|
"loss": 0.75, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.606009600486129e-05, |
|
"loss": 0.7441, |
|
"step": 598500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6048469360413394e-05, |
|
"loss": 0.7456, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.603684271596551e-05, |
|
"loss": 0.7458, |
|
"step": 599500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.602519277162894e-05, |
|
"loss": 0.7469, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6013542827292375e-05, |
|
"loss": 0.7484, |
|
"step": 600500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.600189288295581e-05, |
|
"loss": 0.7463, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.599024293861924e-05, |
|
"loss": 0.7442, |
|
"step": 601500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.597859299428267e-05, |
|
"loss": 0.7382, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.596694304994611e-05, |
|
"loss": 0.7455, |
|
"step": 602500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5955293105609544e-05, |
|
"loss": 0.7459, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5943643161272976e-05, |
|
"loss": 0.7404, |
|
"step": 603500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.593199321693641e-05, |
|
"loss": 0.741, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.592034327259984e-05, |
|
"loss": 0.7449, |
|
"step": 604500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.590869332826328e-05, |
|
"loss": 0.7494, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.589704338392671e-05, |
|
"loss": 0.7427, |
|
"step": 605500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5885393439590145e-05, |
|
"loss": 0.7445, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5873743495253584e-05, |
|
"loss": 0.7414, |
|
"step": 606500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5862093550917016e-05, |
|
"loss": 0.7412, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.585044360658045e-05, |
|
"loss": 0.7432, |
|
"step": 607500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.583881696213255e-05, |
|
"loss": 0.7586, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.582719031768466e-05, |
|
"loss": 0.7483, |
|
"step": 608500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.581556367323677e-05, |
|
"loss": 0.7401, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.580391372890021e-05, |
|
"loss": 0.7446, |
|
"step": 609500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.579226378456364e-05, |
|
"loss": 0.7418, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5780613840227075e-05, |
|
"loss": 0.7411, |
|
"step": 610500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.576896389589051e-05, |
|
"loss": 0.7431, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.575731395155394e-05, |
|
"loss": 0.7452, |
|
"step": 611500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.574568730710605e-05, |
|
"loss": 0.7424, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.573403736276948e-05, |
|
"loss": 0.7438, |
|
"step": 612500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.572238741843292e-05, |
|
"loss": 0.7427, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.571073747409635e-05, |
|
"loss": 0.7442, |
|
"step": 613500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5699087529759785e-05, |
|
"loss": 0.745, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.568743758542322e-05, |
|
"loss": 0.7485, |
|
"step": 614500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.567578764108665e-05, |
|
"loss": 0.7445, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.566413769675009e-05, |
|
"loss": 0.7467, |
|
"step": 615500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.56525110523022e-05, |
|
"loss": 0.7397, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.564086110796563e-05, |
|
"loss": 0.7442, |
|
"step": 616500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5629211163629064e-05, |
|
"loss": 0.7415, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5617584519181166e-05, |
|
"loss": 0.7451, |
|
"step": 617500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.56059345748446e-05, |
|
"loss": 0.7447, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.559428463050804e-05, |
|
"loss": 0.7402, |
|
"step": 618500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.558263468617147e-05, |
|
"loss": 0.7419, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.557098474183491e-05, |
|
"loss": 0.7419, |
|
"step": 619500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.555933479749834e-05, |
|
"loss": 0.7372, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5547684853161774e-05, |
|
"loss": 0.7416, |
|
"step": 620500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5536034908825207e-05, |
|
"loss": 0.7508, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.552440826437731e-05, |
|
"loss": 0.7427, |
|
"step": 621500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.551275832004075e-05, |
|
"loss": 0.7553, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.550110837570418e-05, |
|
"loss": 0.7431, |
|
"step": 622500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.548948173125629e-05, |
|
"loss": 0.7429, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.547783178691972e-05, |
|
"loss": 0.7445, |
|
"step": 623500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.546620514247183e-05, |
|
"loss": 0.7428, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5454555198135265e-05, |
|
"loss": 0.7404, |
|
"step": 624500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.54429052537987e-05, |
|
"loss": 0.7428, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5431255309462136e-05, |
|
"loss": 0.7454, |
|
"step": 625500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.541960536512557e-05, |
|
"loss": 0.7431, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5407955420789e-05, |
|
"loss": 0.7449, |
|
"step": 626500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5396305476452434e-05, |
|
"loss": 0.7501, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5384655532115866e-05, |
|
"loss": 0.7417, |
|
"step": 627500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.53730055877793e-05, |
|
"loss": 0.7425, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.536135564344274e-05, |
|
"loss": 0.7457, |
|
"step": 628500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.534970569910617e-05, |
|
"loss": 0.7378, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.533805575476961e-05, |
|
"loss": 0.7445, |
|
"step": 629500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.532640581043304e-05, |
|
"loss": 0.7372, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5314755866096474e-05, |
|
"loss": 0.7676, |
|
"step": 630500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.530310592175991e-05, |
|
"loss": 0.7583, |
|
"step": 631000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5291455977423345e-05, |
|
"loss": 0.7405, |
|
"step": 631500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.527980603308678e-05, |
|
"loss": 0.7347, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.526817938863888e-05, |
|
"loss": 0.7391, |
|
"step": 632500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.525652944430232e-05, |
|
"loss": 0.7357, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.524487949996575e-05, |
|
"loss": 0.7388, |
|
"step": 633500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5233229555629184e-05, |
|
"loss": 0.7448, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5221579611292623e-05, |
|
"loss": 0.7372, |
|
"step": 634500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5209929666956056e-05, |
|
"loss": 0.7379, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.519827972261949e-05, |
|
"loss": 0.7445, |
|
"step": 635500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.518662977828292e-05, |
|
"loss": 0.7409, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.517497983394635e-05, |
|
"loss": 0.7337, |
|
"step": 636500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.516332988960979e-05, |
|
"loss": 0.7405, |
|
"step": 637000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5151679945273225e-05, |
|
"loss": 0.7388, |
|
"step": 637500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.514003000093666e-05, |
|
"loss": 0.7402, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.512838005660009e-05, |
|
"loss": 0.7424, |
|
"step": 638500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.51167534121522e-05, |
|
"loss": 0.7403, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.510510346781563e-05, |
|
"loss": 0.7342, |
|
"step": 639500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.509347682336774e-05, |
|
"loss": 0.7364, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.508182687903117e-05, |
|
"loss": 0.7413, |
|
"step": 640500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.507017693469461e-05, |
|
"loss": 0.7447, |
|
"step": 641000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.5058526990358045e-05, |
|
"loss": 0.7366, |
|
"step": 641500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.504687704602148e-05, |
|
"loss": 0.7382, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.503522710168491e-05, |
|
"loss": 0.7383, |
|
"step": 642500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.502360045723702e-05, |
|
"loss": 0.7369, |
|
"step": 643000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.501195051290045e-05, |
|
"loss": 0.7393, |
|
"step": 643500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.5000323868452554e-05, |
|
"loss": 0.7413, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4988673924115994e-05, |
|
"loss": 0.7318, |
|
"step": 644500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4977023979779426e-05, |
|
"loss": 0.7414, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.496537403544286e-05, |
|
"loss": 0.7384, |
|
"step": 645500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.495372409110629e-05, |
|
"loss": 0.7379, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.494207414676973e-05, |
|
"loss": 0.7382, |
|
"step": 646500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.493042420243316e-05, |
|
"loss": 0.7359, |
|
"step": 647000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.49187742580966e-05, |
|
"loss": 0.7391, |
|
"step": 647500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4907124313760034e-05, |
|
"loss": 0.7401, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4895474369423466e-05, |
|
"loss": 0.7341, |
|
"step": 648500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.48838244250869e-05, |
|
"loss": 0.7383, |
|
"step": 649000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.487217448075033e-05, |
|
"loss": 0.7351, |
|
"step": 649500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.486054783630244e-05, |
|
"loss": 0.7383, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.484889789196587e-05, |
|
"loss": 0.7362, |
|
"step": 650500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.483727124751798e-05, |
|
"loss": 0.7369, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4825621303181415e-05, |
|
"loss": 0.7356, |
|
"step": 651500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.481397135884485e-05, |
|
"loss": 0.7379, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.480232141450828e-05, |
|
"loss": 0.7419, |
|
"step": 652500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.479067147017171e-05, |
|
"loss": 0.7343, |
|
"step": 653000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.477904482572382e-05, |
|
"loss": 0.7387, |
|
"step": 653500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.476741818127593e-05, |
|
"loss": 0.7362, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.475576823693937e-05, |
|
"loss": 0.7391, |
|
"step": 654500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.47441182926028e-05, |
|
"loss": 0.7476, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4732468348266235e-05, |
|
"loss": 0.7382, |
|
"step": 655500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.472081840392967e-05, |
|
"loss": 0.7402, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.47091684595931e-05, |
|
"loss": 0.7347, |
|
"step": 656500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.469751851525654e-05, |
|
"loss": 0.7413, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.468586857091997e-05, |
|
"loss": 0.7415, |
|
"step": 657500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4674218626583404e-05, |
|
"loss": 0.7383, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.466259198213551e-05, |
|
"loss": 0.737, |
|
"step": 658500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4650942037798946e-05, |
|
"loss": 0.7321, |
|
"step": 659000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.463929209346238e-05, |
|
"loss": 0.7359, |
|
"step": 659500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.462764214912581e-05, |
|
"loss": 0.7408, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.461599220478925e-05, |
|
"loss": 0.7387, |
|
"step": 660500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.460434226045268e-05, |
|
"loss": 0.7339, |
|
"step": 661000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4592692316116114e-05, |
|
"loss": 0.7356, |
|
"step": 661500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.458104237177955e-05, |
|
"loss": 0.7336, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.456939242744298e-05, |
|
"loss": 0.7345, |
|
"step": 662500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.455774248310642e-05, |
|
"loss": 0.7389, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.454611583865853e-05, |
|
"loss": 0.7298, |
|
"step": 663500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.453446589432196e-05, |
|
"loss": 0.736, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.452286254976274e-05, |
|
"loss": 0.7367, |
|
"step": 664500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.451121260542617e-05, |
|
"loss": 0.7413, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4499562661089605e-05, |
|
"loss": 0.739, |
|
"step": 665500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.448791271675304e-05, |
|
"loss": 0.7437, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4476262772416477e-05, |
|
"loss": 0.7421, |
|
"step": 666500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.446463612796858e-05, |
|
"loss": 0.7378, |
|
"step": 667000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.445298618363202e-05, |
|
"loss": 0.7381, |
|
"step": 667500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.444133623929545e-05, |
|
"loss": 0.7368, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.442968629495888e-05, |
|
"loss": 0.7347, |
|
"step": 668500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4418036350622316e-05, |
|
"loss": 0.7359, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.440638640628575e-05, |
|
"loss": 0.7371, |
|
"step": 669500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.439473646194919e-05, |
|
"loss": 0.734, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.438308651761262e-05, |
|
"loss": 0.7397, |
|
"step": 670500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.437143657327606e-05, |
|
"loss": 0.733, |
|
"step": 671000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.435978662893949e-05, |
|
"loss": 0.7378, |
|
"step": 671500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4348136684602923e-05, |
|
"loss": 0.7305, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4336486740266356e-05, |
|
"loss": 0.736, |
|
"step": 672500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4324836795929795e-05, |
|
"loss": 0.7397, |
|
"step": 673000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4313256751259246e-05, |
|
"loss": 0.7345, |
|
"step": 673500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.430160680692268e-05, |
|
"loss": 0.7336, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.428995686258612e-05, |
|
"loss": 0.7354, |
|
"step": 674500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.427833021813822e-05, |
|
"loss": 0.7357, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.426668027380165e-05, |
|
"loss": 0.7326, |
|
"step": 675500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4255030329465085e-05, |
|
"loss": 0.7368, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4243380385128524e-05, |
|
"loss": 0.7383, |
|
"step": 676500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4231730440791956e-05, |
|
"loss": 0.7324, |
|
"step": 677000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4220080496455395e-05, |
|
"loss": 0.7343, |
|
"step": 677500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.420843055211883e-05, |
|
"loss": 0.733, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.419678060778226e-05, |
|
"loss": 0.7314, |
|
"step": 678500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.418513066344569e-05, |
|
"loss": 0.732, |
|
"step": 679000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4173480719109125e-05, |
|
"loss": 0.7353, |
|
"step": 679500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4161854074661234e-05, |
|
"loss": 0.7342, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.415020413032467e-05, |
|
"loss": 0.7301, |
|
"step": 680500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4138554185988106e-05, |
|
"loss": 0.7311, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.412692754154021e-05, |
|
"loss": 0.7326, |
|
"step": 681500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411527759720364e-05, |
|
"loss": 0.731, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4103627652867074e-05, |
|
"loss": 0.734, |
|
"step": 682500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4091977708530506e-05, |
|
"loss": 0.7337, |
|
"step": 683000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4080327764193945e-05, |
|
"loss": 0.7311, |
|
"step": 683500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.406867781985738e-05, |
|
"loss": 0.7373, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.405705117540949e-05, |
|
"loss": 0.7345, |
|
"step": 684500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.404540123107292e-05, |
|
"loss": 0.7408, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.403375128673635e-05, |
|
"loss": 0.7346, |
|
"step": 685500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.402212464228846e-05, |
|
"loss": 0.7347, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4010474697951894e-05, |
|
"loss": 0.7323, |
|
"step": 686500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.3998824753615326e-05, |
|
"loss": 0.7337, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.3987174809278765e-05, |
|
"loss": 0.7327, |
|
"step": 687500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.39755248649422e-05, |
|
"loss": 0.7318, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.396387492060563e-05, |
|
"loss": 0.7357, |
|
"step": 688500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.395224827615774e-05, |
|
"loss": 0.7335, |
|
"step": 689000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.394059833182117e-05, |
|
"loss": 0.732, |
|
"step": 689500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.3928948387484604e-05, |
|
"loss": 0.7314, |
|
"step": 690000 |
|
} |
|
], |
|
"max_steps": 2145933, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.543433599991808e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|