diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,15166 +1,7556 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.0, - "global_step": 1260555, + "epoch": 5.0, + "global_step": 625000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 1.9992066986367118e-05, - "loss": 3.3636, + "learning_rate": 1.9996000000000003e-05, + "loss": 4.9384, "step": 500 }, { - "epoch": 0.0, - "learning_rate": 1.9984133972734233e-05, - "loss": 2.8247, + "epoch": 0.01, + "learning_rate": 1.9992e-05, + "loss": 3.2955, "step": 1000 }, { - "epoch": 0.0, - "learning_rate": 1.997620095910135e-05, - "loss": 2.6434, + "epoch": 0.01, + "learning_rate": 1.9988000000000002e-05, + "loss": 2.879, "step": 1500 }, { - "epoch": 0.0, - "learning_rate": 1.996826794546847e-05, - "loss": 2.5482, + "epoch": 0.02, + "learning_rate": 1.9984e-05, + "loss": 2.6655, "step": 2000 }, { - "epoch": 0.01, - "learning_rate": 1.996033493183558e-05, - "loss": 2.4555, + "epoch": 0.02, + "learning_rate": 1.9980000000000002e-05, + "loss": 2.5291, "step": 2500 }, { - "epoch": 0.01, - "learning_rate": 1.9952401918202697e-05, - "loss": 2.4086, + "epoch": 0.02, + "learning_rate": 1.9976000000000003e-05, + "loss": 2.4219, "step": 3000 }, { - "epoch": 0.01, - "learning_rate": 1.9944468904569813e-05, - "loss": 2.348, + "epoch": 0.03, + "learning_rate": 1.9972e-05, + "loss": 2.3595, "step": 3500 }, { - "epoch": 0.01, - "learning_rate": 1.993653589093693e-05, - "loss": 2.3079, + "epoch": 0.03, + "learning_rate": 1.9968e-05, + "loss": 2.2788, "step": 4000 }, { - "epoch": 0.01, - "learning_rate": 1.9928602877304048e-05, - "loss": 2.263, + "epoch": 0.04, + "learning_rate": 1.9964e-05, + "loss": 2.239, "step": 4500 }, { - "epoch": 0.01, - "learning_rate": 1.9920669863671164e-05, - "loss": 2.2407, + "epoch": 0.04, + "learning_rate": 1.9960000000000002e-05, + "loss": 2.1891, "step": 5000 }, { - "epoch": 0.01, - "learning_rate": 1.991273685003828e-05, - "loss": 2.2031, + "epoch": 0.04, + "learning_rate": 1.9956000000000003e-05, + "loss": 2.1492, "step": 5500 }, { - "epoch": 0.01, - "learning_rate": 1.9904803836405395e-05, - "loss": 2.1797, + "epoch": 0.05, + "learning_rate": 1.9952e-05, + "loss": 2.1163, "step": 6000 }, { - "epoch": 0.02, - "learning_rate": 1.989687082277251e-05, - "loss": 2.1896, + "epoch": 0.05, + "learning_rate": 1.9948e-05, + "loss": 2.086, "step": 6500 }, { - "epoch": 0.02, - "learning_rate": 1.9888937809139627e-05, - "loss": 2.14, + "epoch": 0.06, + "learning_rate": 1.9944e-05, + "loss": 2.0627, "step": 7000 }, { - "epoch": 0.02, - "learning_rate": 1.9881004795506743e-05, - "loss": 2.1279, + "epoch": 0.06, + "learning_rate": 1.9940000000000002e-05, + "loss": 2.0396, "step": 7500 }, { - "epoch": 0.02, - "learning_rate": 1.987307178187386e-05, - "loss": 2.0861, + "epoch": 0.06, + "learning_rate": 1.9936000000000004e-05, + "loss": 2.0061, "step": 8000 }, { - "epoch": 0.02, - "learning_rate": 1.9865138768240975e-05, - "loss": 2.0768, + "epoch": 0.07, + "learning_rate": 1.9932e-05, + "loss": 1.9973, "step": 8500 }, { - "epoch": 0.02, - "learning_rate": 1.985720575460809e-05, - "loss": 2.0827, + "epoch": 0.07, + "learning_rate": 1.9928e-05, + "loss": 1.9651, "step": 9000 }, { - "epoch": 0.02, - "learning_rate": 1.9849272740975206e-05, - "loss": 2.0539, + "epoch": 0.08, + "learning_rate": 1.9924e-05, + "loss": 1.9509, "step": 9500 }, { - "epoch": 0.02, - "learning_rate": 1.9841339727342322e-05, - "loss": 2.03, + "epoch": 0.08, + "learning_rate": 1.9920000000000002e-05, + "loss": 1.937, "step": 10000 }, { - "epoch": 0.02, - "learning_rate": 1.983340671370944e-05, - "loss": 2.0105, + "epoch": 0.08, + "learning_rate": 1.9916e-05, + "loss": 1.9226, "step": 10500 }, { - "epoch": 0.03, - "learning_rate": 1.9825473700076557e-05, - "loss": 1.9989, + "epoch": 0.09, + "learning_rate": 1.9912000000000002e-05, + "loss": 1.9087, "step": 11000 }, { - "epoch": 0.03, - "learning_rate": 1.981754068644367e-05, - "loss": 1.9886, + "epoch": 0.09, + "learning_rate": 1.9908e-05, + "loss": 1.8888, "step": 11500 }, { - "epoch": 0.03, - "learning_rate": 1.9809607672810786e-05, - "loss": 1.9745, + "epoch": 0.1, + "learning_rate": 1.9904e-05, + "loss": 1.879, "step": 12000 }, { - "epoch": 0.03, - "learning_rate": 1.98016746591779e-05, - "loss": 1.9839, + "epoch": 0.1, + "learning_rate": 1.9900000000000003e-05, + "loss": 1.8612, "step": 12500 }, { - "epoch": 0.03, - "learning_rate": 1.979374164554502e-05, - "loss": 1.9611, + "epoch": 0.1, + "learning_rate": 1.9896e-05, + "loss": 1.8466, "step": 13000 }, { - "epoch": 0.03, - "learning_rate": 1.9785808631912137e-05, - "loss": 1.9685, + "epoch": 0.11, + "learning_rate": 1.9892000000000002e-05, + "loss": 1.8376, "step": 13500 }, { - "epoch": 0.03, - "learning_rate": 1.9777875618279253e-05, - "loss": 1.9465, + "epoch": 0.11, + "learning_rate": 1.9888e-05, + "loss": 1.8353, "step": 14000 }, { - "epoch": 0.03, - "learning_rate": 1.976994260464637e-05, - "loss": 1.9532, + "epoch": 0.12, + "learning_rate": 1.9884e-05, + "loss": 1.8239, "step": 14500 }, { - "epoch": 0.04, - "learning_rate": 1.9762009591013484e-05, - "loss": 1.9243, + "epoch": 0.12, + "learning_rate": 1.9880000000000003e-05, + "loss": 1.8111, "step": 15000 }, { - "epoch": 0.04, - "learning_rate": 1.97540765773806e-05, - "loss": 1.9132, + "epoch": 0.12, + "learning_rate": 1.9876e-05, + "loss": 1.8014, "step": 15500 }, { - "epoch": 0.04, - "learning_rate": 1.9746143563747716e-05, - "loss": 1.907, + "epoch": 0.13, + "learning_rate": 1.9872000000000002e-05, + "loss": 1.7967, "step": 16000 }, { - "epoch": 0.04, - "learning_rate": 1.9738210550114832e-05, - "loss": 1.9201, + "epoch": 0.13, + "learning_rate": 1.9868e-05, + "loss": 1.78, "step": 16500 }, { - "epoch": 0.04, - "learning_rate": 1.9730277536481948e-05, - "loss": 1.9033, + "epoch": 0.14, + "learning_rate": 1.9864e-05, + "loss": 1.7791, "step": 17000 }, { - "epoch": 0.04, - "learning_rate": 1.9722344522849064e-05, - "loss": 1.9114, + "epoch": 0.14, + "learning_rate": 1.9860000000000003e-05, + "loss": 1.7657, "step": 17500 }, { - "epoch": 0.04, - "learning_rate": 1.971441150921618e-05, - "loss": 1.8667, + "epoch": 0.14, + "learning_rate": 1.9856e-05, + "loss": 1.7641, "step": 18000 }, { - "epoch": 0.04, - "learning_rate": 1.9706478495583295e-05, - "loss": 1.8737, + "epoch": 0.15, + "learning_rate": 1.9852000000000002e-05, + "loss": 1.769, "step": 18500 }, { - "epoch": 0.05, - "learning_rate": 1.969854548195041e-05, - "loss": 1.8696, + "epoch": 0.15, + "learning_rate": 1.9848e-05, + "loss": 1.7516, "step": 19000 }, { - "epoch": 0.05, - "learning_rate": 1.969061246831753e-05, - "loss": 1.871, + "epoch": 0.16, + "learning_rate": 1.9844000000000002e-05, + "loss": 1.7521, "step": 19500 }, { - "epoch": 0.05, - "learning_rate": 1.9682679454684646e-05, - "loss": 1.8694, + "epoch": 0.16, + "learning_rate": 1.9840000000000003e-05, + "loss": 1.738, "step": 20000 }, { - "epoch": 0.05, - "learning_rate": 1.967474644105176e-05, - "loss": 1.8408, + "epoch": 0.16, + "learning_rate": 1.9836e-05, + "loss": 1.7292, "step": 20500 }, { - "epoch": 0.05, - "learning_rate": 1.9666813427418875e-05, - "loss": 1.8475, + "epoch": 0.17, + "learning_rate": 1.9832000000000003e-05, + "loss": 1.7123, "step": 21000 }, { - "epoch": 0.05, - "learning_rate": 1.9658880413785994e-05, - "loss": 1.8506, + "epoch": 0.17, + "learning_rate": 1.9828e-05, + "loss": 1.7123, "step": 21500 }, { - "epoch": 0.05, - "learning_rate": 1.965094740015311e-05, - "loss": 1.8373, + "epoch": 0.18, + "learning_rate": 1.9824000000000002e-05, + "loss": 1.7095, "step": 22000 }, { - "epoch": 0.05, - "learning_rate": 1.9643014386520226e-05, - "loss": 1.8109, + "epoch": 0.18, + "learning_rate": 1.982e-05, + "loss": 1.71, "step": 22500 }, { - "epoch": 0.05, - "learning_rate": 1.963508137288734e-05, - "loss": 1.819, + "epoch": 0.18, + "learning_rate": 1.9816e-05, + "loss": 1.7009, "step": 23000 }, { - "epoch": 0.06, - "learning_rate": 1.9627148359254457e-05, - "loss": 1.8175, + "epoch": 0.19, + "learning_rate": 1.9812000000000003e-05, + "loss": 1.6947, "step": 23500 }, { - "epoch": 0.06, - "learning_rate": 1.9619215345621573e-05, - "loss": 1.806, + "epoch": 0.19, + "learning_rate": 1.9808e-05, + "loss": 1.6951, "step": 24000 }, { - "epoch": 0.06, - "learning_rate": 1.961128233198869e-05, - "loss": 1.824, + "epoch": 0.2, + "learning_rate": 1.9804000000000002e-05, + "loss": 1.6828, "step": 24500 }, { - "epoch": 0.06, - "learning_rate": 1.9603349318355805e-05, - "loss": 1.8055, + "epoch": 0.2, + "learning_rate": 1.98e-05, + "loss": 1.676, "step": 25000 }, { - "epoch": 0.06, - "learning_rate": 1.959541630472292e-05, - "loss": 1.7914, + "epoch": 0.2, + "learning_rate": 1.9796e-05, + "loss": 1.6734, "step": 25500 }, { - "epoch": 0.06, - "learning_rate": 1.9587483291090037e-05, - "loss": 1.7943, + "epoch": 0.21, + "learning_rate": 1.9792000000000003e-05, + "loss": 1.6642, "step": 26000 }, { - "epoch": 0.06, - "learning_rate": 1.9579550277457153e-05, - "loss": 1.7821, + "epoch": 0.21, + "learning_rate": 1.9788e-05, + "loss": 1.6649, "step": 26500 }, { - "epoch": 0.06, - "learning_rate": 1.957161726382427e-05, - "loss": 1.7968, + "epoch": 0.22, + "learning_rate": 1.9784000000000002e-05, + "loss": 1.667, "step": 27000 }, { - "epoch": 0.07, - "learning_rate": 1.9563684250191384e-05, - "loss": 1.7688, + "epoch": 0.22, + "learning_rate": 1.978e-05, + "loss": 1.6556, "step": 27500 }, { - "epoch": 0.07, - "learning_rate": 1.9555751236558504e-05, - "loss": 1.7674, + "epoch": 0.22, + "learning_rate": 1.9776000000000002e-05, + "loss": 1.6532, "step": 28000 }, { - "epoch": 0.07, - "learning_rate": 1.954781822292562e-05, - "loss": 1.7623, + "epoch": 0.23, + "learning_rate": 1.9772000000000003e-05, + "loss": 1.6545, "step": 28500 }, { - "epoch": 0.07, - "learning_rate": 1.9539885209292732e-05, - "loss": 1.7549, + "epoch": 0.23, + "learning_rate": 1.9768e-05, + "loss": 1.6481, "step": 29000 }, { - "epoch": 0.07, - "learning_rate": 1.9531952195659848e-05, - "loss": 1.7584, + "epoch": 0.24, + "learning_rate": 1.9764000000000003e-05, + "loss": 1.647, "step": 29500 }, { - "epoch": 0.07, - "learning_rate": 1.9524019182026964e-05, - "loss": 1.7497, + "epoch": 0.24, + "learning_rate": 1.976e-05, + "loss": 1.6385, "step": 30000 }, { - "epoch": 0.07, - "learning_rate": 1.9516086168394083e-05, - "loss": 1.7464, + "epoch": 0.24, + "learning_rate": 1.9756000000000002e-05, + "loss": 1.6337, "step": 30500 }, { - "epoch": 0.07, - "learning_rate": 1.95081531547612e-05, - "loss": 1.7458, + "epoch": 0.25, + "learning_rate": 1.9752000000000003e-05, + "loss": 1.6296, "step": 31000 }, { - "epoch": 0.07, - "learning_rate": 1.9500220141128315e-05, - "loss": 1.7666, + "epoch": 0.25, + "learning_rate": 1.9748e-05, + "loss": 1.6339, "step": 31500 }, { - "epoch": 0.08, - "learning_rate": 1.949228712749543e-05, - "loss": 1.7518, + "epoch": 0.26, + "learning_rate": 1.9744e-05, + "loss": 1.63, "step": 32000 }, { - "epoch": 0.08, - "learning_rate": 1.9484354113862546e-05, - "loss": 1.7312, + "epoch": 0.26, + "learning_rate": 1.974e-05, + "loss": 1.6224, "step": 32500 }, { - "epoch": 0.08, - "learning_rate": 1.9476421100229662e-05, - "loss": 1.7569, + "epoch": 0.26, + "learning_rate": 1.9736000000000002e-05, + "loss": 1.6207, "step": 33000 }, { - "epoch": 0.08, - "learning_rate": 1.9468488086596778e-05, - "loss": 1.7352, + "epoch": 0.27, + "learning_rate": 1.9732000000000004e-05, + "loss": 1.6101, "step": 33500 }, { - "epoch": 0.08, - "learning_rate": 1.9460555072963894e-05, - "loss": 1.723, + "epoch": 0.27, + "learning_rate": 1.9728e-05, + "loss": 1.6095, "step": 34000 }, { - "epoch": 0.08, - "learning_rate": 1.945262205933101e-05, - "loss": 1.7439, + "epoch": 0.28, + "learning_rate": 1.9724e-05, + "loss": 1.6029, "step": 34500 }, { - "epoch": 0.08, - "learning_rate": 1.9444689045698126e-05, - "loss": 1.7154, + "epoch": 0.28, + "learning_rate": 1.972e-05, + "loss": 1.6028, "step": 35000 }, { - "epoch": 0.08, - "learning_rate": 1.943675603206524e-05, - "loss": 1.7245, + "epoch": 0.28, + "learning_rate": 1.9716000000000002e-05, + "loss": 1.5977, "step": 35500 }, { - "epoch": 0.09, - "learning_rate": 1.9428823018432357e-05, - "loss": 1.7139, + "epoch": 0.29, + "learning_rate": 1.9712000000000004e-05, + "loss": 1.5929, "step": 36000 }, { - "epoch": 0.09, - "learning_rate": 1.9420890004799477e-05, - "loss": 1.7167, + "epoch": 0.29, + "learning_rate": 1.9708000000000002e-05, + "loss": 1.5942, "step": 36500 }, { - "epoch": 0.09, - "learning_rate": 1.9412956991166593e-05, - "loss": 1.7218, + "epoch": 0.3, + "learning_rate": 1.9704e-05, + "loss": 1.5908, "step": 37000 }, { - "epoch": 0.09, - "learning_rate": 1.940502397753371e-05, - "loss": 1.7136, + "epoch": 0.3, + "learning_rate": 1.97e-05, + "loss": 1.5926, "step": 37500 }, { - "epoch": 0.09, - "learning_rate": 1.939709096390082e-05, - "loss": 1.6848, + "epoch": 0.3, + "learning_rate": 1.9696000000000003e-05, + "loss": 1.5793, "step": 38000 }, { - "epoch": 0.09, - "learning_rate": 1.9389157950267937e-05, - "loss": 1.7024, + "epoch": 0.31, + "learning_rate": 1.9692000000000004e-05, + "loss": 1.5769, "step": 38500 }, { - "epoch": 0.09, - "learning_rate": 1.9381224936635056e-05, - "loss": 1.7083, + "epoch": 0.31, + "learning_rate": 1.9688000000000002e-05, + "loss": 1.5778, "step": 39000 }, { - "epoch": 0.09, - "learning_rate": 1.9373291923002172e-05, - "loss": 1.6827, + "epoch": 0.32, + "learning_rate": 1.9684e-05, + "loss": 1.58, "step": 39500 }, { - "epoch": 0.1, - "learning_rate": 1.9365358909369288e-05, - "loss": 1.6955, + "epoch": 0.32, + "learning_rate": 1.968e-05, + "loss": 1.569, "step": 40000 }, { - "epoch": 0.1, - "learning_rate": 1.9357425895736404e-05, - "loss": 1.6975, + "epoch": 0.32, + "learning_rate": 1.9676000000000003e-05, + "loss": 1.5757, "step": 40500 }, { - "epoch": 0.1, - "learning_rate": 1.934949288210352e-05, - "loss": 1.6946, + "epoch": 0.33, + "learning_rate": 1.9672e-05, + "loss": 1.5706, "step": 41000 }, { - "epoch": 0.1, - "learning_rate": 1.9341559868470635e-05, - "loss": 1.6997, + "epoch": 0.33, + "learning_rate": 1.9668000000000002e-05, + "loss": 1.5658, "step": 41500 }, { - "epoch": 0.1, - "learning_rate": 1.933362685483775e-05, - "loss": 1.6943, + "epoch": 0.34, + "learning_rate": 1.9664e-05, + "loss": 1.5606, "step": 42000 }, { - "epoch": 0.1, - "learning_rate": 1.9325693841204867e-05, - "loss": 1.6775, + "epoch": 0.34, + "learning_rate": 1.966e-05, + "loss": 1.5525, "step": 42500 }, { - "epoch": 0.1, - "learning_rate": 1.9317760827571983e-05, - "loss": 1.6608, + "epoch": 0.34, + "learning_rate": 1.9656000000000003e-05, + "loss": 1.5628, "step": 43000 }, { - "epoch": 0.1, - "learning_rate": 1.93098278139391e-05, - "loss": 1.6745, + "epoch": 0.35, + "learning_rate": 1.9652e-05, + "loss": 1.5532, "step": 43500 }, { - "epoch": 0.1, - "learning_rate": 1.9301894800306215e-05, - "loss": 1.6841, + "epoch": 0.35, + "learning_rate": 1.9648000000000002e-05, + "loss": 1.5476, "step": 44000 }, { - "epoch": 0.11, - "learning_rate": 1.929396178667333e-05, - "loss": 1.6698, + "epoch": 0.36, + "learning_rate": 1.9644e-05, + "loss": 1.5552, "step": 44500 }, { - "epoch": 0.11, - "learning_rate": 1.928602877304045e-05, - "loss": 1.6706, + "epoch": 0.36, + "learning_rate": 1.9640000000000002e-05, + "loss": 1.5474, "step": 45000 }, { - "epoch": 0.11, - "learning_rate": 1.9278095759407566e-05, - "loss": 1.6591, + "epoch": 0.36, + "learning_rate": 1.9636000000000003e-05, + "loss": 1.544, "step": 45500 }, { - "epoch": 0.11, - "learning_rate": 1.927016274577468e-05, - "loss": 1.6406, + "epoch": 0.37, + "learning_rate": 1.9632e-05, + "loss": 1.5491, "step": 46000 }, { - "epoch": 0.11, - "learning_rate": 1.9262229732141797e-05, - "loss": 1.6599, + "epoch": 0.37, + "learning_rate": 1.9628000000000002e-05, + "loss": 1.5405, "step": 46500 }, { - "epoch": 0.11, - "learning_rate": 1.925429671850891e-05, - "loss": 1.6872, + "epoch": 0.38, + "learning_rate": 1.9624e-05, + "loss": 1.5407, "step": 47000 }, { - "epoch": 0.11, - "learning_rate": 1.924636370487603e-05, - "loss": 1.6721, + "epoch": 0.38, + "learning_rate": 1.9620000000000002e-05, + "loss": 1.539, "step": 47500 }, { - "epoch": 0.11, - "learning_rate": 1.9238430691243145e-05, - "loss": 1.6741, + "epoch": 0.38, + "learning_rate": 1.9616000000000003e-05, + "loss": 1.5269, "step": 48000 }, { - "epoch": 0.12, - "learning_rate": 1.923049767761026e-05, - "loss": 1.6763, + "epoch": 0.39, + "learning_rate": 1.9612e-05, + "loss": 1.5229, "step": 48500 }, { - "epoch": 0.12, - "learning_rate": 1.9222564663977377e-05, - "loss": 1.64, + "epoch": 0.39, + "learning_rate": 1.9608000000000003e-05, + "loss": 1.5364, "step": 49000 }, { - "epoch": 0.12, - "learning_rate": 1.9214631650344493e-05, - "loss": 1.6618, + "epoch": 0.4, + "learning_rate": 1.9604e-05, + "loss": 1.5312, "step": 49500 }, { - "epoch": 0.12, - "learning_rate": 1.920669863671161e-05, - "loss": 1.6495, + "epoch": 0.4, + "learning_rate": 1.9600000000000002e-05, + "loss": 1.535, "step": 50000 }, { - "epoch": 0.12, - "learning_rate": 1.9198765623078724e-05, - "loss": 1.6656, + "epoch": 0.4, + "learning_rate": 1.9596e-05, + "loss": 1.5234, "step": 50500 }, { - "epoch": 0.12, - "learning_rate": 1.9190832609445844e-05, - "loss": 1.6539, + "epoch": 0.41, + "learning_rate": 1.9592e-05, + "loss": 1.5171, "step": 51000 }, { - "epoch": 0.12, - "learning_rate": 1.9182899595812956e-05, - "loss": 1.6522, + "epoch": 0.41, + "learning_rate": 1.9588000000000003e-05, + "loss": 1.5166, "step": 51500 }, { - "epoch": 0.12, - "learning_rate": 1.9174966582180072e-05, - "loss": 1.6519, + "epoch": 0.42, + "learning_rate": 1.9584e-05, + "loss": 1.5168, "step": 52000 }, { - "epoch": 0.12, - "learning_rate": 1.9167033568547188e-05, - "loss": 1.6273, + "epoch": 0.42, + "learning_rate": 1.9580000000000002e-05, + "loss": 1.5112, "step": 52500 }, { - "epoch": 0.13, - "learning_rate": 1.9159100554914304e-05, - "loss": 1.6323, + "epoch": 0.42, + "learning_rate": 1.9576e-05, + "loss": 1.5218, "step": 53000 }, { - "epoch": 0.13, - "learning_rate": 1.9151167541281423e-05, - "loss": 1.6291, + "epoch": 0.43, + "learning_rate": 1.9572e-05, + "loss": 1.51, "step": 53500 }, { - "epoch": 0.13, - "learning_rate": 1.914323452764854e-05, - "loss": 1.6351, + "epoch": 0.43, + "learning_rate": 1.9568000000000003e-05, + "loss": 1.5093, "step": 54000 }, { - "epoch": 0.13, - "learning_rate": 1.9135301514015655e-05, - "loss": 1.6392, + "epoch": 0.44, + "learning_rate": 1.9564e-05, + "loss": 1.5038, "step": 54500 }, { - "epoch": 0.13, - "learning_rate": 1.912736850038277e-05, - "loss": 1.631, + "epoch": 0.44, + "learning_rate": 1.9560000000000002e-05, + "loss": 1.5069, "step": 55000 }, { - "epoch": 0.13, - "learning_rate": 1.9119435486749886e-05, - "loss": 1.6256, + "epoch": 0.44, + "learning_rate": 1.9556e-05, + "loss": 1.5045, "step": 55500 }, { - "epoch": 0.13, - "learning_rate": 1.9111502473117002e-05, - "loss": 1.6261, + "epoch": 0.45, + "learning_rate": 1.9552000000000002e-05, + "loss": 1.4999, "step": 56000 }, { - "epoch": 0.13, - "learning_rate": 1.9103569459484118e-05, - "loss": 1.64, + "epoch": 0.45, + "learning_rate": 1.9548000000000003e-05, + "loss": 1.5019, "step": 56500 }, { - "epoch": 0.14, - "learning_rate": 1.9095636445851234e-05, - "loss": 1.6334, + "epoch": 0.46, + "learning_rate": 1.9544e-05, + "loss": 1.5001, "step": 57000 }, { - "epoch": 0.14, - "learning_rate": 1.908770343221835e-05, - "loss": 1.6164, + "epoch": 0.46, + "learning_rate": 1.9540000000000003e-05, + "loss": 1.5014, "step": 57500 }, { - "epoch": 0.14, - "learning_rate": 1.9079770418585466e-05, - "loss": 1.6201, + "epoch": 0.46, + "learning_rate": 1.9536e-05, + "loss": 1.4977, "step": 58000 }, { - "epoch": 0.14, - "learning_rate": 1.907183740495258e-05, - "loss": 1.6119, + "epoch": 0.47, + "learning_rate": 1.9532000000000002e-05, + "loss": 1.4924, "step": 58500 }, { - "epoch": 0.14, - "learning_rate": 1.9063904391319697e-05, - "loss": 1.6112, + "epoch": 0.47, + "learning_rate": 1.9528000000000003e-05, + "loss": 1.4853, "step": 59000 }, { - "epoch": 0.14, - "learning_rate": 1.9055971377686813e-05, - "loss": 1.608, + "epoch": 0.48, + "learning_rate": 1.9524e-05, + "loss": 1.4857, "step": 59500 }, { - "epoch": 0.14, - "learning_rate": 1.9048038364053933e-05, - "loss": 1.61, + "epoch": 0.48, + "learning_rate": 1.9520000000000003e-05, + "loss": 1.4909, "step": 60000 }, { - "epoch": 0.14, - "learning_rate": 1.9040105350421045e-05, - "loss": 1.6156, + "epoch": 0.48, + "learning_rate": 1.9516e-05, + "loss": 1.4927, "step": 60500 }, { - "epoch": 0.15, - "learning_rate": 1.903217233678816e-05, - "loss": 1.5982, + "epoch": 0.49, + "learning_rate": 1.9512000000000002e-05, + "loss": 1.4781, "step": 61000 }, { - "epoch": 0.15, - "learning_rate": 1.9024239323155277e-05, - "loss": 1.6265, + "epoch": 0.49, + "learning_rate": 1.9508000000000004e-05, + "loss": 1.4841, "step": 61500 }, { - "epoch": 0.15, - "learning_rate": 1.9016306309522396e-05, - "loss": 1.5894, + "epoch": 0.5, + "learning_rate": 1.9504e-05, + "loss": 1.48, "step": 62000 }, { - "epoch": 0.15, - "learning_rate": 1.9008373295889512e-05, - "loss": 1.619, + "epoch": 0.5, + "learning_rate": 1.95e-05, + "loss": 1.4897, "step": 62500 }, { - "epoch": 0.15, - "learning_rate": 1.9000440282256628e-05, - "loss": 1.5871, + "epoch": 0.5, + "learning_rate": 1.9496e-05, + "loss": 1.4823, "step": 63000 }, { - "epoch": 0.15, - "learning_rate": 1.8992507268623744e-05, - "loss": 1.5939, + "epoch": 0.51, + "learning_rate": 1.9492000000000002e-05, + "loss": 1.4801, "step": 63500 }, { - "epoch": 0.15, - "learning_rate": 1.898457425499086e-05, - "loss": 1.5893, + "epoch": 0.51, + "learning_rate": 1.9488000000000004e-05, + "loss": 1.4817, "step": 64000 }, { - "epoch": 0.15, - "learning_rate": 1.8976641241357975e-05, - "loss": 1.5878, + "epoch": 0.52, + "learning_rate": 1.9484000000000002e-05, + "loss": 1.4841, "step": 64500 }, { - "epoch": 0.15, - "learning_rate": 1.896870822772509e-05, - "loss": 1.5954, + "epoch": 0.52, + "learning_rate": 1.948e-05, + "loss": 1.4823, "step": 65000 }, { - "epoch": 0.16, - "learning_rate": 1.8960775214092207e-05, - "loss": 1.5962, + "epoch": 0.52, + "learning_rate": 1.9476e-05, + "loss": 1.4758, "step": 65500 }, { - "epoch": 0.16, - "learning_rate": 1.8952842200459323e-05, - "loss": 1.6119, + "epoch": 0.53, + "learning_rate": 1.9472000000000003e-05, + "loss": 1.4731, "step": 66000 }, { - "epoch": 0.16, - "learning_rate": 1.894490918682644e-05, - "loss": 1.5776, + "epoch": 0.53, + "learning_rate": 1.9468000000000004e-05, + "loss": 1.4695, "step": 66500 }, { - "epoch": 0.16, - "learning_rate": 1.8936976173193555e-05, - "loss": 1.5796, + "epoch": 0.54, + "learning_rate": 1.9464000000000002e-05, + "loss": 1.4712, "step": 67000 }, { - "epoch": 0.16, - "learning_rate": 1.892904315956067e-05, - "loss": 1.6101, + "epoch": 0.54, + "learning_rate": 1.946e-05, + "loss": 1.4635, "step": 67500 }, { - "epoch": 0.16, - "learning_rate": 1.8921110145927786e-05, - "loss": 1.5797, + "epoch": 0.54, + "learning_rate": 1.9456e-05, + "loss": 1.4711, "step": 68000 }, { - "epoch": 0.16, - "learning_rate": 1.8913177132294906e-05, - "loss": 1.5989, + "epoch": 0.55, + "learning_rate": 1.9452000000000003e-05, + "loss": 1.472, "step": 68500 }, { - "epoch": 0.16, - "learning_rate": 1.8905244118662018e-05, - "loss": 1.5874, + "epoch": 0.55, + "learning_rate": 1.9448e-05, + "loss": 1.4643, "step": 69000 }, { - "epoch": 0.17, - "learning_rate": 1.8897311105029134e-05, - "loss": 1.5918, + "epoch": 0.56, + "learning_rate": 1.9444000000000002e-05, + "loss": 1.4622, "step": 69500 }, { - "epoch": 0.17, - "learning_rate": 1.888937809139625e-05, - "loss": 1.5872, + "epoch": 0.56, + "learning_rate": 1.944e-05, + "loss": 1.4658, "step": 70000 }, { - "epoch": 0.17, - "learning_rate": 1.8881445077763366e-05, - "loss": 1.6041, + "epoch": 0.56, + "learning_rate": 1.9436e-05, + "loss": 1.4688, "step": 70500 }, { - "epoch": 0.17, - "learning_rate": 1.8873512064130485e-05, - "loss": 1.5861, + "epoch": 0.57, + "learning_rate": 1.9432000000000003e-05, + "loss": 1.463, "step": 71000 }, { - "epoch": 0.17, - "learning_rate": 1.88655790504976e-05, - "loss": 1.5676, + "epoch": 0.57, + "learning_rate": 1.9428e-05, + "loss": 1.4603, "step": 71500 }, { - "epoch": 0.17, - "learning_rate": 1.8857646036864717e-05, - "loss": 1.5859, + "epoch": 0.58, + "learning_rate": 1.9424e-05, + "loss": 1.4553, "step": 72000 }, { - "epoch": 0.17, - "learning_rate": 1.8849713023231833e-05, - "loss": 1.5694, + "epoch": 0.58, + "learning_rate": 1.942e-05, + "loss": 1.4544, "step": 72500 }, { - "epoch": 0.17, - "learning_rate": 1.884178000959895e-05, - "loss": 1.5606, + "epoch": 0.58, + "learning_rate": 1.9416000000000002e-05, + "loss": 1.4559, "step": 73000 }, { - "epoch": 0.17, - "learning_rate": 1.8833846995966064e-05, - "loss": 1.5768, + "epoch": 0.59, + "learning_rate": 1.9412000000000003e-05, + "loss": 1.4488, "step": 73500 }, { - "epoch": 0.18, - "learning_rate": 1.882591398233318e-05, - "loss": 1.5834, + "epoch": 0.59, + "learning_rate": 1.9408e-05, + "loss": 1.4493, "step": 74000 }, { - "epoch": 0.18, - "learning_rate": 1.8817980968700296e-05, - "loss": 1.5815, + "epoch": 0.6, + "learning_rate": 1.9404e-05, + "loss": 1.4501, "step": 74500 }, { - "epoch": 0.18, - "learning_rate": 1.8810047955067412e-05, - "loss": 1.569, + "epoch": 0.6, + "learning_rate": 1.94e-05, + "loss": 1.4479, "step": 75000 }, { - "epoch": 0.18, - "learning_rate": 1.8802114941434528e-05, - "loss": 1.5839, + "epoch": 0.6, + "learning_rate": 1.9396000000000002e-05, + "loss": 1.4461, "step": 75500 }, { - "epoch": 0.18, - "learning_rate": 1.8794181927801644e-05, - "loss": 1.577, + "epoch": 0.61, + "learning_rate": 1.9392000000000003e-05, + "loss": 1.4441, "step": 76000 }, { - "epoch": 0.18, - "learning_rate": 1.878624891416876e-05, - "loss": 1.5707, + "epoch": 0.61, + "learning_rate": 1.9388e-05, + "loss": 1.4549, "step": 76500 }, { - "epoch": 0.18, - "learning_rate": 1.877831590053588e-05, - "loss": 1.5722, + "epoch": 0.62, + "learning_rate": 1.9384e-05, + "loss": 1.4426, "step": 77000 }, { - "epoch": 0.18, - "learning_rate": 1.8770382886902995e-05, - "loss": 1.5634, + "epoch": 0.62, + "learning_rate": 1.938e-05, + "loss": 1.4469, "step": 77500 }, { - "epoch": 0.19, - "learning_rate": 1.8762449873270107e-05, - "loss": 1.5612, + "epoch": 0.62, + "learning_rate": 1.9376000000000002e-05, + "loss": 1.431, "step": 78000 }, { - "epoch": 0.19, - "learning_rate": 1.8754516859637223e-05, - "loss": 1.5566, + "epoch": 0.63, + "learning_rate": 1.9372000000000004e-05, + "loss": 1.4444, "step": 78500 }, { - "epoch": 0.19, - "learning_rate": 1.874658384600434e-05, - "loss": 1.5693, + "epoch": 0.63, + "learning_rate": 1.9368e-05, + "loss": 1.4401, "step": 79000 }, { - "epoch": 0.19, - "learning_rate": 1.8738650832371458e-05, - "loss": 1.5542, + "epoch": 0.64, + "learning_rate": 1.9364e-05, + "loss": 1.4343, "step": 79500 }, { - "epoch": 0.19, - "learning_rate": 1.8730717818738574e-05, - "loss": 1.5495, + "epoch": 0.64, + "learning_rate": 1.936e-05, + "loss": 1.4358, "step": 80000 }, { - "epoch": 0.19, - "learning_rate": 1.872278480510569e-05, - "loss": 1.5419, + "epoch": 0.64, + "learning_rate": 1.9356000000000002e-05, + "loss": 1.4324, "step": 80500 }, { - "epoch": 0.19, - "learning_rate": 1.8714851791472806e-05, - "loss": 1.5529, + "epoch": 0.65, + "learning_rate": 1.9352e-05, + "loss": 1.4299, "step": 81000 }, { - "epoch": 0.19, - "learning_rate": 1.870691877783992e-05, - "loss": 1.5448, + "epoch": 0.65, + "learning_rate": 1.9348000000000002e-05, + "loss": 1.4306, "step": 81500 }, { - "epoch": 0.2, - "learning_rate": 1.8698985764207037e-05, - "loss": 1.5696, + "epoch": 0.66, + "learning_rate": 1.9344e-05, + "loss": 1.4315, "step": 82000 }, { - "epoch": 0.2, - "learning_rate": 1.8691052750574153e-05, - "loss": 1.5589, + "epoch": 0.66, + "learning_rate": 1.934e-05, + "loss": 1.4348, "step": 82500 }, { - "epoch": 0.2, - "learning_rate": 1.868311973694127e-05, - "loss": 1.5483, + "epoch": 0.66, + "learning_rate": 1.9336000000000003e-05, + "loss": 1.4284, "step": 83000 }, { - "epoch": 0.2, - "learning_rate": 1.8675186723308385e-05, - "loss": 1.5546, + "epoch": 0.67, + "learning_rate": 1.9332e-05, + "loss": 1.4277, "step": 83500 }, { - "epoch": 0.2, - "learning_rate": 1.86672537096755e-05, - "loss": 1.5535, + "epoch": 0.67, + "learning_rate": 1.9328000000000002e-05, + "loss": 1.4362, "step": 84000 }, { - "epoch": 0.2, - "learning_rate": 1.8659320696042617e-05, - "loss": 1.5528, + "epoch": 0.68, + "learning_rate": 1.9324e-05, + "loss": 1.4311, "step": 84500 }, { - "epoch": 0.2, - "learning_rate": 1.8651387682409733e-05, - "loss": 1.5478, + "epoch": 0.68, + "learning_rate": 1.932e-05, + "loss": 1.4264, "step": 85000 }, { - "epoch": 0.2, - "learning_rate": 1.8643454668776852e-05, - "loss": 1.5418, + "epoch": 0.68, + "learning_rate": 1.9316000000000003e-05, + "loss": 1.4241, "step": 85500 }, { - "epoch": 0.2, - "learning_rate": 1.8635521655143968e-05, - "loss": 1.5414, + "epoch": 0.69, + "learning_rate": 1.9312e-05, + "loss": 1.4169, "step": 86000 }, { - "epoch": 0.21, - "learning_rate": 1.8627588641511084e-05, - "loss": 1.542, + "epoch": 0.69, + "learning_rate": 1.9308000000000002e-05, + "loss": 1.4226, "step": 86500 }, { - "epoch": 0.21, - "learning_rate": 1.8619655627878196e-05, - "loss": 1.5685, + "epoch": 0.7, + "learning_rate": 1.9304e-05, + "loss": 1.4244, "step": 87000 }, { - "epoch": 0.21, - "learning_rate": 1.8611722614245312e-05, - "loss": 1.5616, + "epoch": 0.7, + "learning_rate": 1.93e-05, + "loss": 1.4194, "step": 87500 }, { - "epoch": 0.21, - "learning_rate": 1.860378960061243e-05, - "loss": 1.5236, + "epoch": 0.7, + "learning_rate": 1.9296000000000003e-05, + "loss": 1.4198, "step": 88000 }, { - "epoch": 0.21, - "learning_rate": 1.8595856586979547e-05, - "loss": 1.5412, + "epoch": 0.71, + "learning_rate": 1.9292e-05, + "loss": 1.4175, "step": 88500 }, { - "epoch": 0.21, - "learning_rate": 1.8587923573346663e-05, - "loss": 1.5395, + "epoch": 0.71, + "learning_rate": 1.9288000000000002e-05, + "loss": 1.4189, "step": 89000 }, { - "epoch": 0.21, - "learning_rate": 1.857999055971378e-05, - "loss": 1.537, + "epoch": 0.72, + "learning_rate": 1.9284e-05, + "loss": 1.4248, "step": 89500 }, { - "epoch": 0.21, - "learning_rate": 1.8572057546080895e-05, - "loss": 1.5405, + "epoch": 0.72, + "learning_rate": 1.9280000000000002e-05, + "loss": 1.4194, "step": 90000 }, { - "epoch": 0.22, - "learning_rate": 1.856412453244801e-05, - "loss": 1.5417, + "epoch": 0.72, + "learning_rate": 1.9276e-05, + "loss": 1.4118, "step": 90500 }, { - "epoch": 0.22, - "learning_rate": 1.8556191518815126e-05, - "loss": 1.5448, + "epoch": 0.73, + "learning_rate": 1.9272e-05, + "loss": 1.4141, "step": 91000 }, { - "epoch": 0.22, - "learning_rate": 1.8548258505182242e-05, - "loss": 1.5352, + "epoch": 0.73, + "learning_rate": 1.9268000000000003e-05, + "loss": 1.4151, "step": 91500 }, { - "epoch": 0.22, - "learning_rate": 1.8540325491549358e-05, - "loss": 1.531, + "epoch": 0.74, + "learning_rate": 1.9264e-05, + "loss": 1.4161, "step": 92000 }, { - "epoch": 0.22, - "learning_rate": 1.8532392477916474e-05, - "loss": 1.528, + "epoch": 0.74, + "learning_rate": 1.9260000000000002e-05, + "loss": 1.412, "step": 92500 }, { - "epoch": 0.22, - "learning_rate": 1.852445946428359e-05, - "loss": 1.5332, + "epoch": 0.74, + "learning_rate": 1.9256e-05, + "loss": 1.4086, "step": 93000 }, { - "epoch": 0.22, - "learning_rate": 1.8516526450650706e-05, - "loss": 1.523, + "epoch": 0.75, + "learning_rate": 1.9252e-05, + "loss": 1.4062, "step": 93500 }, { - "epoch": 0.22, - "learning_rate": 1.8508593437017825e-05, - "loss": 1.5142, + "epoch": 0.75, + "learning_rate": 1.9248000000000003e-05, + "loss": 1.4112, "step": 94000 }, { - "epoch": 0.22, - "learning_rate": 1.850066042338494e-05, - "loss": 1.5457, + "epoch": 0.76, + "learning_rate": 1.9244000000000004e-05, + "loss": 1.399, "step": 94500 }, { - "epoch": 0.23, - "learning_rate": 1.8492727409752057e-05, - "loss": 1.5238, + "epoch": 0.76, + "learning_rate": 1.9240000000000002e-05, + "loss": 1.4006, "step": 95000 }, { - "epoch": 0.23, - "learning_rate": 1.8484794396119172e-05, - "loss": 1.5247, + "epoch": 0.76, + "learning_rate": 1.9236e-05, + "loss": 1.4046, "step": 95500 }, { - "epoch": 0.23, - "learning_rate": 1.8476861382486285e-05, - "loss": 1.5395, + "epoch": 0.77, + "learning_rate": 1.9232e-05, + "loss": 1.4005, "step": 96000 }, { - "epoch": 0.23, - "learning_rate": 1.8468928368853404e-05, - "loss": 1.5273, + "epoch": 0.77, + "learning_rate": 1.9228000000000003e-05, + "loss": 1.4049, "step": 96500 }, { - "epoch": 0.23, - "learning_rate": 1.846099535522052e-05, - "loss": 1.5271, + "epoch": 0.78, + "learning_rate": 1.9224000000000004e-05, + "loss": 1.4011, "step": 97000 }, { - "epoch": 0.23, - "learning_rate": 1.8453062341587636e-05, - "loss": 1.5232, + "epoch": 0.78, + "learning_rate": 1.9220000000000002e-05, + "loss": 1.3991, "step": 97500 }, { - "epoch": 0.23, - "learning_rate": 1.8445129327954752e-05, - "loss": 1.5027, + "epoch": 0.78, + "learning_rate": 1.9216e-05, + "loss": 1.3992, "step": 98000 }, { - "epoch": 0.23, - "learning_rate": 1.8437196314321868e-05, - "loss": 1.5073, + "epoch": 0.79, + "learning_rate": 1.9212000000000002e-05, + "loss": 1.405, "step": 98500 }, { - "epoch": 0.24, - "learning_rate": 1.8429263300688984e-05, - "loss": 1.5218, + "epoch": 0.79, + "learning_rate": 1.9208000000000003e-05, + "loss": 1.4007, "step": 99000 }, { - "epoch": 0.24, - "learning_rate": 1.84213302870561e-05, - "loss": 1.5144, + "epoch": 0.8, + "learning_rate": 1.9204e-05, + "loss": 1.3973, "step": 99500 }, { - "epoch": 0.24, - "learning_rate": 1.8413397273423215e-05, - "loss": 1.5173, + "epoch": 0.8, + "learning_rate": 1.9200000000000003e-05, + "loss": 1.3959, "step": 100000 }, { - "epoch": 0.24, - "learning_rate": 1.840546425979033e-05, - "loss": 1.5381, + "epoch": 0.8, + "learning_rate": 1.9196e-05, + "loss": 1.4014, "step": 100500 }, { - "epoch": 0.24, - "learning_rate": 1.8397531246157447e-05, - "loss": 1.5068, + "epoch": 0.81, + "learning_rate": 1.9192000000000002e-05, + "loss": 1.3928, "step": 101000 }, { - "epoch": 0.24, - "learning_rate": 1.8389598232524563e-05, - "loss": 1.5072, + "epoch": 0.81, + "learning_rate": 1.9188000000000003e-05, + "loss": 1.3946, "step": 101500 }, { - "epoch": 0.24, - "learning_rate": 1.838166521889168e-05, - "loss": 1.5171, + "epoch": 0.82, + "learning_rate": 1.9184e-05, + "loss": 1.3904, "step": 102000 }, { - "epoch": 0.24, - "learning_rate": 1.8373732205258798e-05, - "loss": 1.5188, + "epoch": 0.82, + "learning_rate": 1.918e-05, + "loss": 1.3904, "step": 102500 }, { - "epoch": 0.25, - "learning_rate": 1.8365799191625914e-05, - "loss": 1.5285, + "epoch": 0.82, + "learning_rate": 1.9176e-05, + "loss": 1.3949, "step": 103000 }, { - "epoch": 0.25, - "learning_rate": 1.835786617799303e-05, - "loss": 1.5022, + "epoch": 0.83, + "learning_rate": 1.9172000000000002e-05, + "loss": 1.3948, "step": 103500 }, { - "epoch": 0.25, - "learning_rate": 1.8349933164360146e-05, - "loss": 1.5106, + "epoch": 0.83, + "learning_rate": 1.9168000000000004e-05, + "loss": 1.3859, "step": 104000 }, { - "epoch": 0.25, - "learning_rate": 1.834200015072726e-05, - "loss": 1.5098, + "epoch": 0.84, + "learning_rate": 1.9164e-05, + "loss": 1.3803, "step": 104500 }, { - "epoch": 0.25, - "learning_rate": 1.8334067137094377e-05, - "loss": 1.4992, + "epoch": 0.84, + "learning_rate": 1.916e-05, + "loss": 1.3885, "step": 105000 }, { - "epoch": 0.25, - "learning_rate": 1.8326134123461493e-05, - "loss": 1.4907, + "epoch": 0.84, + "learning_rate": 1.9156e-05, + "loss": 1.3773, "step": 105500 }, { - "epoch": 0.25, - "learning_rate": 1.831820110982861e-05, - "loss": 1.5323, + "epoch": 0.85, + "learning_rate": 1.9152000000000002e-05, + "loss": 1.3812, "step": 106000 }, { - "epoch": 0.25, - "learning_rate": 1.8310268096195725e-05, - "loss": 1.5156, + "epoch": 0.85, + "learning_rate": 1.9148000000000004e-05, + "loss": 1.3844, "step": 106500 }, { - "epoch": 0.25, - "learning_rate": 1.830233508256284e-05, - "loss": 1.5154, + "epoch": 0.86, + "learning_rate": 1.9144000000000002e-05, + "loss": 1.386, "step": 107000 }, { - "epoch": 0.26, - "learning_rate": 1.8294402068929957e-05, - "loss": 1.5203, + "epoch": 0.86, + "learning_rate": 1.914e-05, + "loss": 1.381, "step": 107500 }, { - "epoch": 0.26, - "learning_rate": 1.8286469055297072e-05, - "loss": 1.4912, + "epoch": 0.86, + "learning_rate": 1.9136e-05, + "loss": 1.388, "step": 108000 }, { - "epoch": 0.26, - "learning_rate": 1.827853604166419e-05, - "loss": 1.5087, + "epoch": 0.87, + "learning_rate": 1.9132000000000002e-05, + "loss": 1.3822, "step": 108500 }, { - "epoch": 0.26, - "learning_rate": 1.8270603028031308e-05, - "loss": 1.4957, + "epoch": 0.87, + "learning_rate": 1.9128e-05, + "loss": 1.3875, "step": 109000 }, { - "epoch": 0.26, - "learning_rate": 1.826267001439842e-05, - "loss": 1.5215, + "epoch": 0.88, + "learning_rate": 1.9124000000000002e-05, + "loss": 1.3869, "step": 109500 }, { - "epoch": 0.26, - "learning_rate": 1.8254737000765536e-05, - "loss": 1.5169, + "epoch": 0.88, + "learning_rate": 1.912e-05, + "loss": 1.3877, "step": 110000 }, { - "epoch": 0.26, - "learning_rate": 1.8246803987132652e-05, - "loss": 1.495, + "epoch": 0.88, + "learning_rate": 1.9116e-05, + "loss": 1.3814, "step": 110500 }, { - "epoch": 0.26, - "learning_rate": 1.8238870973499768e-05, - "loss": 1.4967, + "epoch": 0.89, + "learning_rate": 1.9112000000000003e-05, + "loss": 1.3852, "step": 111000 }, { - "epoch": 0.27, - "learning_rate": 1.8230937959866887e-05, - "loss": 1.5037, + "epoch": 0.89, + "learning_rate": 1.9108e-05, + "loss": 1.3797, "step": 111500 }, { - "epoch": 0.27, - "learning_rate": 1.8223004946234003e-05, - "loss": 1.4955, + "epoch": 0.9, + "learning_rate": 1.9104000000000002e-05, + "loss": 1.3692, "step": 112000 }, { - "epoch": 0.27, - "learning_rate": 1.821507193260112e-05, - "loss": 1.5085, + "epoch": 0.9, + "learning_rate": 1.91e-05, + "loss": 1.3773, "step": 112500 }, { - "epoch": 0.27, - "learning_rate": 1.8207138918968235e-05, - "loss": 1.503, + "epoch": 0.9, + "learning_rate": 1.9096e-05, + "loss": 1.3822, "step": 113000 }, { - "epoch": 0.27, - "learning_rate": 1.8199205905335347e-05, - "loss": 1.4851, + "epoch": 0.91, + "learning_rate": 1.9092000000000003e-05, + "loss": 1.3831, "step": 113500 }, { - "epoch": 0.27, - "learning_rate": 1.8191272891702466e-05, - "loss": 1.4809, + "epoch": 0.91, + "learning_rate": 1.9088e-05, + "loss": 1.3746, "step": 114000 }, { - "epoch": 0.27, - "learning_rate": 1.8183339878069582e-05, - "loss": 1.492, + "epoch": 0.92, + "learning_rate": 1.9084000000000002e-05, + "loss": 1.3697, "step": 114500 }, { - "epoch": 0.27, - "learning_rate": 1.8175406864436698e-05, - "loss": 1.4864, + "epoch": 0.92, + "learning_rate": 1.908e-05, + "loss": 1.3753, "step": 115000 }, { - "epoch": 0.27, - "learning_rate": 1.8167473850803814e-05, - "loss": 1.51, + "epoch": 0.92, + "learning_rate": 1.9076e-05, + "loss": 1.3764, "step": 115500 }, { - "epoch": 0.28, - "learning_rate": 1.815954083717093e-05, - "loss": 1.4747, + "epoch": 0.93, + "learning_rate": 1.9072000000000003e-05, + "loss": 1.3688, "step": 116000 }, { - "epoch": 0.28, - "learning_rate": 1.8151607823538046e-05, - "loss": 1.4872, + "epoch": 0.93, + "learning_rate": 1.9068e-05, + "loss": 1.3679, "step": 116500 }, { - "epoch": 0.28, - "learning_rate": 1.814367480990516e-05, - "loss": 1.5075, + "epoch": 0.94, + "learning_rate": 1.9064000000000002e-05, + "loss": 1.3676, "step": 117000 }, { - "epoch": 0.28, - "learning_rate": 1.813574179627228e-05, - "loss": 1.4848, + "epoch": 0.94, + "learning_rate": 1.906e-05, + "loss": 1.3733, "step": 117500 }, { - "epoch": 0.28, - "learning_rate": 1.8127808782639393e-05, - "loss": 1.5053, + "epoch": 0.94, + "learning_rate": 1.9056000000000002e-05, + "loss": 1.3679, "step": 118000 }, { - "epoch": 0.28, - "learning_rate": 1.811987576900651e-05, - "loss": 1.4734, + "epoch": 0.95, + "learning_rate": 1.9052000000000003e-05, + "loss": 1.3595, "step": 118500 }, { - "epoch": 0.28, - "learning_rate": 1.8111942755373625e-05, - "loss": 1.4905, + "epoch": 0.95, + "learning_rate": 1.9048e-05, + "loss": 1.3723, "step": 119000 }, { - "epoch": 0.28, - "learning_rate": 1.810400974174074e-05, - "loss": 1.5025, + "epoch": 0.96, + "learning_rate": 1.9044000000000003e-05, + "loss": 1.3695, "step": 119500 }, { - "epoch": 0.29, - "learning_rate": 1.809607672810786e-05, - "loss": 1.4873, + "epoch": 0.96, + "learning_rate": 1.904e-05, + "loss": 1.368, "step": 120000 }, { - "epoch": 0.29, - "learning_rate": 1.8088143714474976e-05, - "loss": 1.4957, + "epoch": 0.96, + "learning_rate": 1.9036000000000002e-05, + "loss": 1.3656, "step": 120500 }, { - "epoch": 0.29, - "learning_rate": 1.8080210700842092e-05, - "loss": 1.5084, + "epoch": 0.97, + "learning_rate": 1.9032e-05, + "loss": 1.364, "step": 121000 }, { - "epoch": 0.29, - "learning_rate": 1.8072277687209208e-05, - "loss": 1.4878, + "epoch": 0.97, + "learning_rate": 1.9028e-05, + "loss": 1.357, "step": 121500 }, { - "epoch": 0.29, - "learning_rate": 1.8064344673576323e-05, - "loss": 1.4849, + "epoch": 0.98, + "learning_rate": 1.9024000000000003e-05, + "loss": 1.3588, "step": 122000 }, { - "epoch": 0.29, - "learning_rate": 1.805641165994344e-05, - "loss": 1.4744, + "epoch": 0.98, + "learning_rate": 1.902e-05, + "loss": 1.361, "step": 122500 }, { - "epoch": 0.29, - "learning_rate": 1.8048478646310555e-05, - "loss": 1.4895, + "epoch": 0.98, + "learning_rate": 1.9016000000000002e-05, + "loss": 1.3631, "step": 123000 }, { - "epoch": 0.29, - "learning_rate": 1.804054563267767e-05, - "loss": 1.4884, + "epoch": 0.99, + "learning_rate": 1.9012e-05, + "loss": 1.3566, "step": 123500 }, { - "epoch": 0.3, - "learning_rate": 1.8032612619044787e-05, - "loss": 1.4721, + "epoch": 0.99, + "learning_rate": 1.9008e-05, + "loss": 1.3568, "step": 124000 }, { - "epoch": 0.3, - "learning_rate": 1.8024679605411903e-05, - "loss": 1.481, + "epoch": 1.0, + "learning_rate": 1.9004000000000003e-05, + "loss": 1.3615, "step": 124500 }, { - "epoch": 0.3, - "learning_rate": 1.801674659177902e-05, - "loss": 1.4799, + "epoch": 1.0, + "learning_rate": 1.9e-05, + "loss": 1.3577, "step": 125000 }, { - "epoch": 0.3, - "learning_rate": 1.8008813578146135e-05, - "loss": 1.4843, + "epoch": 1.0, + "eval_loss": 1.2994823455810547, + "eval_runtime": 80.8005, + "eval_samples_per_second": 166.002, + "eval_steps_per_second": 2.599, + "step": 125000 + }, + { + "epoch": 1.0, + "learning_rate": 1.8996000000000002e-05, + "loss": 1.3472, "step": 125500 }, { - "epoch": 0.3, - "learning_rate": 1.8000880564513254e-05, - "loss": 1.4828, + "epoch": 1.01, + "learning_rate": 1.8992e-05, + "loss": 1.3557, "step": 126000 }, { - "epoch": 0.3, - "learning_rate": 1.799294755088037e-05, - "loss": 1.4799, + "epoch": 1.01, + "learning_rate": 1.8988000000000002e-05, + "loss": 1.3538, "step": 126500 }, { - "epoch": 0.3, - "learning_rate": 1.7985014537247482e-05, - "loss": 1.4707, + "epoch": 1.02, + "learning_rate": 1.8984000000000003e-05, + "loss": 1.3584, "step": 127000 }, { - "epoch": 0.3, - "learning_rate": 1.7977081523614598e-05, - "loss": 1.4809, + "epoch": 1.02, + "learning_rate": 1.898e-05, + "loss": 1.3484, "step": 127500 }, { - "epoch": 0.3, - "learning_rate": 1.7969148509981714e-05, - "loss": 1.4778, + "epoch": 1.02, + "learning_rate": 1.8976000000000003e-05, + "loss": 1.351, "step": 128000 }, { - "epoch": 0.31, - "learning_rate": 1.7961215496348833e-05, - "loss": 1.4675, + "epoch": 1.03, + "learning_rate": 1.8972e-05, + "loss": 1.3485, "step": 128500 }, { - "epoch": 0.31, - "learning_rate": 1.795328248271595e-05, - "loss": 1.4854, + "epoch": 1.03, + "learning_rate": 1.8968000000000002e-05, + "loss": 1.3497, "step": 129000 }, { - "epoch": 0.31, - "learning_rate": 1.7945349469083065e-05, - "loss": 1.4501, + "epoch": 1.04, + "learning_rate": 1.8964000000000003e-05, + "loss": 1.3455, "step": 129500 }, { - "epoch": 0.31, - "learning_rate": 1.793741645545018e-05, - "loss": 1.4603, + "epoch": 1.04, + "learning_rate": 1.896e-05, + "loss": 1.3437, "step": 130000 }, { - "epoch": 0.31, - "learning_rate": 1.7929483441817297e-05, - "loss": 1.4748, + "epoch": 1.04, + "learning_rate": 1.8956e-05, + "loss": 1.3544, "step": 130500 }, { - "epoch": 0.31, - "learning_rate": 1.7921550428184412e-05, - "loss": 1.4578, + "epoch": 1.05, + "learning_rate": 1.8952e-05, + "loss": 1.3606, "step": 131000 }, { - "epoch": 0.31, - "learning_rate": 1.791361741455153e-05, - "loss": 1.4679, + "epoch": 1.05, + "learning_rate": 1.8948000000000002e-05, + "loss": 1.3514, "step": 131500 }, { - "epoch": 0.31, - "learning_rate": 1.7905684400918644e-05, - "loss": 1.4745, + "epoch": 1.06, + "learning_rate": 1.8944000000000004e-05, + "loss": 1.3377, "step": 132000 }, { - "epoch": 0.32, - "learning_rate": 1.789775138728576e-05, - "loss": 1.4779, + "epoch": 1.06, + "learning_rate": 1.894e-05, + "loss": 1.3421, "step": 132500 }, { - "epoch": 0.32, - "learning_rate": 1.7889818373652876e-05, - "loss": 1.4708, + "epoch": 1.06, + "learning_rate": 1.8936e-05, + "loss": 1.3454, "step": 133000 }, { - "epoch": 0.32, - "learning_rate": 1.7881885360019992e-05, - "loss": 1.4761, + "epoch": 1.07, + "learning_rate": 1.8932e-05, + "loss": 1.3386, "step": 133500 }, { - "epoch": 0.32, - "learning_rate": 1.7873952346387108e-05, - "loss": 1.4682, + "epoch": 1.07, + "learning_rate": 1.8928000000000002e-05, + "loss": 1.3444, "step": 134000 }, { - "epoch": 0.32, - "learning_rate": 1.7866019332754227e-05, - "loss": 1.47, + "epoch": 1.08, + "learning_rate": 1.8924000000000004e-05, + "loss": 1.3426, "step": 134500 }, { - "epoch": 0.32, - "learning_rate": 1.7858086319121343e-05, - "loss": 1.4355, + "epoch": 1.08, + "learning_rate": 1.8920000000000002e-05, + "loss": 1.344, "step": 135000 }, { - "epoch": 0.32, - "learning_rate": 1.785015330548846e-05, - "loss": 1.448, + "epoch": 1.08, + "learning_rate": 1.8916e-05, + "loss": 1.3443, "step": 135500 }, { - "epoch": 0.32, - "learning_rate": 1.784222029185557e-05, - "loss": 1.4724, + "epoch": 1.09, + "learning_rate": 1.8912e-05, + "loss": 1.3431, "step": 136000 }, { - "epoch": 0.32, - "learning_rate": 1.7834287278222687e-05, - "loss": 1.4684, + "epoch": 1.09, + "learning_rate": 1.8908000000000003e-05, + "loss": 1.341, "step": 136500 }, { - "epoch": 0.33, - "learning_rate": 1.7826354264589806e-05, - "loss": 1.4582, + "epoch": 1.1, + "learning_rate": 1.8904000000000004e-05, + "loss": 1.3528, "step": 137000 }, { - "epoch": 0.33, - "learning_rate": 1.7818421250956922e-05, - "loss": 1.4742, + "epoch": 1.1, + "learning_rate": 1.8900000000000002e-05, + "loss": 1.3424, "step": 137500 }, { - "epoch": 0.33, - "learning_rate": 1.7810488237324038e-05, - "loss": 1.4583, + "epoch": 1.1, + "learning_rate": 1.8896e-05, + "loss": 1.3506, "step": 138000 }, { - "epoch": 0.33, - "learning_rate": 1.7802555223691154e-05, - "loss": 1.465, + "epoch": 1.11, + "learning_rate": 1.8892e-05, + "loss": 1.3451, "step": 138500 }, { - "epoch": 0.33, - "learning_rate": 1.779462221005827e-05, - "loss": 1.4817, + "epoch": 1.11, + "learning_rate": 1.8888000000000003e-05, + "loss": 1.3451, "step": 139000 }, { - "epoch": 0.33, - "learning_rate": 1.7786689196425386e-05, - "loss": 1.4492, + "epoch": 1.12, + "learning_rate": 1.8884e-05, + "loss": 1.3366, "step": 139500 }, { - "epoch": 0.33, - "learning_rate": 1.77787561827925e-05, - "loss": 1.4506, + "epoch": 1.12, + "learning_rate": 1.8880000000000002e-05, + "loss": 1.34, "step": 140000 }, { - "epoch": 0.33, - "learning_rate": 1.7770823169159617e-05, - "loss": 1.4632, + "epoch": 1.12, + "learning_rate": 1.8876e-05, + "loss": 1.3465, "step": 140500 }, { - "epoch": 0.34, - "learning_rate": 1.7762890155526733e-05, - "loss": 1.4393, + "epoch": 1.13, + "learning_rate": 1.8872e-05, + "loss": 1.3462, "step": 141000 }, { - "epoch": 0.34, - "learning_rate": 1.775495714189385e-05, - "loss": 1.4658, + "epoch": 1.13, + "learning_rate": 1.8868000000000003e-05, + "loss": 1.3359, "step": 141500 }, { - "epoch": 0.34, - "learning_rate": 1.7747024128260965e-05, - "loss": 1.4482, + "epoch": 1.14, + "learning_rate": 1.8864e-05, + "loss": 1.3431, "step": 142000 }, { - "epoch": 0.34, - "learning_rate": 1.773909111462808e-05, - "loss": 1.4754, + "epoch": 1.14, + "learning_rate": 1.886e-05, + "loss": 1.337, "step": 142500 }, { - "epoch": 0.34, - "learning_rate": 1.77311581009952e-05, - "loss": 1.464, + "epoch": 1.14, + "learning_rate": 1.8856e-05, + "loss": 1.3355, "step": 143000 }, { - "epoch": 0.34, - "learning_rate": 1.7723225087362316e-05, - "loss": 1.4376, + "epoch": 1.15, + "learning_rate": 1.8852000000000002e-05, + "loss": 1.3374, "step": 143500 }, { - "epoch": 0.34, - "learning_rate": 1.7715292073729432e-05, - "loss": 1.4426, + "epoch": 1.15, + "learning_rate": 1.8848000000000003e-05, + "loss": 1.3362, "step": 144000 }, { - "epoch": 0.34, - "learning_rate": 1.7707359060096548e-05, - "loss": 1.453, + "epoch": 1.16, + "learning_rate": 1.8844e-05, + "loss": 1.334, "step": 144500 }, { - "epoch": 0.35, - "learning_rate": 1.769942604646366e-05, - "loss": 1.4593, + "epoch": 1.16, + "learning_rate": 1.884e-05, + "loss": 1.3311, "step": 145000 }, { - "epoch": 0.35, - "learning_rate": 1.769149303283078e-05, - "loss": 1.4472, + "epoch": 1.16, + "learning_rate": 1.8836e-05, + "loss": 1.3281, "step": 145500 }, { - "epoch": 0.35, - "learning_rate": 1.7683560019197895e-05, - "loss": 1.4566, + "epoch": 1.17, + "learning_rate": 1.8832000000000002e-05, + "loss": 1.338, "step": 146000 }, { - "epoch": 0.35, - "learning_rate": 1.767562700556501e-05, - "loss": 1.4435, + "epoch": 1.17, + "learning_rate": 1.8828000000000003e-05, + "loss": 1.3314, "step": 146500 }, { - "epoch": 0.35, - "learning_rate": 1.7667693991932127e-05, - "loss": 1.4489, + "epoch": 1.18, + "learning_rate": 1.8824e-05, + "loss": 1.3319, "step": 147000 }, { - "epoch": 0.35, - "learning_rate": 1.7659760978299243e-05, - "loss": 1.4362, + "epoch": 1.18, + "learning_rate": 1.882e-05, + "loss": 1.3316, "step": 147500 }, { - "epoch": 0.35, - "learning_rate": 1.765182796466636e-05, - "loss": 1.4713, + "epoch": 1.18, + "learning_rate": 1.8816e-05, + "loss": 1.329, "step": 148000 }, { - "epoch": 0.35, - "learning_rate": 1.7643894951033474e-05, - "loss": 1.4414, + "epoch": 1.19, + "learning_rate": 1.8812000000000002e-05, + "loss": 1.3322, "step": 148500 }, { - "epoch": 0.35, - "learning_rate": 1.763596193740059e-05, - "loss": 1.4431, + "epoch": 1.19, + "learning_rate": 1.8808e-05, + "loss": 1.3236, "step": 149000 }, { - "epoch": 0.36, - "learning_rate": 1.7628028923767706e-05, - "loss": 1.4597, + "epoch": 1.2, + "learning_rate": 1.8804e-05, + "loss": 1.3237, "step": 149500 }, { - "epoch": 0.36, - "learning_rate": 1.7620095910134822e-05, - "loss": 1.454, + "epoch": 1.2, + "learning_rate": 1.88e-05, + "loss": 1.3316, "step": 150000 }, { - "epoch": 0.36, - "learning_rate": 1.7612162896501938e-05, - "loss": 1.4594, + "epoch": 1.2, + "learning_rate": 1.8796e-05, + "loss": 1.3354, "step": 150500 }, { - "epoch": 0.36, - "learning_rate": 1.7604229882869054e-05, - "loss": 1.4496, + "epoch": 1.21, + "learning_rate": 1.8792000000000002e-05, + "loss": 1.3304, "step": 151000 }, { - "epoch": 0.36, - "learning_rate": 1.759629686923617e-05, - "loss": 1.4593, + "epoch": 1.21, + "learning_rate": 1.8788e-05, + "loss": 1.3295, "step": 151500 }, { - "epoch": 0.36, - "learning_rate": 1.758836385560329e-05, - "loss": 1.462, + "epoch": 1.22, + "learning_rate": 1.8784000000000002e-05, + "loss": 1.3275, "step": 152000 }, { - "epoch": 0.36, - "learning_rate": 1.7580430841970405e-05, - "loss": 1.4471, + "epoch": 1.22, + "learning_rate": 1.878e-05, + "loss": 1.3288, "step": 152500 }, { - "epoch": 0.36, - "learning_rate": 1.757249782833752e-05, - "loss": 1.4455, + "epoch": 1.22, + "learning_rate": 1.8776e-05, + "loss": 1.3341, "step": 153000 }, { - "epoch": 0.37, - "learning_rate": 1.7564564814704637e-05, - "loss": 1.4595, + "epoch": 1.23, + "learning_rate": 1.8772000000000003e-05, + "loss": 1.3203, "step": 153500 }, { - "epoch": 0.37, - "learning_rate": 1.755663180107175e-05, - "loss": 1.4522, + "epoch": 1.23, + "learning_rate": 1.8768e-05, + "loss": 1.3242, "step": 154000 }, { - "epoch": 0.37, - "learning_rate": 1.7548698787438868e-05, - "loss": 1.4458, + "epoch": 1.24, + "learning_rate": 1.8764000000000002e-05, + "loss": 1.3234, "step": 154500 }, { - "epoch": 0.37, - "learning_rate": 1.7540765773805984e-05, - "loss": 1.443, + "epoch": 1.24, + "learning_rate": 1.876e-05, + "loss": 1.3264, "step": 155000 }, { - "epoch": 0.37, - "learning_rate": 1.75328327601731e-05, - "loss": 1.4546, + "epoch": 1.24, + "learning_rate": 1.8756e-05, + "loss": 1.3247, "step": 155500 }, { - "epoch": 0.37, - "learning_rate": 1.7524899746540216e-05, - "loss": 1.466, + "epoch": 1.25, + "learning_rate": 1.8752000000000003e-05, + "loss": 1.3219, "step": 156000 }, { - "epoch": 0.37, - "learning_rate": 1.7516966732907332e-05, - "loss": 1.4545, + "epoch": 1.25, + "learning_rate": 1.8748e-05, + "loss": 1.3211, "step": 156500 }, { - "epoch": 0.37, - "learning_rate": 1.7509033719274448e-05, - "loss": 1.4423, + "epoch": 1.26, + "learning_rate": 1.8744000000000002e-05, + "loss": 1.3198, "step": 157000 }, { - "epoch": 0.37, - "learning_rate": 1.7501100705641563e-05, - "loss": 1.4351, + "epoch": 1.26, + "learning_rate": 1.8740000000000004e-05, + "loss": 1.3225, "step": 157500 }, { - "epoch": 0.38, - "learning_rate": 1.7493167692008683e-05, - "loss": 1.4404, + "epoch": 1.26, + "learning_rate": 1.8736e-05, + "loss": 1.3175, "step": 158000 }, { - "epoch": 0.38, - "learning_rate": 1.7485234678375795e-05, - "loss": 1.4345, + "epoch": 1.27, + "learning_rate": 1.8732000000000003e-05, + "loss": 1.3248, "step": 158500 }, { - "epoch": 0.38, - "learning_rate": 1.747730166474291e-05, - "loss": 1.4435, + "epoch": 1.27, + "learning_rate": 1.8728e-05, + "loss": 1.316, "step": 159000 }, { - "epoch": 0.38, - "learning_rate": 1.7469368651110027e-05, - "loss": 1.4248, + "epoch": 1.28, + "learning_rate": 1.8724000000000002e-05, + "loss": 1.3208, "step": 159500 }, { - "epoch": 0.38, - "learning_rate": 1.7461435637477143e-05, - "loss": 1.4276, + "epoch": 1.28, + "learning_rate": 1.8720000000000004e-05, + "loss": 1.3132, "step": 160000 }, { - "epoch": 0.38, - "learning_rate": 1.7453502623844262e-05, - "loss": 1.4206, + "epoch": 1.28, + "learning_rate": 1.8716000000000002e-05, + "loss": 1.3097, "step": 160500 }, { - "epoch": 0.38, - "learning_rate": 1.7445569610211378e-05, - "loss": 1.4408, + "epoch": 1.29, + "learning_rate": 1.8712e-05, + "loss": 1.3181, "step": 161000 }, { - "epoch": 0.38, - "learning_rate": 1.7437636596578494e-05, - "loss": 1.4219, + "epoch": 1.29, + "learning_rate": 1.8708e-05, + "loss": 1.3199, "step": 161500 }, { - "epoch": 0.39, - "learning_rate": 1.742970358294561e-05, - "loss": 1.4568, + "epoch": 1.3, + "learning_rate": 1.8704000000000003e-05, + "loss": 1.3191, "step": 162000 }, { - "epoch": 0.39, - "learning_rate": 1.7421770569312722e-05, - "loss": 1.4342, + "epoch": 1.3, + "learning_rate": 1.8700000000000004e-05, + "loss": 1.3189, "step": 162500 }, { - "epoch": 0.39, - "learning_rate": 1.741383755567984e-05, - "loss": 1.443, + "epoch": 1.3, + "learning_rate": 1.8696000000000002e-05, + "loss": 1.319, "step": 163000 }, { - "epoch": 0.39, - "learning_rate": 1.7405904542046957e-05, - "loss": 1.4288, + "epoch": 1.31, + "learning_rate": 1.8692e-05, + "loss": 1.3203, "step": 163500 }, { - "epoch": 0.39, - "learning_rate": 1.7397971528414073e-05, - "loss": 1.4399, + "epoch": 1.31, + "learning_rate": 1.8688e-05, + "loss": 1.3155, "step": 164000 }, { - "epoch": 0.39, - "learning_rate": 1.739003851478119e-05, - "loss": 1.4421, + "epoch": 1.32, + "learning_rate": 1.8684000000000003e-05, + "loss": 1.3148, "step": 164500 }, { - "epoch": 0.39, - "learning_rate": 1.7382105501148305e-05, - "loss": 1.4323, + "epoch": 1.32, + "learning_rate": 1.8680000000000004e-05, + "loss": 1.319, "step": 165000 }, { - "epoch": 0.39, - "learning_rate": 1.737417248751542e-05, - "loss": 1.434, + "epoch": 1.32, + "learning_rate": 1.8676000000000002e-05, + "loss": 1.3105, "step": 165500 }, { - "epoch": 0.4, - "learning_rate": 1.7366239473882537e-05, - "loss": 1.4261, + "epoch": 1.33, + "learning_rate": 1.8672e-05, + "loss": 1.3117, "step": 166000 }, { - "epoch": 0.4, - "learning_rate": 1.7358306460249656e-05, - "loss": 1.4221, + "epoch": 1.33, + "learning_rate": 1.8668e-05, + "loss": 1.3119, "step": 166500 }, { - "epoch": 0.4, - "learning_rate": 1.7350373446616768e-05, - "loss": 1.4418, + "epoch": 1.34, + "learning_rate": 1.8664000000000003e-05, + "loss": 1.3159, "step": 167000 }, { - "epoch": 0.4, - "learning_rate": 1.7342440432983884e-05, - "loss": 1.4297, + "epoch": 1.34, + "learning_rate": 1.866e-05, + "loss": 1.307, "step": 167500 }, { - "epoch": 0.4, - "learning_rate": 1.7334507419351e-05, - "loss": 1.419, + "epoch": 1.34, + "learning_rate": 1.8656000000000002e-05, + "loss": 1.3063, "step": 168000 }, { - "epoch": 0.4, - "learning_rate": 1.7326574405718116e-05, - "loss": 1.4328, + "epoch": 1.35, + "learning_rate": 1.8652e-05, + "loss": 1.3102, "step": 168500 }, { - "epoch": 0.4, - "learning_rate": 1.7318641392085235e-05, - "loss": 1.4227, + "epoch": 1.35, + "learning_rate": 1.8648000000000002e-05, + "loss": 1.3134, "step": 169000 }, { - "epoch": 0.4, - "learning_rate": 1.731070837845235e-05, - "loss": 1.432, + "epoch": 1.36, + "learning_rate": 1.8644000000000003e-05, + "loss": 1.314, "step": 169500 }, { - "epoch": 0.4, - "learning_rate": 1.7302775364819467e-05, - "loss": 1.4383, + "epoch": 1.36, + "learning_rate": 1.864e-05, + "loss": 1.3111, "step": 170000 }, { - "epoch": 0.41, - "learning_rate": 1.7294842351186583e-05, - "loss": 1.4363, + "epoch": 1.36, + "learning_rate": 1.8636e-05, + "loss": 1.3047, "step": 170500 }, { - "epoch": 0.41, - "learning_rate": 1.72869093375537e-05, - "loss": 1.438, + "epoch": 1.37, + "learning_rate": 1.8632e-05, + "loss": 1.3095, "step": 171000 }, { - "epoch": 0.41, - "learning_rate": 1.7278976323920814e-05, - "loss": 1.4065, + "epoch": 1.37, + "learning_rate": 1.8628000000000002e-05, + "loss": 1.3066, "step": 171500 }, { - "epoch": 0.41, - "learning_rate": 1.727104331028793e-05, - "loss": 1.4143, + "epoch": 1.38, + "learning_rate": 1.8624000000000003e-05, + "loss": 1.3071, "step": 172000 }, { - "epoch": 0.41, - "learning_rate": 1.7263110296655046e-05, - "loss": 1.4316, + "epoch": 1.38, + "learning_rate": 1.862e-05, + "loss": 1.2985, "step": 172500 }, { - "epoch": 0.41, - "learning_rate": 1.7255177283022162e-05, - "loss": 1.4189, + "epoch": 1.38, + "learning_rate": 1.8616e-05, + "loss": 1.3103, "step": 173000 }, { - "epoch": 0.41, - "learning_rate": 1.7247244269389278e-05, - "loss": 1.4218, + "epoch": 1.39, + "learning_rate": 1.8612e-05, + "loss": 1.3106, "step": 173500 }, { - "epoch": 0.41, - "learning_rate": 1.7239311255756394e-05, - "loss": 1.4213, + "epoch": 1.39, + "learning_rate": 1.8608000000000002e-05, + "loss": 1.3055, "step": 174000 }, { - "epoch": 0.42, - "learning_rate": 1.723137824212351e-05, - "loss": 1.4286, + "epoch": 1.4, + "learning_rate": 1.8604000000000003e-05, + "loss": 1.3074, "step": 174500 }, { - "epoch": 0.42, - "learning_rate": 1.722344522849063e-05, - "loss": 1.4231, + "epoch": 1.4, + "learning_rate": 1.86e-05, + "loss": 1.3028, "step": 175000 }, { - "epoch": 0.42, - "learning_rate": 1.7215512214857745e-05, - "loss": 1.4295, + "epoch": 1.4, + "learning_rate": 1.8596e-05, + "loss": 1.3027, "step": 175500 }, { - "epoch": 0.42, - "learning_rate": 1.7207579201224857e-05, - "loss": 1.4329, + "epoch": 1.41, + "learning_rate": 1.8592e-05, + "loss": 1.2988, "step": 176000 }, { - "epoch": 0.42, - "learning_rate": 1.7199646187591973e-05, - "loss": 1.413, + "epoch": 1.41, + "learning_rate": 1.8588000000000002e-05, + "loss": 1.303, "step": 176500 }, { - "epoch": 0.42, - "learning_rate": 1.719171317395909e-05, - "loss": 1.4222, + "epoch": 1.42, + "learning_rate": 1.8584000000000004e-05, + "loss": 1.3009, "step": 177000 }, { - "epoch": 0.42, - "learning_rate": 1.7183780160326208e-05, - "loss": 1.4188, + "epoch": 1.42, + "learning_rate": 1.858e-05, + "loss": 1.3023, "step": 177500 }, { - "epoch": 0.42, - "learning_rate": 1.7175847146693324e-05, - "loss": 1.4304, + "epoch": 1.42, + "learning_rate": 1.8576e-05, + "loss": 1.3051, "step": 178000 }, { - "epoch": 0.42, - "learning_rate": 1.716791413306044e-05, - "loss": 1.4011, + "epoch": 1.43, + "learning_rate": 1.8572e-05, + "loss": 1.3032, "step": 178500 }, { - "epoch": 0.43, - "learning_rate": 1.7159981119427556e-05, - "loss": 1.4098, + "epoch": 1.43, + "learning_rate": 1.8568000000000002e-05, + "loss": 1.3051, "step": 179000 }, { - "epoch": 0.43, - "learning_rate": 1.715204810579467e-05, - "loss": 1.4229, + "epoch": 1.44, + "learning_rate": 1.8564e-05, + "loss": 1.303, "step": 179500 }, { - "epoch": 0.43, - "learning_rate": 1.7144115092161788e-05, - "loss": 1.4314, + "epoch": 1.44, + "learning_rate": 1.8560000000000002e-05, + "loss": 1.2975, "step": 180000 }, { - "epoch": 0.43, - "learning_rate": 1.7136182078528903e-05, - "loss": 1.437, + "epoch": 1.44, + "learning_rate": 1.8556e-05, + "loss": 1.2985, "step": 180500 }, { - "epoch": 0.43, - "learning_rate": 1.712824906489602e-05, - "loss": 1.4239, + "epoch": 1.45, + "learning_rate": 1.8552e-05, + "loss": 1.3044, "step": 181000 }, { - "epoch": 0.43, - "learning_rate": 1.7120316051263135e-05, - "loss": 1.4092, + "epoch": 1.45, + "learning_rate": 1.8548000000000003e-05, + "loss": 1.2983, "step": 181500 }, { - "epoch": 0.43, - "learning_rate": 1.711238303763025e-05, - "loss": 1.4114, + "epoch": 1.46, + "learning_rate": 1.8544e-05, + "loss": 1.2999, "step": 182000 }, { - "epoch": 0.43, - "learning_rate": 1.7104450023997367e-05, - "loss": 1.4131, + "epoch": 1.46, + "learning_rate": 1.8540000000000002e-05, + "loss": 1.3027, "step": 182500 }, { - "epoch": 0.44, - "learning_rate": 1.7096517010364483e-05, - "loss": 1.4138, + "epoch": 1.46, + "learning_rate": 1.8536e-05, + "loss": 1.3021, "step": 183000 }, { - "epoch": 0.44, - "learning_rate": 1.7088583996731602e-05, - "loss": 1.4207, + "epoch": 1.47, + "learning_rate": 1.8532e-05, + "loss": 1.2962, "step": 183500 }, { - "epoch": 0.44, - "learning_rate": 1.7080650983098718e-05, - "loss": 1.4036, + "epoch": 1.47, + "learning_rate": 1.8528000000000003e-05, + "loss": 1.2929, "step": 184000 }, { - "epoch": 0.44, - "learning_rate": 1.7072717969465834e-05, - "loss": 1.4177, + "epoch": 1.48, + "learning_rate": 1.8524e-05, + "loss": 1.3006, "step": 184500 }, { - "epoch": 0.44, - "learning_rate": 1.7064784955832946e-05, - "loss": 1.4006, + "epoch": 1.48, + "learning_rate": 1.8520000000000002e-05, + "loss": 1.2888, "step": 185000 }, { - "epoch": 0.44, - "learning_rate": 1.7056851942200062e-05, - "loss": 1.4258, + "epoch": 1.48, + "learning_rate": 1.8516e-05, + "loss": 1.2982, "step": 185500 }, { - "epoch": 0.44, - "learning_rate": 1.704891892856718e-05, - "loss": 1.4067, + "epoch": 1.49, + "learning_rate": 1.8512e-05, + "loss": 1.2911, "step": 186000 }, { - "epoch": 0.44, - "learning_rate": 1.7040985914934297e-05, - "loss": 1.4066, + "epoch": 1.49, + "learning_rate": 1.8508000000000003e-05, + "loss": 1.2942, "step": 186500 }, { - "epoch": 0.45, - "learning_rate": 1.7033052901301413e-05, - "loss": 1.4097, + "epoch": 1.5, + "learning_rate": 1.8504e-05, + "loss": 1.3, "step": 187000 }, { - "epoch": 0.45, - "learning_rate": 1.702511988766853e-05, - "loss": 1.3993, + "epoch": 1.5, + "learning_rate": 1.8500000000000002e-05, + "loss": 1.2996, "step": 187500 }, { - "epoch": 0.45, - "learning_rate": 1.7017186874035645e-05, - "loss": 1.3967, + "epoch": 1.5, + "learning_rate": 1.8496e-05, + "loss": 1.2964, "step": 188000 }, { - "epoch": 0.45, - "learning_rate": 1.700925386040276e-05, - "loss": 1.3943, + "epoch": 1.51, + "learning_rate": 1.8492000000000002e-05, + "loss": 1.2862, "step": 188500 }, { - "epoch": 0.45, - "learning_rate": 1.7001320846769876e-05, - "loss": 1.4084, + "epoch": 1.51, + "learning_rate": 1.8488e-05, + "loss": 1.2911, "step": 189000 }, { - "epoch": 0.45, - "learning_rate": 1.6993387833136992e-05, - "loss": 1.4102, + "epoch": 1.52, + "learning_rate": 1.8484e-05, + "loss": 1.295, "step": 189500 }, { - "epoch": 0.45, - "learning_rate": 1.6985454819504108e-05, - "loss": 1.3981, + "epoch": 1.52, + "learning_rate": 1.8480000000000003e-05, + "loss": 1.2934, "step": 190000 }, { - "epoch": 0.45, - "learning_rate": 1.6977521805871224e-05, - "loss": 1.4251, + "epoch": 1.52, + "learning_rate": 1.8476e-05, + "loss": 1.2915, "step": 190500 }, { - "epoch": 0.45, - "learning_rate": 1.696958879223834e-05, - "loss": 1.4113, + "epoch": 1.53, + "learning_rate": 1.8472000000000002e-05, + "loss": 1.289, "step": 191000 }, { - "epoch": 0.46, - "learning_rate": 1.6961655778605456e-05, - "loss": 1.3979, + "epoch": 1.53, + "learning_rate": 1.8468e-05, + "loss": 1.2858, "step": 191500 }, { - "epoch": 0.46, - "learning_rate": 1.695372276497257e-05, - "loss": 1.4171, + "epoch": 1.54, + "learning_rate": 1.8464e-05, + "loss": 1.298, "step": 192000 }, { - "epoch": 0.46, - "learning_rate": 1.694578975133969e-05, - "loss": 1.4015, + "epoch": 1.54, + "learning_rate": 1.8460000000000003e-05, + "loss": 1.2918, "step": 192500 }, { - "epoch": 0.46, - "learning_rate": 1.6937856737706807e-05, - "loss": 1.4109, + "epoch": 1.54, + "learning_rate": 1.8456e-05, + "loss": 1.2922, "step": 193000 }, { - "epoch": 0.46, - "learning_rate": 1.6929923724073923e-05, - "loss": 1.4151, + "epoch": 1.55, + "learning_rate": 1.8452000000000002e-05, + "loss": 1.2901, "step": 193500 }, { - "epoch": 0.46, - "learning_rate": 1.6921990710441035e-05, - "loss": 1.4171, + "epoch": 1.55, + "learning_rate": 1.8448e-05, + "loss": 1.2854, "step": 194000 }, { - "epoch": 0.46, - "learning_rate": 1.691405769680815e-05, - "loss": 1.4033, + "epoch": 1.56, + "learning_rate": 1.8444e-05, + "loss": 1.2853, "step": 194500 }, { - "epoch": 0.46, - "learning_rate": 1.690612468317527e-05, - "loss": 1.4259, + "epoch": 1.56, + "learning_rate": 1.8440000000000003e-05, + "loss": 1.2896, "step": 195000 }, { - "epoch": 0.47, - "learning_rate": 1.6898191669542386e-05, - "loss": 1.3964, + "epoch": 1.56, + "learning_rate": 1.8436e-05, + "loss": 1.2873, "step": 195500 }, { - "epoch": 0.47, - "learning_rate": 1.6890258655909502e-05, - "loss": 1.4096, + "epoch": 1.57, + "learning_rate": 1.8432000000000002e-05, + "loss": 1.2856, "step": 196000 }, { - "epoch": 0.47, - "learning_rate": 1.6882325642276618e-05, - "loss": 1.3983, + "epoch": 1.57, + "learning_rate": 1.8428e-05, + "loss": 1.2851, "step": 196500 }, { - "epoch": 0.47, - "learning_rate": 1.6874392628643734e-05, - "loss": 1.396, + "epoch": 1.58, + "learning_rate": 1.8424000000000002e-05, + "loss": 1.2843, "step": 197000 }, { - "epoch": 0.47, - "learning_rate": 1.686645961501085e-05, - "loss": 1.3992, + "epoch": 1.58, + "learning_rate": 1.8420000000000003e-05, + "loss": 1.2896, "step": 197500 }, { - "epoch": 0.47, - "learning_rate": 1.6858526601377965e-05, - "loss": 1.3996, + "epoch": 1.58, + "learning_rate": 1.8416e-05, + "loss": 1.2893, "step": 198000 }, { - "epoch": 0.47, - "learning_rate": 1.685059358774508e-05, - "loss": 1.3947, + "epoch": 1.59, + "learning_rate": 1.8412000000000003e-05, + "loss": 1.2837, "step": 198500 }, { - "epoch": 0.47, - "learning_rate": 1.6842660574112197e-05, - "loss": 1.4166, + "epoch": 1.59, + "learning_rate": 1.8408e-05, + "loss": 1.2922, "step": 199000 }, { - "epoch": 0.47, - "learning_rate": 1.6834727560479313e-05, - "loss": 1.403, + "epoch": 1.6, + "learning_rate": 1.8404000000000002e-05, + "loss": 1.289, "step": 199500 }, { - "epoch": 0.48, - "learning_rate": 1.682679454684643e-05, - "loss": 1.4067, + "epoch": 1.6, + "learning_rate": 1.8400000000000003e-05, + "loss": 1.2923, "step": 200000 }, { - "epoch": 0.48, - "learning_rate": 1.6818861533213545e-05, - "loss": 1.3818, + "epoch": 1.6, + "learning_rate": 1.8396e-05, + "loss": 1.2859, "step": 200500 }, { - "epoch": 0.48, - "learning_rate": 1.6810928519580664e-05, - "loss": 1.425, + "epoch": 1.61, + "learning_rate": 1.8392e-05, + "loss": 1.2819, "step": 201000 }, { - "epoch": 0.48, - "learning_rate": 1.680299550594778e-05, - "loss": 1.4004, + "epoch": 1.61, + "learning_rate": 1.8388e-05, + "loss": 1.2864, "step": 201500 }, { - "epoch": 0.48, - "learning_rate": 1.6795062492314896e-05, - "loss": 1.3936, + "epoch": 1.62, + "learning_rate": 1.8384000000000002e-05, + "loss": 1.2828, "step": 202000 }, { - "epoch": 0.48, - "learning_rate": 1.678712947868201e-05, - "loss": 1.3815, + "epoch": 1.62, + "learning_rate": 1.8380000000000004e-05, + "loss": 1.2883, "step": 202500 }, { - "epoch": 0.48, - "learning_rate": 1.6779196465049124e-05, - "loss": 1.3987, + "epoch": 1.62, + "learning_rate": 1.8376e-05, + "loss": 1.2833, "step": 203000 }, { - "epoch": 0.48, - "learning_rate": 1.6771263451416243e-05, - "loss": 1.4018, + "epoch": 1.63, + "learning_rate": 1.8372e-05, + "loss": 1.284, "step": 203500 }, { - "epoch": 0.49, - "learning_rate": 1.676333043778336e-05, - "loss": 1.3852, + "epoch": 1.63, + "learning_rate": 1.8368e-05, + "loss": 1.2811, "step": 204000 }, { - "epoch": 0.49, - "learning_rate": 1.6755397424150475e-05, - "loss": 1.4024, + "epoch": 1.64, + "learning_rate": 1.8364000000000002e-05, + "loss": 1.2818, "step": 204500 }, { - "epoch": 0.49, - "learning_rate": 1.674746441051759e-05, - "loss": 1.3827, + "epoch": 1.64, + "learning_rate": 1.8360000000000004e-05, + "loss": 1.279, "step": 205000 }, { - "epoch": 0.49, - "learning_rate": 1.6739531396884707e-05, - "loss": 1.4046, + "epoch": 1.64, + "learning_rate": 1.8356000000000002e-05, + "loss": 1.2823, "step": 205500 }, { - "epoch": 0.49, - "learning_rate": 1.6731598383251823e-05, - "loss": 1.3966, + "epoch": 1.65, + "learning_rate": 1.8352e-05, + "loss": 1.2842, "step": 206000 }, { - "epoch": 0.49, - "learning_rate": 1.672366536961894e-05, - "loss": 1.3957, + "epoch": 1.65, + "learning_rate": 1.8348e-05, + "loss": 1.2854, "step": 206500 }, { - "epoch": 0.49, - "learning_rate": 1.6715732355986054e-05, - "loss": 1.4029, + "epoch": 1.66, + "learning_rate": 1.8344000000000003e-05, + "loss": 1.2787, "step": 207000 }, { - "epoch": 0.49, - "learning_rate": 1.670779934235317e-05, - "loss": 1.4036, + "epoch": 1.66, + "learning_rate": 1.834e-05, + "loss": 1.285, "step": 207500 }, { - "epoch": 0.5, - "learning_rate": 1.6699866328720286e-05, - "loss": 1.3773, + "epoch": 1.66, + "learning_rate": 1.8336000000000002e-05, + "loss": 1.2851, "step": 208000 }, { - "epoch": 0.5, - "learning_rate": 1.6691933315087402e-05, - "loss": 1.4042, + "epoch": 1.67, + "learning_rate": 1.8332e-05, + "loss": 1.277, "step": 208500 }, { - "epoch": 0.5, - "learning_rate": 1.6684000301454518e-05, - "loss": 1.395, + "epoch": 1.67, + "learning_rate": 1.8328e-05, + "loss": 1.2828, "step": 209000 }, { - "epoch": 0.5, - "learning_rate": 1.6676067287821637e-05, - "loss": 1.3992, + "epoch": 1.68, + "learning_rate": 1.8324000000000003e-05, + "loss": 1.2741, "step": 209500 }, { - "epoch": 0.5, - "learning_rate": 1.6668134274188753e-05, - "loss": 1.404, + "epoch": 1.68, + "learning_rate": 1.832e-05, + "loss": 1.2744, "step": 210000 }, { - "epoch": 0.5, - "learning_rate": 1.666020126055587e-05, - "loss": 1.4033, + "epoch": 1.68, + "learning_rate": 1.8316e-05, + "loss": 1.2743, "step": 210500 }, { - "epoch": 0.5, - "learning_rate": 1.6652268246922985e-05, - "loss": 1.3797, + "epoch": 1.69, + "learning_rate": 1.8312e-05, + "loss": 1.2722, "step": 211000 }, { - "epoch": 0.5, - "learning_rate": 1.6644335233290097e-05, - "loss": 1.3864, + "epoch": 1.69, + "learning_rate": 1.8308e-05, + "loss": 1.2759, "step": 211500 }, { - "epoch": 0.5, - "learning_rate": 1.6636402219657216e-05, - "loss": 1.4064, + "epoch": 1.7, + "learning_rate": 1.8304000000000003e-05, + "loss": 1.2757, "step": 212000 }, { - "epoch": 0.51, - "learning_rate": 1.6628469206024332e-05, - "loss": 1.3973, + "epoch": 1.7, + "learning_rate": 1.83e-05, + "loss": 1.2806, "step": 212500 }, { - "epoch": 0.51, - "learning_rate": 1.6620536192391448e-05, - "loss": 1.3933, + "epoch": 1.7, + "learning_rate": 1.8296e-05, + "loss": 1.276, "step": 213000 }, { - "epoch": 0.51, - "learning_rate": 1.6612603178758564e-05, - "loss": 1.3928, + "epoch": 1.71, + "learning_rate": 1.8292e-05, + "loss": 1.2771, "step": 213500 }, { - "epoch": 0.51, - "learning_rate": 1.660467016512568e-05, - "loss": 1.4047, + "epoch": 1.71, + "learning_rate": 1.8288000000000002e-05, + "loss": 1.2759, "step": 214000 }, { - "epoch": 0.51, - "learning_rate": 1.6596737151492796e-05, - "loss": 1.4135, + "epoch": 1.72, + "learning_rate": 1.8284000000000003e-05, + "loss": 1.2676, "step": 214500 }, { - "epoch": 0.51, - "learning_rate": 1.658880413785991e-05, - "loss": 1.3821, + "epoch": 1.72, + "learning_rate": 1.828e-05, + "loss": 1.2758, "step": 215000 }, { - "epoch": 0.51, - "learning_rate": 1.658087112422703e-05, - "loss": 1.3826, + "epoch": 1.72, + "learning_rate": 1.8276e-05, + "loss": 1.2667, "step": 215500 }, { - "epoch": 0.51, - "learning_rate": 1.6572938110594143e-05, - "loss": 1.3868, + "epoch": 1.73, + "learning_rate": 1.8272e-05, + "loss": 1.2738, "step": 216000 }, { - "epoch": 0.52, - "learning_rate": 1.656500509696126e-05, - "loss": 1.407, + "epoch": 1.73, + "learning_rate": 1.8268000000000002e-05, + "loss": 1.2748, "step": 216500 }, { - "epoch": 0.52, - "learning_rate": 1.6557072083328375e-05, - "loss": 1.3846, + "epoch": 1.74, + "learning_rate": 1.8264000000000003e-05, + "loss": 1.2762, "step": 217000 }, { - "epoch": 0.52, - "learning_rate": 1.654913906969549e-05, - "loss": 1.3727, + "epoch": 1.74, + "learning_rate": 1.826e-05, + "loss": 1.2707, "step": 217500 }, { - "epoch": 0.52, - "learning_rate": 1.654120605606261e-05, - "loss": 1.3815, + "epoch": 1.74, + "learning_rate": 1.8256e-05, + "loss": 1.2667, "step": 218000 }, { - "epoch": 0.52, - "learning_rate": 1.6533273042429726e-05, - "loss": 1.3885, + "epoch": 1.75, + "learning_rate": 1.8252e-05, + "loss": 1.2708, "step": 218500 }, { - "epoch": 0.52, - "learning_rate": 1.6525340028796842e-05, - "loss": 1.3858, + "epoch": 1.75, + "learning_rate": 1.8248000000000002e-05, + "loss": 1.2674, "step": 219000 }, { - "epoch": 0.52, - "learning_rate": 1.6517407015163958e-05, - "loss": 1.3936, + "epoch": 1.76, + "learning_rate": 1.8244e-05, + "loss": 1.2753, "step": 219500 }, { - "epoch": 0.52, - "learning_rate": 1.6509474001531074e-05, - "loss": 1.3923, + "epoch": 1.76, + "learning_rate": 1.824e-05, + "loss": 1.2724, "step": 220000 }, { - "epoch": 0.52, - "learning_rate": 1.650154098789819e-05, - "loss": 1.3787, + "epoch": 1.76, + "learning_rate": 1.8236000000000003e-05, + "loss": 1.274, "step": 220500 }, { - "epoch": 0.53, - "learning_rate": 1.6493607974265305e-05, - "loss": 1.3872, + "epoch": 1.77, + "learning_rate": 1.8232e-05, + "loss": 1.2764, "step": 221000 }, { - "epoch": 0.53, - "learning_rate": 1.648567496063242e-05, - "loss": 1.3694, + "epoch": 1.77, + "learning_rate": 1.8228000000000002e-05, + "loss": 1.2681, "step": 221500 }, { - "epoch": 0.53, - "learning_rate": 1.6477741946999537e-05, - "loss": 1.389, + "epoch": 1.78, + "learning_rate": 1.8224e-05, + "loss": 1.2742, "step": 222000 }, { - "epoch": 0.53, - "learning_rate": 1.6469808933366653e-05, - "loss": 1.3849, + "epoch": 1.78, + "learning_rate": 1.8220000000000002e-05, + "loss": 1.2712, "step": 222500 }, { - "epoch": 0.53, - "learning_rate": 1.646187591973377e-05, - "loss": 1.385, + "epoch": 1.78, + "learning_rate": 1.8216000000000003e-05, + "loss": 1.2666, "step": 223000 }, { - "epoch": 0.53, - "learning_rate": 1.6453942906100885e-05, - "loss": 1.3686, + "epoch": 1.79, + "learning_rate": 1.8212e-05, + "loss": 1.2695, "step": 223500 }, { - "epoch": 0.53, - "learning_rate": 1.6446009892468004e-05, - "loss": 1.3916, + "epoch": 1.79, + "learning_rate": 1.8208000000000003e-05, + "loss": 1.2698, "step": 224000 }, { - "epoch": 0.53, - "learning_rate": 1.643807687883512e-05, - "loss": 1.375, + "epoch": 1.8, + "learning_rate": 1.8204e-05, + "loss": 1.2734, "step": 224500 }, { - "epoch": 0.54, - "learning_rate": 1.6430143865202232e-05, - "loss": 1.3811, + "epoch": 1.8, + "learning_rate": 1.8200000000000002e-05, + "loss": 1.2682, "step": 225000 }, { - "epoch": 0.54, - "learning_rate": 1.6422210851569348e-05, - "loss": 1.3755, + "epoch": 1.8, + "learning_rate": 1.8196000000000003e-05, + "loss": 1.2715, "step": 225500 }, { - "epoch": 0.54, - "learning_rate": 1.6414277837936464e-05, - "loss": 1.381, + "epoch": 1.81, + "learning_rate": 1.8192e-05, + "loss": 1.2699, "step": 226000 }, { - "epoch": 0.54, - "learning_rate": 1.6406344824303583e-05, - "loss": 1.3781, + "epoch": 1.81, + "learning_rate": 1.8188000000000003e-05, + "loss": 1.2718, "step": 226500 }, { - "epoch": 0.54, - "learning_rate": 1.63984118106707e-05, - "loss": 1.3714, + "epoch": 1.82, + "learning_rate": 1.8184e-05, + "loss": 1.2621, "step": 227000 }, { - "epoch": 0.54, - "learning_rate": 1.6390478797037815e-05, - "loss": 1.3936, + "epoch": 1.82, + "learning_rate": 1.8180000000000002e-05, + "loss": 1.2642, "step": 227500 }, { - "epoch": 0.54, - "learning_rate": 1.638254578340493e-05, - "loss": 1.3711, + "epoch": 1.82, + "learning_rate": 1.8176000000000004e-05, + "loss": 1.2685, "step": 228000 }, { - "epoch": 0.54, - "learning_rate": 1.6374612769772047e-05, - "loss": 1.3872, + "epoch": 1.83, + "learning_rate": 1.8172e-05, + "loss": 1.2645, "step": 228500 }, { - "epoch": 0.54, - "learning_rate": 1.6366679756139163e-05, - "loss": 1.3851, + "epoch": 1.83, + "learning_rate": 1.8168e-05, + "loss": 1.2697, "step": 229000 }, { - "epoch": 0.55, - "learning_rate": 1.635874674250628e-05, - "loss": 1.3924, + "epoch": 1.84, + "learning_rate": 1.8164e-05, + "loss": 1.2671, "step": 229500 }, { - "epoch": 0.55, - "learning_rate": 1.6350813728873394e-05, - "loss": 1.381, + "epoch": 1.84, + "learning_rate": 1.8160000000000002e-05, + "loss": 1.2652, "step": 230000 }, { - "epoch": 0.55, - "learning_rate": 1.634288071524051e-05, - "loss": 1.3746, + "epoch": 1.84, + "learning_rate": 1.8156000000000004e-05, + "loss": 1.2592, "step": 230500 }, { - "epoch": 0.55, - "learning_rate": 1.6334947701607626e-05, - "loss": 1.3714, + "epoch": 1.85, + "learning_rate": 1.8152000000000002e-05, + "loss": 1.2699, "step": 231000 }, { - "epoch": 0.55, - "learning_rate": 1.6327014687974742e-05, - "loss": 1.386, + "epoch": 1.85, + "learning_rate": 1.8148e-05, + "loss": 1.2673, "step": 231500 }, { - "epoch": 0.55, - "learning_rate": 1.6319081674341858e-05, - "loss": 1.3823, + "epoch": 1.86, + "learning_rate": 1.8144e-05, + "loss": 1.27, "step": 232000 }, { - "epoch": 0.55, - "learning_rate": 1.6311148660708974e-05, - "loss": 1.3791, + "epoch": 1.86, + "learning_rate": 1.8140000000000003e-05, + "loss": 1.2673, "step": 232500 }, { - "epoch": 0.55, - "learning_rate": 1.6303215647076093e-05, - "loss": 1.3683, + "epoch": 1.86, + "learning_rate": 1.8136000000000004e-05, + "loss": 1.266, "step": 233000 }, { - "epoch": 0.56, - "learning_rate": 1.629528263344321e-05, - "loss": 1.3786, + "epoch": 1.87, + "learning_rate": 1.8132000000000002e-05, + "loss": 1.2697, "step": 233500 }, { - "epoch": 0.56, - "learning_rate": 1.628734961981032e-05, - "loss": 1.3739, + "epoch": 1.87, + "learning_rate": 1.8128e-05, + "loss": 1.2592, "step": 234000 }, { - "epoch": 0.56, - "learning_rate": 1.6279416606177437e-05, - "loss": 1.3765, + "epoch": 1.88, + "learning_rate": 1.8124e-05, + "loss": 1.2657, "step": 234500 }, { - "epoch": 0.56, - "learning_rate": 1.6271483592544553e-05, - "loss": 1.3808, + "epoch": 1.88, + "learning_rate": 1.8120000000000003e-05, + "loss": 1.2608, "step": 235000 }, { - "epoch": 0.56, - "learning_rate": 1.6263550578911672e-05, - "loss": 1.3843, + "epoch": 1.88, + "learning_rate": 1.8116000000000004e-05, + "loss": 1.2617, "step": 235500 }, { - "epoch": 0.56, - "learning_rate": 1.6255617565278788e-05, - "loss": 1.3607, + "epoch": 1.89, + "learning_rate": 1.8112000000000002e-05, + "loss": 1.2647, "step": 236000 }, { - "epoch": 0.56, - "learning_rate": 1.6247684551645904e-05, - "loss": 1.3586, + "epoch": 1.89, + "learning_rate": 1.8108e-05, + "loss": 1.2623, "step": 236500 }, { - "epoch": 0.56, - "learning_rate": 1.623975153801302e-05, - "loss": 1.4029, + "epoch": 1.9, + "learning_rate": 1.8104e-05, + "loss": 1.2593, "step": 237000 }, { - "epoch": 0.57, - "learning_rate": 1.6231818524380136e-05, - "loss": 1.3793, + "epoch": 1.9, + "learning_rate": 1.8100000000000003e-05, + "loss": 1.258, "step": 237500 }, { - "epoch": 0.57, - "learning_rate": 1.622388551074725e-05, - "loss": 1.3644, + "epoch": 1.9, + "learning_rate": 1.8096e-05, + "loss": 1.2627, "step": 238000 }, { - "epoch": 0.57, - "learning_rate": 1.6215952497114367e-05, - "loss": 1.3749, + "epoch": 1.91, + "learning_rate": 1.8092000000000002e-05, + "loss": 1.2666, "step": 238500 }, { - "epoch": 0.57, - "learning_rate": 1.6208019483481483e-05, - "loss": 1.3771, + "epoch": 1.91, + "learning_rate": 1.8088e-05, + "loss": 1.2642, "step": 239000 }, { - "epoch": 0.57, - "learning_rate": 1.62000864698486e-05, - "loss": 1.3759, + "epoch": 1.92, + "learning_rate": 1.8084e-05, + "loss": 1.2583, "step": 239500 }, { - "epoch": 0.57, - "learning_rate": 1.6192153456215715e-05, - "loss": 1.3703, + "epoch": 1.92, + "learning_rate": 1.8080000000000003e-05, + "loss": 1.2588, "step": 240000 }, { - "epoch": 0.57, - "learning_rate": 1.618422044258283e-05, - "loss": 1.3821, + "epoch": 1.92, + "learning_rate": 1.8076e-05, + "loss": 1.2676, "step": 240500 }, { - "epoch": 0.57, - "learning_rate": 1.6176287428949947e-05, - "loss": 1.363, + "epoch": 1.93, + "learning_rate": 1.8072e-05, + "loss": 1.2564, "step": 241000 }, { - "epoch": 0.57, - "learning_rate": 1.6168354415317066e-05, - "loss": 1.3758, + "epoch": 1.93, + "learning_rate": 1.8068e-05, + "loss": 1.2554, "step": 241500 }, { - "epoch": 0.58, - "learning_rate": 1.6160421401684182e-05, - "loss": 1.3875, + "epoch": 1.94, + "learning_rate": 1.8064000000000002e-05, + "loss": 1.2543, "step": 242000 }, { - "epoch": 0.58, - "learning_rate": 1.6152488388051298e-05, - "loss": 1.3751, + "epoch": 1.94, + "learning_rate": 1.8060000000000003e-05, + "loss": 1.265, "step": 242500 }, { - "epoch": 0.58, - "learning_rate": 1.614455537441841e-05, - "loss": 1.3703, + "epoch": 1.94, + "learning_rate": 1.8056e-05, + "loss": 1.2575, "step": 243000 }, { - "epoch": 0.58, - "learning_rate": 1.6136622360785526e-05, - "loss": 1.3672, + "epoch": 1.95, + "learning_rate": 1.8052e-05, + "loss": 1.2511, "step": 243500 }, { - "epoch": 0.58, - "learning_rate": 1.6128689347152645e-05, - "loss": 1.3743, + "epoch": 1.95, + "learning_rate": 1.8048e-05, + "loss": 1.2581, "step": 244000 }, { - "epoch": 0.58, - "learning_rate": 1.612075633351976e-05, - "loss": 1.3427, + "epoch": 1.96, + "learning_rate": 1.8044000000000002e-05, + "loss": 1.257, "step": 244500 }, { - "epoch": 0.58, - "learning_rate": 1.6112823319886877e-05, - "loss": 1.3816, + "epoch": 1.96, + "learning_rate": 1.8040000000000003e-05, + "loss": 1.2639, "step": 245000 }, { - "epoch": 0.58, - "learning_rate": 1.6104890306253993e-05, - "loss": 1.381, + "epoch": 1.96, + "learning_rate": 1.8036e-05, + "loss": 1.2565, "step": 245500 }, { - "epoch": 0.59, - "learning_rate": 1.609695729262111e-05, - "loss": 1.3709, + "epoch": 1.97, + "learning_rate": 1.8032e-05, + "loss": 1.2567, "step": 246000 }, { - "epoch": 0.59, - "learning_rate": 1.6089024278988225e-05, - "loss": 1.3632, + "epoch": 1.97, + "learning_rate": 1.8028e-05, + "loss": 1.2469, "step": 246500 }, { - "epoch": 0.59, - "learning_rate": 1.608109126535534e-05, - "loss": 1.3776, + "epoch": 1.98, + "learning_rate": 1.8024000000000002e-05, + "loss": 1.2606, "step": 247000 }, { - "epoch": 0.59, - "learning_rate": 1.6073158251722456e-05, - "loss": 1.3771, + "epoch": 1.98, + "learning_rate": 1.802e-05, + "loss": 1.2514, "step": 247500 }, { - "epoch": 0.59, - "learning_rate": 1.6065225238089572e-05, - "loss": 1.3684, + "epoch": 1.98, + "learning_rate": 1.8016e-05, + "loss": 1.2505, "step": 248000 }, { - "epoch": 0.59, - "learning_rate": 1.6057292224456688e-05, - "loss": 1.3799, + "epoch": 1.99, + "learning_rate": 1.8012e-05, + "loss": 1.2537, "step": 248500 }, { - "epoch": 0.59, - "learning_rate": 1.6049359210823804e-05, - "loss": 1.3803, + "epoch": 1.99, + "learning_rate": 1.8008e-05, + "loss": 1.252, "step": 249000 }, { - "epoch": 0.59, - "learning_rate": 1.604142619719092e-05, - "loss": 1.3725, + "epoch": 2.0, + "learning_rate": 1.8004000000000002e-05, + "loss": 1.2528, "step": 249500 }, { - "epoch": 0.59, - "learning_rate": 1.603349318355804e-05, - "loss": 1.3825, + "epoch": 2.0, + "learning_rate": 1.8e-05, + "loss": 1.256, "step": 250000 }, { - "epoch": 0.6, - "learning_rate": 1.6025560169925155e-05, - "loss": 1.3642, + "epoch": 2.0, + "eval_loss": 1.2035317420959473, + "eval_runtime": 81.1073, + "eval_samples_per_second": 165.373, + "eval_steps_per_second": 2.589, + "step": 250000 + }, + { + "epoch": 2.0, + "learning_rate": 1.7996000000000002e-05, + "loss": 1.246, "step": 250500 }, { - "epoch": 0.6, - "learning_rate": 1.601762715629227e-05, - "loss": 1.3714, + "epoch": 2.01, + "learning_rate": 1.7992e-05, + "loss": 1.2571, "step": 251000 }, { - "epoch": 0.6, - "learning_rate": 1.6009694142659383e-05, - "loss": 1.3651, + "epoch": 2.01, + "learning_rate": 1.7988e-05, + "loss": 1.2529, "step": 251500 }, { - "epoch": 0.6, - "learning_rate": 1.60017611290265e-05, - "loss": 1.3768, + "epoch": 2.02, + "learning_rate": 1.7984000000000003e-05, + "loss": 1.2474, "step": 252000 }, { - "epoch": 0.6, - "learning_rate": 1.599382811539362e-05, - "loss": 1.3662, + "epoch": 2.02, + "learning_rate": 1.798e-05, + "loss": 1.2493, "step": 252500 }, { - "epoch": 0.6, - "learning_rate": 1.5985895101760734e-05, - "loss": 1.3668, + "epoch": 2.02, + "learning_rate": 1.7976000000000002e-05, + "loss": 1.2492, "step": 253000 }, { - "epoch": 0.6, - "learning_rate": 1.597796208812785e-05, - "loss": 1.3642, + "epoch": 2.03, + "learning_rate": 1.7972e-05, + "loss": 1.2522, "step": 253500 }, { - "epoch": 0.6, - "learning_rate": 1.5970029074494966e-05, - "loss": 1.3711, + "epoch": 2.03, + "learning_rate": 1.7968e-05, + "loss": 1.2494, "step": 254000 }, { - "epoch": 0.61, - "learning_rate": 1.5962096060862082e-05, - "loss": 1.3713, + "epoch": 2.04, + "learning_rate": 1.7964000000000003e-05, + "loss": 1.2426, "step": 254500 }, { - "epoch": 0.61, - "learning_rate": 1.5954163047229198e-05, - "loss": 1.3604, + "epoch": 2.04, + "learning_rate": 1.796e-05, + "loss": 1.2522, "step": 255000 }, { - "epoch": 0.61, - "learning_rate": 1.5946230033596314e-05, - "loss": 1.3728, + "epoch": 2.04, + "learning_rate": 1.7956000000000002e-05, + "loss": 1.253, "step": 255500 }, { - "epoch": 0.61, - "learning_rate": 1.593829701996343e-05, - "loss": 1.3526, + "epoch": 2.05, + "learning_rate": 1.7952e-05, + "loss": 1.2517, "step": 256000 }, { - "epoch": 0.61, - "learning_rate": 1.5930364006330545e-05, - "loss": 1.3693, + "epoch": 2.05, + "learning_rate": 1.7948e-05, + "loss": 1.2489, "step": 256500 }, { - "epoch": 0.61, - "learning_rate": 1.592243099269766e-05, - "loss": 1.3555, + "epoch": 2.06, + "learning_rate": 1.7944000000000003e-05, + "loss": 1.2497, "step": 257000 }, { - "epoch": 0.61, - "learning_rate": 1.5914497979064777e-05, - "loss": 1.3622, + "epoch": 2.06, + "learning_rate": 1.794e-05, + "loss": 1.2469, "step": 257500 }, { - "epoch": 0.61, - "learning_rate": 1.5906564965431893e-05, - "loss": 1.355, + "epoch": 2.06, + "learning_rate": 1.7936000000000002e-05, + "loss": 1.2405, "step": 258000 }, { - "epoch": 0.62, - "learning_rate": 1.5898631951799012e-05, - "loss": 1.3632, + "epoch": 2.07, + "learning_rate": 1.7932e-05, + "loss": 1.2477, "step": 258500 }, { - "epoch": 0.62, - "learning_rate": 1.5890698938166128e-05, - "loss": 1.3706, + "epoch": 2.07, + "learning_rate": 1.7928000000000002e-05, + "loss": 1.2427, "step": 259000 }, { - "epoch": 0.62, - "learning_rate": 1.5882765924533244e-05, - "loss": 1.3667, + "epoch": 2.08, + "learning_rate": 1.7924e-05, + "loss": 1.2446, "step": 259500 }, { - "epoch": 0.62, - "learning_rate": 1.587483291090036e-05, - "loss": 1.359, + "epoch": 2.08, + "learning_rate": 1.792e-05, + "loss": 1.2422, "step": 260000 }, { - "epoch": 0.62, - "learning_rate": 1.5866899897267472e-05, - "loss": 1.3596, + "epoch": 2.08, + "learning_rate": 1.7916000000000003e-05, + "loss": 1.2452, "step": 260500 }, { - "epoch": 0.62, - "learning_rate": 1.585896688363459e-05, - "loss": 1.3723, + "epoch": 2.09, + "learning_rate": 1.7912e-05, + "loss": 1.2532, "step": 261000 }, { - "epoch": 0.62, - "learning_rate": 1.5851033870001707e-05, - "loss": 1.3712, + "epoch": 2.09, + "learning_rate": 1.7908000000000002e-05, + "loss": 1.2431, "step": 261500 }, { - "epoch": 0.62, - "learning_rate": 1.5843100856368823e-05, - "loss": 1.3611, + "epoch": 2.1, + "learning_rate": 1.7904e-05, + "loss": 1.2471, "step": 262000 }, { - "epoch": 0.62, - "learning_rate": 1.583516784273594e-05, - "loss": 1.3625, + "epoch": 2.1, + "learning_rate": 1.79e-05, + "loss": 1.2446, "step": 262500 }, { - "epoch": 0.63, - "learning_rate": 1.5827234829103055e-05, - "loss": 1.3692, + "epoch": 2.1, + "learning_rate": 1.7896000000000003e-05, + "loss": 1.2495, "step": 263000 }, { - "epoch": 0.63, - "learning_rate": 1.581930181547017e-05, - "loss": 1.3352, + "epoch": 2.11, + "learning_rate": 1.7892e-05, + "loss": 1.2468, "step": 263500 }, { - "epoch": 0.63, - "learning_rate": 1.5811368801837287e-05, - "loss": 1.3738, + "epoch": 2.11, + "learning_rate": 1.7888000000000002e-05, + "loss": 1.2495, "step": 264000 }, { - "epoch": 0.63, - "learning_rate": 1.5803435788204403e-05, - "loss": 1.3665, + "epoch": 2.12, + "learning_rate": 1.7884e-05, + "loss": 1.2519, "step": 264500 }, { - "epoch": 0.63, - "learning_rate": 1.579550277457152e-05, - "loss": 1.3579, + "epoch": 2.12, + "learning_rate": 1.788e-05, + "loss": 1.2434, "step": 265000 }, { - "epoch": 0.63, - "learning_rate": 1.5787569760938634e-05, - "loss": 1.38, + "epoch": 2.12, + "learning_rate": 1.7876000000000003e-05, + "loss": 1.2411, "step": 265500 }, { - "epoch": 0.63, - "learning_rate": 1.577963674730575e-05, - "loss": 1.3522, + "epoch": 2.13, + "learning_rate": 1.7872e-05, + "loss": 1.235, "step": 266000 }, { - "epoch": 0.63, - "learning_rate": 1.5771703733672866e-05, - "loss": 1.3583, + "epoch": 2.13, + "learning_rate": 1.7868000000000002e-05, + "loss": 1.2424, "step": 266500 }, { - "epoch": 0.64, - "learning_rate": 1.5763770720039985e-05, - "loss": 1.3458, + "epoch": 2.14, + "learning_rate": 1.7864e-05, + "loss": 1.2456, "step": 267000 }, { - "epoch": 0.64, - "learning_rate": 1.57558377064071e-05, - "loss": 1.3676, + "epoch": 2.14, + "learning_rate": 1.7860000000000002e-05, + "loss": 1.2521, "step": 267500 }, { - "epoch": 0.64, - "learning_rate": 1.5747904692774217e-05, - "loss": 1.3463, + "epoch": 2.14, + "learning_rate": 1.7856000000000003e-05, + "loss": 1.2368, "step": 268000 }, { - "epoch": 0.64, - "learning_rate": 1.5739971679141333e-05, - "loss": 1.3685, + "epoch": 2.15, + "learning_rate": 1.7852e-05, + "loss": 1.2382, "step": 268500 }, { - "epoch": 0.64, - "learning_rate": 1.573203866550845e-05, - "loss": 1.3594, + "epoch": 2.15, + "learning_rate": 1.7848e-05, + "loss": 1.2447, "step": 269000 }, { - "epoch": 0.64, - "learning_rate": 1.5724105651875565e-05, - "loss": 1.355, + "epoch": 2.16, + "learning_rate": 1.7844e-05, + "loss": 1.2445, "step": 269500 }, { - "epoch": 0.64, - "learning_rate": 1.571617263824268e-05, - "loss": 1.3491, + "epoch": 2.16, + "learning_rate": 1.7840000000000002e-05, + "loss": 1.2429, "step": 270000 }, { - "epoch": 0.64, - "learning_rate": 1.5708239624609796e-05, - "loss": 1.3462, + "epoch": 2.16, + "learning_rate": 1.7836000000000003e-05, + "loss": 1.2416, "step": 270500 }, { - "epoch": 0.64, - "learning_rate": 1.5700306610976912e-05, - "loss": 1.3533, + "epoch": 2.17, + "learning_rate": 1.7832e-05, + "loss": 1.2376, "step": 271000 }, { - "epoch": 0.65, - "learning_rate": 1.5692373597344028e-05, - "loss": 1.3695, + "epoch": 2.17, + "learning_rate": 1.7828e-05, + "loss": 1.2317, "step": 271500 }, { - "epoch": 0.65, - "learning_rate": 1.5684440583711144e-05, - "loss": 1.3666, + "epoch": 2.18, + "learning_rate": 1.7824e-05, + "loss": 1.2303, "step": 272000 }, { - "epoch": 0.65, - "learning_rate": 1.567650757007826e-05, - "loss": 1.3446, + "epoch": 2.18, + "learning_rate": 1.7820000000000002e-05, + "loss": 1.24, "step": 272500 }, { - "epoch": 0.65, - "learning_rate": 1.5668574556445376e-05, - "loss": 1.3747, + "epoch": 2.18, + "learning_rate": 1.7816000000000004e-05, + "loss": 1.2421, "step": 273000 }, { - "epoch": 0.65, - "learning_rate": 1.5660641542812495e-05, - "loss": 1.3456, + "epoch": 2.19, + "learning_rate": 1.7812e-05, + "loss": 1.2416, "step": 273500 }, { - "epoch": 0.65, - "learning_rate": 1.5652708529179607e-05, - "loss": 1.3613, + "epoch": 2.19, + "learning_rate": 1.7808e-05, + "loss": 1.2398, "step": 274000 }, { - "epoch": 0.65, - "learning_rate": 1.5644775515546723e-05, - "loss": 1.3537, + "epoch": 2.2, + "learning_rate": 1.7804e-05, + "loss": 1.2393, "step": 274500 }, { - "epoch": 0.65, - "learning_rate": 1.563684250191384e-05, - "loss": 1.3617, + "epoch": 2.2, + "learning_rate": 1.7800000000000002e-05, + "loss": 1.2455, "step": 275000 }, { - "epoch": 0.66, - "learning_rate": 1.5628909488280955e-05, - "loss": 1.3369, + "epoch": 2.2, + "learning_rate": 1.7796000000000004e-05, + "loss": 1.2321, "step": 275500 }, { - "epoch": 0.66, - "learning_rate": 1.5620976474648074e-05, - "loss": 1.3451, + "epoch": 2.21, + "learning_rate": 1.7792000000000002e-05, + "loss": 1.2326, "step": 276000 }, { - "epoch": 0.66, - "learning_rate": 1.561304346101519e-05, - "loss": 1.3559, + "epoch": 2.21, + "learning_rate": 1.7788e-05, + "loss": 1.2381, "step": 276500 }, { - "epoch": 0.66, - "learning_rate": 1.5605110447382306e-05, - "loss": 1.3487, + "epoch": 2.22, + "learning_rate": 1.7784e-05, + "loss": 1.2398, "step": 277000 }, { - "epoch": 0.66, - "learning_rate": 1.5597177433749422e-05, - "loss": 1.3382, + "epoch": 2.22, + "learning_rate": 1.7780000000000003e-05, + "loss": 1.2367, "step": 277500 }, { - "epoch": 0.66, - "learning_rate": 1.5589244420116538e-05, - "loss": 1.3394, + "epoch": 2.22, + "learning_rate": 1.7776e-05, + "loss": 1.2357, "step": 278000 }, { - "epoch": 0.66, - "learning_rate": 1.5581311406483654e-05, - "loss": 1.342, + "epoch": 2.23, + "learning_rate": 1.7772000000000002e-05, + "loss": 1.2369, "step": 278500 }, { - "epoch": 0.66, - "learning_rate": 1.557337839285077e-05, - "loss": 1.3467, + "epoch": 2.23, + "learning_rate": 1.7768e-05, + "loss": 1.2395, "step": 279000 }, { - "epoch": 0.67, - "learning_rate": 1.5565445379217885e-05, - "loss": 1.3409, + "epoch": 2.24, + "learning_rate": 1.7764e-05, + "loss": 1.2347, "step": 279500 }, { - "epoch": 0.67, - "learning_rate": 1.5557512365585e-05, - "loss": 1.355, + "epoch": 2.24, + "learning_rate": 1.7760000000000003e-05, + "loss": 1.2322, "step": 280000 }, { - "epoch": 0.67, - "learning_rate": 1.5549579351952117e-05, - "loss": 1.3444, + "epoch": 2.24, + "learning_rate": 1.7756e-05, + "loss": 1.2393, "step": 280500 }, { - "epoch": 0.67, - "learning_rate": 1.5541646338319233e-05, - "loss": 1.3609, + "epoch": 2.25, + "learning_rate": 1.7752e-05, + "loss": 1.2349, "step": 281000 }, { - "epoch": 0.67, - "learning_rate": 1.553371332468635e-05, - "loss": 1.3515, + "epoch": 2.25, + "learning_rate": 1.7748e-05, + "loss": 1.2363, "step": 281500 }, { - "epoch": 0.67, - "learning_rate": 1.5525780311053468e-05, - "loss": 1.3388, + "epoch": 2.26, + "learning_rate": 1.7744e-05, + "loss": 1.2316, "step": 282000 }, { - "epoch": 0.67, - "learning_rate": 1.5517847297420584e-05, - "loss": 1.3392, + "epoch": 2.26, + "learning_rate": 1.7740000000000003e-05, + "loss": 1.237, "step": 282500 }, { - "epoch": 0.67, - "learning_rate": 1.5509914283787696e-05, - "loss": 1.3389, + "epoch": 2.26, + "learning_rate": 1.7736e-05, + "loss": 1.2345, "step": 283000 }, { - "epoch": 0.67, - "learning_rate": 1.5501981270154812e-05, - "loss": 1.3493, + "epoch": 2.27, + "learning_rate": 1.7732000000000002e-05, + "loss": 1.2369, "step": 283500 }, { - "epoch": 0.68, - "learning_rate": 1.5494048256521928e-05, - "loss": 1.3489, + "epoch": 2.27, + "learning_rate": 1.7728e-05, + "loss": 1.2333, "step": 284000 }, { - "epoch": 0.68, - "learning_rate": 1.5486115242889047e-05, - "loss": 1.3458, + "epoch": 2.28, + "learning_rate": 1.7724000000000002e-05, + "loss": 1.2326, "step": 284500 }, { - "epoch": 0.68, - "learning_rate": 1.5478182229256163e-05, - "loss": 1.346, + "epoch": 2.28, + "learning_rate": 1.7720000000000003e-05, + "loss": 1.2256, "step": 285000 }, { - "epoch": 0.68, - "learning_rate": 1.547024921562328e-05, - "loss": 1.3303, + "epoch": 2.28, + "learning_rate": 1.7716e-05, + "loss": 1.2268, "step": 285500 }, { - "epoch": 0.68, - "learning_rate": 1.5462316201990395e-05, - "loss": 1.3384, + "epoch": 2.29, + "learning_rate": 1.7712000000000003e-05, + "loss": 1.2354, "step": 286000 }, { - "epoch": 0.68, - "learning_rate": 1.545438318835751e-05, - "loss": 1.3537, + "epoch": 2.29, + "learning_rate": 1.7708e-05, + "loss": 1.2298, "step": 286500 }, { - "epoch": 0.68, - "learning_rate": 1.5446450174724627e-05, - "loss": 1.3511, + "epoch": 2.3, + "learning_rate": 1.7704000000000002e-05, + "loss": 1.2292, "step": 287000 }, { - "epoch": 0.68, - "learning_rate": 1.5438517161091743e-05, - "loss": 1.3256, + "epoch": 2.3, + "learning_rate": 1.77e-05, + "loss": 1.2328, "step": 287500 }, { - "epoch": 0.69, - "learning_rate": 1.543058414745886e-05, - "loss": 1.3465, + "epoch": 2.3, + "learning_rate": 1.7696e-05, + "loss": 1.2332, "step": 288000 }, { - "epoch": 0.69, - "learning_rate": 1.5422651133825974e-05, - "loss": 1.3329, + "epoch": 2.31, + "learning_rate": 1.7692000000000003e-05, + "loss": 1.2337, "step": 288500 }, { - "epoch": 0.69, - "learning_rate": 1.541471812019309e-05, - "loss": 1.3549, + "epoch": 2.31, + "learning_rate": 1.7688e-05, + "loss": 1.2337, "step": 289000 }, { - "epoch": 0.69, - "learning_rate": 1.5406785106560206e-05, - "loss": 1.3458, + "epoch": 2.32, + "learning_rate": 1.7684000000000002e-05, + "loss": 1.2338, "step": 289500 }, { - "epoch": 0.69, - "learning_rate": 1.5398852092927322e-05, - "loss": 1.3359, + "epoch": 2.32, + "learning_rate": 1.768e-05, + "loss": 1.2373, "step": 290000 }, { - "epoch": 0.69, - "learning_rate": 1.539091907929444e-05, - "loss": 1.3343, + "epoch": 2.32, + "learning_rate": 1.7676e-05, + "loss": 1.2335, "step": 290500 }, { - "epoch": 0.69, - "learning_rate": 1.5382986065661557e-05, - "loss": 1.3617, + "epoch": 2.33, + "learning_rate": 1.7672000000000003e-05, + "loss": 1.2338, "step": 291000 }, { - "epoch": 0.69, - "learning_rate": 1.5375053052028673e-05, - "loss": 1.3335, + "epoch": 2.33, + "learning_rate": 1.7668e-05, + "loss": 1.2246, "step": 291500 }, { - "epoch": 0.69, - "learning_rate": 1.5367120038395785e-05, - "loss": 1.3508, + "epoch": 2.34, + "learning_rate": 1.7664000000000002e-05, + "loss": 1.2349, "step": 292000 }, { - "epoch": 0.7, - "learning_rate": 1.53591870247629e-05, - "loss": 1.3492, + "epoch": 2.34, + "learning_rate": 1.766e-05, + "loss": 1.2303, "step": 292500 }, { - "epoch": 0.7, - "learning_rate": 1.535125401113002e-05, - "loss": 1.3382, + "epoch": 2.34, + "learning_rate": 1.7656000000000002e-05, + "loss": 1.227, "step": 293000 }, { - "epoch": 0.7, - "learning_rate": 1.5343320997497136e-05, - "loss": 1.3423, + "epoch": 2.35, + "learning_rate": 1.7652000000000003e-05, + "loss": 1.2304, "step": 293500 }, { - "epoch": 0.7, - "learning_rate": 1.5335387983864252e-05, - "loss": 1.3388, + "epoch": 2.35, + "learning_rate": 1.7648e-05, + "loss": 1.2341, "step": 294000 }, { - "epoch": 0.7, - "learning_rate": 1.5327454970231368e-05, - "loss": 1.3606, + "epoch": 2.36, + "learning_rate": 1.7644000000000003e-05, + "loss": 1.2397, "step": 294500 }, { - "epoch": 0.7, - "learning_rate": 1.5319521956598484e-05, - "loss": 1.3439, + "epoch": 2.36, + "learning_rate": 1.764e-05, + "loss": 1.2389, "step": 295000 }, { - "epoch": 0.7, - "learning_rate": 1.53115889429656e-05, - "loss": 1.3292, + "epoch": 2.36, + "learning_rate": 1.7636000000000002e-05, + "loss": 1.2336, "step": 295500 }, { - "epoch": 0.7, - "learning_rate": 1.5303655929332716e-05, - "loss": 1.341, + "epoch": 2.37, + "learning_rate": 1.7632000000000003e-05, + "loss": 1.2246, "step": 296000 }, { - "epoch": 0.71, - "learning_rate": 1.529572291569983e-05, - "loss": 1.3289, + "epoch": 2.37, + "learning_rate": 1.7628e-05, + "loss": 1.2282, "step": 296500 }, { - "epoch": 0.71, - "learning_rate": 1.5287789902066947e-05, - "loss": 1.3151, + "epoch": 2.38, + "learning_rate": 1.7624000000000003e-05, + "loss": 1.222, "step": 297000 }, { - "epoch": 0.71, - "learning_rate": 1.5279856888434063e-05, - "loss": 1.3513, + "epoch": 2.38, + "learning_rate": 1.762e-05, + "loss": 1.2253, "step": 297500 }, { - "epoch": 0.71, - "learning_rate": 1.527192387480118e-05, - "loss": 1.3459, + "epoch": 2.38, + "learning_rate": 1.7616000000000002e-05, + "loss": 1.234, "step": 298000 }, { - "epoch": 0.71, - "learning_rate": 1.5263990861168295e-05, - "loss": 1.3493, + "epoch": 2.39, + "learning_rate": 1.7612000000000003e-05, + "loss": 1.2259, "step": 298500 }, { - "epoch": 0.71, - "learning_rate": 1.5256057847535413e-05, - "loss": 1.331, + "epoch": 2.39, + "learning_rate": 1.7608e-05, + "loss": 1.2297, "step": 299000 }, { - "epoch": 0.71, - "learning_rate": 1.5248124833902528e-05, - "loss": 1.342, + "epoch": 2.4, + "learning_rate": 1.7604e-05, + "loss": 1.226, "step": 299500 }, { - "epoch": 0.71, - "learning_rate": 1.5240191820269646e-05, - "loss": 1.3355, + "epoch": 2.4, + "learning_rate": 1.76e-05, + "loss": 1.2278, "step": 300000 }, { - "epoch": 0.72, - "learning_rate": 1.523225880663676e-05, - "loss": 1.3206, + "epoch": 2.4, + "learning_rate": 1.7596000000000002e-05, + "loss": 1.2291, "step": 300500 }, { - "epoch": 0.72, - "learning_rate": 1.5224325793003876e-05, - "loss": 1.3463, + "epoch": 2.41, + "learning_rate": 1.7592000000000004e-05, + "loss": 1.2291, "step": 301000 }, { - "epoch": 0.72, - "learning_rate": 1.5216392779370992e-05, - "loss": 1.3243, + "epoch": 2.41, + "learning_rate": 1.7588e-05, + "loss": 1.2228, "step": 301500 }, { - "epoch": 0.72, - "learning_rate": 1.5208459765738108e-05, - "loss": 1.3299, + "epoch": 2.42, + "learning_rate": 1.7584e-05, + "loss": 1.2252, "step": 302000 }, { - "epoch": 0.72, - "learning_rate": 1.5200526752105225e-05, - "loss": 1.3552, + "epoch": 2.42, + "learning_rate": 1.758e-05, + "loss": 1.2322, "step": 302500 }, { - "epoch": 0.72, - "learning_rate": 1.5192593738472341e-05, - "loss": 1.3497, + "epoch": 2.42, + "learning_rate": 1.7576000000000002e-05, + "loss": 1.2248, "step": 303000 }, { - "epoch": 0.72, - "learning_rate": 1.5184660724839457e-05, - "loss": 1.335, + "epoch": 2.43, + "learning_rate": 1.7572000000000004e-05, + "loss": 1.2278, "step": 303500 }, { - "epoch": 0.72, - "learning_rate": 1.5176727711206573e-05, - "loss": 1.3517, + "epoch": 2.43, + "learning_rate": 1.7568000000000002e-05, + "loss": 1.2142, "step": 304000 }, { - "epoch": 0.72, - "learning_rate": 1.516879469757369e-05, - "loss": 1.3291, + "epoch": 2.44, + "learning_rate": 1.7564e-05, + "loss": 1.2294, "step": 304500 }, { - "epoch": 0.73, - "learning_rate": 1.5160861683940805e-05, - "loss": 1.351, + "epoch": 2.44, + "learning_rate": 1.756e-05, + "loss": 1.2247, "step": 305000 }, { - "epoch": 0.73, - "learning_rate": 1.515292867030792e-05, - "loss": 1.3385, + "epoch": 2.44, + "learning_rate": 1.7556000000000003e-05, + "loss": 1.2266, "step": 305500 }, { - "epoch": 0.73, - "learning_rate": 1.5144995656675036e-05, - "loss": 1.3322, + "epoch": 2.45, + "learning_rate": 1.7552e-05, + "loss": 1.2299, "step": 306000 }, { - "epoch": 0.73, - "learning_rate": 1.5137062643042152e-05, - "loss": 1.3398, + "epoch": 2.45, + "learning_rate": 1.7548000000000002e-05, + "loss": 1.2284, "step": 306500 }, { - "epoch": 0.73, - "learning_rate": 1.512912962940927e-05, - "loss": 1.3344, + "epoch": 2.46, + "learning_rate": 1.7544e-05, + "loss": 1.2205, "step": 307000 }, { - "epoch": 0.73, - "learning_rate": 1.5121196615776386e-05, - "loss": 1.3396, + "epoch": 2.46, + "learning_rate": 1.754e-05, + "loss": 1.2305, "step": 307500 }, { - "epoch": 0.73, - "learning_rate": 1.5113263602143501e-05, - "loss": 1.3425, + "epoch": 2.46, + "learning_rate": 1.7536000000000003e-05, + "loss": 1.2268, "step": 308000 }, { - "epoch": 0.73, - "learning_rate": 1.5105330588510619e-05, - "loss": 1.3207, + "epoch": 2.47, + "learning_rate": 1.7532e-05, + "loss": 1.226, "step": 308500 }, { - "epoch": 0.74, - "learning_rate": 1.5097397574877735e-05, - "loss": 1.3348, + "epoch": 2.47, + "learning_rate": 1.7528e-05, + "loss": 1.2249, "step": 309000 }, { - "epoch": 0.74, - "learning_rate": 1.5089464561244849e-05, - "loss": 1.3415, + "epoch": 2.48, + "learning_rate": 1.7524e-05, + "loss": 1.2264, "step": 309500 }, { - "epoch": 0.74, - "learning_rate": 1.5081531547611965e-05, - "loss": 1.3427, + "epoch": 2.48, + "learning_rate": 1.752e-05, + "loss": 1.2265, "step": 310000 }, { - "epoch": 0.74, - "learning_rate": 1.507359853397908e-05, - "loss": 1.3465, + "epoch": 2.48, + "learning_rate": 1.7516000000000003e-05, + "loss": 1.2197, "step": 310500 }, { - "epoch": 0.74, - "learning_rate": 1.5065665520346198e-05, - "loss": 1.329, + "epoch": 2.49, + "learning_rate": 1.7512e-05, + "loss": 1.2224, "step": 311000 }, { - "epoch": 0.74, - "learning_rate": 1.5057732506713314e-05, - "loss": 1.3409, + "epoch": 2.49, + "learning_rate": 1.7508e-05, + "loss": 1.2167, "step": 311500 }, { - "epoch": 0.74, - "learning_rate": 1.504979949308043e-05, - "loss": 1.3252, + "epoch": 2.5, + "learning_rate": 1.7504e-05, + "loss": 1.2284, "step": 312000 }, { - "epoch": 0.74, - "learning_rate": 1.5041866479447546e-05, - "loss": 1.3378, + "epoch": 2.5, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.2235, "step": 312500 }, { - "epoch": 0.74, - "learning_rate": 1.5033933465814663e-05, - "loss": 1.3381, + "epoch": 2.5, + "learning_rate": 1.7496000000000003e-05, + "loss": 1.2218, "step": 313000 }, { - "epoch": 0.75, - "learning_rate": 1.502600045218178e-05, - "loss": 1.3467, + "epoch": 2.51, + "learning_rate": 1.7492e-05, + "loss": 1.215, "step": 313500 }, { - "epoch": 0.75, - "learning_rate": 1.5018067438548894e-05, - "loss": 1.3331, + "epoch": 2.51, + "learning_rate": 1.7488e-05, + "loss": 1.2217, "step": 314000 }, { - "epoch": 0.75, - "learning_rate": 1.501013442491601e-05, - "loss": 1.3487, + "epoch": 2.52, + "learning_rate": 1.7484e-05, + "loss": 1.2207, "step": 314500 }, { - "epoch": 0.75, - "learning_rate": 1.5002201411283125e-05, - "loss": 1.3421, + "epoch": 2.52, + "learning_rate": 1.7480000000000002e-05, + "loss": 1.219, "step": 315000 }, { - "epoch": 0.75, - "learning_rate": 1.4994268397650243e-05, - "loss": 1.3232, + "epoch": 2.52, + "learning_rate": 1.7476000000000003e-05, + "loss": 1.225, "step": 315500 }, { - "epoch": 0.75, - "learning_rate": 1.4986335384017359e-05, - "loss": 1.3304, + "epoch": 2.53, + "learning_rate": 1.7472e-05, + "loss": 1.2214, "step": 316000 }, { - "epoch": 0.75, - "learning_rate": 1.4978402370384475e-05, - "loss": 1.3374, + "epoch": 2.53, + "learning_rate": 1.7468e-05, + "loss": 1.218, "step": 316500 }, { - "epoch": 0.75, - "learning_rate": 1.4970469356751592e-05, - "loss": 1.333, + "epoch": 2.54, + "learning_rate": 1.7464e-05, + "loss": 1.2182, "step": 317000 }, { - "epoch": 0.76, - "learning_rate": 1.4962536343118708e-05, - "loss": 1.3221, + "epoch": 2.54, + "learning_rate": 1.7460000000000002e-05, + "loss": 1.2143, "step": 317500 }, { - "epoch": 0.76, - "learning_rate": 1.4954603329485824e-05, - "loss": 1.3317, + "epoch": 2.54, + "learning_rate": 1.7456e-05, + "loss": 1.219, "step": 318000 }, { - "epoch": 0.76, - "learning_rate": 1.4946670315852938e-05, - "loss": 1.3269, + "epoch": 2.55, + "learning_rate": 1.7452e-05, + "loss": 1.2204, "step": 318500 }, { - "epoch": 0.76, - "learning_rate": 1.4938737302220054e-05, - "loss": 1.318, + "epoch": 2.55, + "learning_rate": 1.7448e-05, + "loss": 1.2143, "step": 319000 }, { - "epoch": 0.76, - "learning_rate": 1.4930804288587171e-05, - "loss": 1.3188, + "epoch": 2.56, + "learning_rate": 1.7444e-05, + "loss": 1.2188, "step": 319500 }, { - "epoch": 0.76, - "learning_rate": 1.4922871274954287e-05, - "loss": 1.3333, + "epoch": 2.56, + "learning_rate": 1.7440000000000002e-05, + "loss": 1.226, "step": 320000 }, { - "epoch": 0.76, - "learning_rate": 1.4914938261321403e-05, - "loss": 1.3207, + "epoch": 2.56, + "learning_rate": 1.7436e-05, + "loss": 1.2146, "step": 320500 }, { - "epoch": 0.76, - "learning_rate": 1.4907005247688519e-05, - "loss": 1.3166, + "epoch": 2.57, + "learning_rate": 1.7432000000000002e-05, + "loss": 1.2195, "step": 321000 }, { - "epoch": 0.77, - "learning_rate": 1.4899072234055637e-05, - "loss": 1.3143, + "epoch": 2.57, + "learning_rate": 1.7428e-05, + "loss": 1.2196, "step": 321500 }, { - "epoch": 0.77, - "learning_rate": 1.4891139220422752e-05, - "loss": 1.3376, + "epoch": 2.58, + "learning_rate": 1.7424e-05, + "loss": 1.2138, "step": 322000 }, { - "epoch": 0.77, - "learning_rate": 1.4883206206789868e-05, - "loss": 1.3167, + "epoch": 2.58, + "learning_rate": 1.7420000000000003e-05, + "loss": 1.2272, "step": 322500 }, { - "epoch": 0.77, - "learning_rate": 1.4875273193156982e-05, - "loss": 1.3148, + "epoch": 2.58, + "learning_rate": 1.7416e-05, + "loss": 1.221, "step": 323000 }, { - "epoch": 0.77, - "learning_rate": 1.4867340179524098e-05, - "loss": 1.3274, + "epoch": 2.59, + "learning_rate": 1.7412000000000002e-05, + "loss": 1.2135, "step": 323500 }, { - "epoch": 0.77, - "learning_rate": 1.4859407165891216e-05, - "loss": 1.3247, + "epoch": 2.59, + "learning_rate": 1.7408e-05, + "loss": 1.2085, "step": 324000 }, { - "epoch": 0.77, - "learning_rate": 1.4851474152258332e-05, - "loss": 1.3306, + "epoch": 2.6, + "learning_rate": 1.7404e-05, + "loss": 1.2114, "step": 324500 }, { - "epoch": 0.77, - "learning_rate": 1.4843541138625448e-05, - "loss": 1.3114, + "epoch": 2.6, + "learning_rate": 1.7400000000000003e-05, + "loss": 1.21, "step": 325000 }, { - "epoch": 0.77, - "learning_rate": 1.4835608124992564e-05, - "loss": 1.342, + "epoch": 2.6, + "learning_rate": 1.7396e-05, + "loss": 1.213, "step": 325500 }, { - "epoch": 0.78, - "learning_rate": 1.4827675111359681e-05, - "loss": 1.3312, + "epoch": 2.61, + "learning_rate": 1.7392000000000002e-05, + "loss": 1.2166, "step": 326000 }, { - "epoch": 0.78, - "learning_rate": 1.4819742097726797e-05, - "loss": 1.3318, + "epoch": 2.61, + "learning_rate": 1.7388e-05, + "loss": 1.2193, "step": 326500 }, { - "epoch": 0.78, - "learning_rate": 1.4811809084093913e-05, - "loss": 1.328, + "epoch": 2.62, + "learning_rate": 1.7384e-05, + "loss": 1.2197, "step": 327000 }, { - "epoch": 0.78, - "learning_rate": 1.4803876070461027e-05, - "loss": 1.3375, + "epoch": 2.62, + "learning_rate": 1.7380000000000003e-05, + "loss": 1.2207, "step": 327500 }, { - "epoch": 0.78, - "learning_rate": 1.4795943056828145e-05, - "loss": 1.3197, + "epoch": 2.62, + "learning_rate": 1.7376e-05, + "loss": 1.2155, "step": 328000 }, { - "epoch": 0.78, - "learning_rate": 1.478801004319526e-05, - "loss": 1.3103, + "epoch": 2.63, + "learning_rate": 1.7372000000000002e-05, + "loss": 1.2246, "step": 328500 }, { - "epoch": 0.78, - "learning_rate": 1.4780077029562376e-05, - "loss": 1.3349, + "epoch": 2.63, + "learning_rate": 1.7368e-05, + "loss": 1.2119, "step": 329000 }, { - "epoch": 0.78, - "learning_rate": 1.4772144015929492e-05, - "loss": 1.3162, + "epoch": 2.64, + "learning_rate": 1.7364000000000002e-05, + "loss": 1.2119, "step": 329500 }, { - "epoch": 0.79, - "learning_rate": 1.476421100229661e-05, - "loss": 1.3056, + "epoch": 2.64, + "learning_rate": 1.736e-05, + "loss": 1.2175, "step": 330000 }, { - "epoch": 0.79, - "learning_rate": 1.4756277988663726e-05, - "loss": 1.3276, + "epoch": 2.64, + "learning_rate": 1.7356e-05, + "loss": 1.2194, "step": 330500 }, { - "epoch": 0.79, - "learning_rate": 1.4748344975030841e-05, - "loss": 1.3215, + "epoch": 2.65, + "learning_rate": 1.7352000000000003e-05, + "loss": 1.2144, "step": 331000 }, { - "epoch": 0.79, - "learning_rate": 1.4740411961397957e-05, - "loss": 1.3295, + "epoch": 2.65, + "learning_rate": 1.7348e-05, + "loss": 1.2142, "step": 331500 }, { - "epoch": 0.79, - "learning_rate": 1.4732478947765071e-05, - "loss": 1.3234, + "epoch": 2.66, + "learning_rate": 1.7344000000000002e-05, + "loss": 1.2139, "step": 332000 }, { - "epoch": 0.79, - "learning_rate": 1.4724545934132189e-05, - "loss": 1.3274, + "epoch": 2.66, + "learning_rate": 1.734e-05, + "loss": 1.2169, "step": 332500 }, { - "epoch": 0.79, - "learning_rate": 1.4716612920499305e-05, - "loss": 1.3364, + "epoch": 2.66, + "learning_rate": 1.7336e-05, + "loss": 1.2131, "step": 333000 }, { - "epoch": 0.79, - "learning_rate": 1.470867990686642e-05, - "loss": 1.3036, + "epoch": 2.67, + "learning_rate": 1.7332000000000003e-05, + "loss": 1.2145, "step": 333500 }, { - "epoch": 0.79, - "learning_rate": 1.4700746893233537e-05, - "loss": 1.3256, + "epoch": 2.67, + "learning_rate": 1.7328e-05, + "loss": 1.2125, "step": 334000 }, { - "epoch": 0.8, - "learning_rate": 1.4692813879600654e-05, - "loss": 1.3096, + "epoch": 2.68, + "learning_rate": 1.7324000000000002e-05, + "loss": 1.218, "step": 334500 }, { - "epoch": 0.8, - "learning_rate": 1.468488086596777e-05, - "loss": 1.339, + "epoch": 2.68, + "learning_rate": 1.732e-05, + "loss": 1.2163, "step": 335000 }, { - "epoch": 0.8, - "learning_rate": 1.4676947852334886e-05, - "loss": 1.328, + "epoch": 2.68, + "learning_rate": 1.7316e-05, + "loss": 1.2136, "step": 335500 }, { - "epoch": 0.8, - "learning_rate": 1.4669014838702003e-05, - "loss": 1.3228, + "epoch": 2.69, + "learning_rate": 1.7312000000000003e-05, + "loss": 1.2045, "step": 336000 }, { - "epoch": 0.8, - "learning_rate": 1.4661081825069116e-05, - "loss": 1.3213, + "epoch": 2.69, + "learning_rate": 1.7308e-05, + "loss": 1.2145, "step": 336500 }, { - "epoch": 0.8, - "learning_rate": 1.4653148811436233e-05, - "loss": 1.332, + "epoch": 2.7, + "learning_rate": 1.7304000000000002e-05, + "loss": 1.2123, "step": 337000 }, { - "epoch": 0.8, - "learning_rate": 1.464521579780335e-05, - "loss": 1.3262, + "epoch": 2.7, + "learning_rate": 1.73e-05, + "loss": 1.2135, "step": 337500 }, { - "epoch": 0.8, - "learning_rate": 1.4637282784170465e-05, - "loss": 1.3213, + "epoch": 2.7, + "learning_rate": 1.7296000000000002e-05, + "loss": 1.2174, "step": 338000 }, { - "epoch": 0.81, - "learning_rate": 1.4629349770537583e-05, - "loss": 1.3289, + "epoch": 2.71, + "learning_rate": 1.7292000000000003e-05, + "loss": 1.2096, "step": 338500 }, { - "epoch": 0.81, - "learning_rate": 1.4621416756904699e-05, - "loss": 1.3137, + "epoch": 2.71, + "learning_rate": 1.7288e-05, + "loss": 1.2167, "step": 339000 }, { - "epoch": 0.81, - "learning_rate": 1.4613483743271814e-05, - "loss": 1.3191, + "epoch": 2.72, + "learning_rate": 1.7284e-05, + "loss": 1.2181, "step": 339500 }, { - "epoch": 0.81, - "learning_rate": 1.460555072963893e-05, - "loss": 1.3242, + "epoch": 2.72, + "learning_rate": 1.728e-05, + "loss": 1.2163, "step": 340000 }, { - "epoch": 0.81, - "learning_rate": 1.4597617716006045e-05, - "loss": 1.3244, + "epoch": 2.72, + "learning_rate": 1.7276000000000002e-05, + "loss": 1.2114, "step": 340500 }, { - "epoch": 0.81, - "learning_rate": 1.4589684702373162e-05, - "loss": 1.3339, + "epoch": 2.73, + "learning_rate": 1.7272000000000003e-05, + "loss": 1.2157, "step": 341000 }, { - "epoch": 0.81, - "learning_rate": 1.4581751688740278e-05, - "loss": 1.3387, + "epoch": 2.73, + "learning_rate": 1.7268e-05, + "loss": 1.2043, "step": 341500 }, { - "epoch": 0.81, - "learning_rate": 1.4573818675107394e-05, - "loss": 1.317, + "epoch": 2.74, + "learning_rate": 1.7264e-05, + "loss": 1.2088, "step": 342000 }, { - "epoch": 0.82, - "learning_rate": 1.456588566147451e-05, - "loss": 1.3094, + "epoch": 2.74, + "learning_rate": 1.726e-05, + "loss": 1.2137, "step": 342500 }, { - "epoch": 0.82, - "learning_rate": 1.4557952647841627e-05, - "loss": 1.322, + "epoch": 2.74, + "learning_rate": 1.7256000000000002e-05, + "loss": 1.2033, "step": 343000 }, { - "epoch": 0.82, - "learning_rate": 1.4550019634208743e-05, - "loss": 1.3226, + "epoch": 2.75, + "learning_rate": 1.7252000000000004e-05, + "loss": 1.2114, "step": 343500 }, { - "epoch": 0.82, - "learning_rate": 1.4542086620575859e-05, - "loss": 1.3209, + "epoch": 2.75, + "learning_rate": 1.7248e-05, + "loss": 1.206, "step": 344000 }, { - "epoch": 0.82, - "learning_rate": 1.4534153606942975e-05, - "loss": 1.3332, + "epoch": 2.76, + "learning_rate": 1.7244e-05, + "loss": 1.2084, "step": 344500 }, { - "epoch": 0.82, - "learning_rate": 1.4526220593310089e-05, - "loss": 1.3088, + "epoch": 2.76, + "learning_rate": 1.724e-05, + "loss": 1.2118, "step": 345000 }, { - "epoch": 0.82, - "learning_rate": 1.4518287579677207e-05, - "loss": 1.3244, + "epoch": 2.76, + "learning_rate": 1.7236000000000002e-05, + "loss": 1.2053, "step": 345500 }, { - "epoch": 0.82, - "learning_rate": 1.4510354566044322e-05, - "loss": 1.323, + "epoch": 2.77, + "learning_rate": 1.7232000000000004e-05, + "loss": 1.2034, "step": 346000 }, { - "epoch": 0.82, - "learning_rate": 1.4502421552411438e-05, - "loss": 1.3268, + "epoch": 2.77, + "learning_rate": 1.7228000000000002e-05, + "loss": 1.2034, "step": 346500 }, { - "epoch": 0.83, - "learning_rate": 1.4494488538778554e-05, - "loss": 1.3168, + "epoch": 2.78, + "learning_rate": 1.7224e-05, + "loss": 1.2055, "step": 347000 }, { - "epoch": 0.83, - "learning_rate": 1.4486555525145672e-05, - "loss": 1.3126, + "epoch": 2.78, + "learning_rate": 1.722e-05, + "loss": 1.2068, "step": 347500 }, { - "epoch": 0.83, - "learning_rate": 1.4478622511512788e-05, - "loss": 1.3337, + "epoch": 2.78, + "learning_rate": 1.7216000000000003e-05, + "loss": 1.2056, "step": 348000 }, { - "epoch": 0.83, - "learning_rate": 1.4470689497879903e-05, - "loss": 1.3058, + "epoch": 2.79, + "learning_rate": 1.7212e-05, + "loss": 1.2027, "step": 348500 }, { - "epoch": 0.83, - "learning_rate": 1.4462756484247021e-05, - "loss": 1.3144, + "epoch": 2.79, + "learning_rate": 1.7208000000000002e-05, + "loss": 1.2045, "step": 349000 }, { - "epoch": 0.83, - "learning_rate": 1.4454823470614135e-05, - "loss": 1.2985, + "epoch": 2.8, + "learning_rate": 1.7204e-05, + "loss": 1.2044, "step": 349500 }, { - "epoch": 0.83, - "learning_rate": 1.4446890456981251e-05, - "loss": 1.3195, + "epoch": 2.8, + "learning_rate": 1.72e-05, + "loss": 1.2073, "step": 350000 }, { - "epoch": 0.83, - "learning_rate": 1.4438957443348367e-05, - "loss": 1.3055, + "epoch": 2.8, + "learning_rate": 1.7196000000000003e-05, + "loss": 1.2113, "step": 350500 }, { - "epoch": 0.84, - "learning_rate": 1.4431024429715483e-05, - "loss": 1.3234, + "epoch": 2.81, + "learning_rate": 1.7192e-05, + "loss": 1.2081, "step": 351000 }, { - "epoch": 0.84, - "learning_rate": 1.44230914160826e-05, - "loss": 1.314, + "epoch": 2.81, + "learning_rate": 1.7188000000000002e-05, + "loss": 1.2076, "step": 351500 }, { - "epoch": 0.84, - "learning_rate": 1.4415158402449716e-05, - "loss": 1.3196, + "epoch": 2.82, + "learning_rate": 1.7184e-05, + "loss": 1.2116, "step": 352000 }, { - "epoch": 0.84, - "learning_rate": 1.4407225388816832e-05, - "loss": 1.3052, + "epoch": 2.82, + "learning_rate": 1.718e-05, + "loss": 1.209, "step": 352500 }, { - "epoch": 0.84, - "learning_rate": 1.4399292375183948e-05, - "loss": 1.3182, + "epoch": 2.82, + "learning_rate": 1.7176000000000003e-05, + "loss": 1.2089, "step": 353000 }, { - "epoch": 0.84, - "learning_rate": 1.4391359361551065e-05, - "loss": 1.3213, + "epoch": 2.83, + "learning_rate": 1.7172e-05, + "loss": 1.2073, "step": 353500 }, { - "epoch": 0.84, - "learning_rate": 1.438342634791818e-05, - "loss": 1.2952, + "epoch": 2.83, + "learning_rate": 1.7168000000000002e-05, + "loss": 1.2057, "step": 354000 }, { - "epoch": 0.84, - "learning_rate": 1.4375493334285296e-05, - "loss": 1.3257, + "epoch": 2.84, + "learning_rate": 1.7164e-05, + "loss": 1.2046, "step": 354500 }, { - "epoch": 0.84, - "learning_rate": 1.4367560320652411e-05, - "loss": 1.3322, + "epoch": 2.84, + "learning_rate": 1.7160000000000002e-05, + "loss": 1.2061, "step": 355000 }, { - "epoch": 0.85, - "learning_rate": 1.4359627307019527e-05, - "loss": 1.3058, + "epoch": 2.84, + "learning_rate": 1.7156000000000003e-05, + "loss": 1.2079, "step": 355500 }, { - "epoch": 0.85, - "learning_rate": 1.4351694293386645e-05, - "loss": 1.2913, + "epoch": 2.85, + "learning_rate": 1.7152e-05, + "loss": 1.2075, "step": 356000 }, { - "epoch": 0.85, - "learning_rate": 1.434376127975376e-05, - "loss": 1.3154, + "epoch": 2.85, + "learning_rate": 1.7148000000000003e-05, + "loss": 1.2085, "step": 356500 }, { - "epoch": 0.85, - "learning_rate": 1.4335828266120877e-05, - "loss": 1.3069, + "epoch": 2.86, + "learning_rate": 1.7144e-05, + "loss": 1.2058, "step": 357000 }, { - "epoch": 0.85, - "learning_rate": 1.4327895252487994e-05, - "loss": 1.3171, + "epoch": 2.86, + "learning_rate": 1.7140000000000002e-05, + "loss": 1.2032, "step": 357500 }, { - "epoch": 0.85, - "learning_rate": 1.431996223885511e-05, - "loss": 1.3136, + "epoch": 2.86, + "learning_rate": 1.7136e-05, + "loss": 1.197, "step": 358000 }, { - "epoch": 0.85, - "learning_rate": 1.4312029225222224e-05, - "loss": 1.3125, + "epoch": 2.87, + "learning_rate": 1.7132e-05, + "loss": 1.2035, "step": 358500 }, { - "epoch": 0.85, - "learning_rate": 1.430409621158934e-05, - "loss": 1.3019, + "epoch": 2.87, + "learning_rate": 1.7128000000000003e-05, + "loss": 1.2008, "step": 359000 }, { - "epoch": 0.86, - "learning_rate": 1.4296163197956456e-05, - "loss": 1.3075, + "epoch": 2.88, + "learning_rate": 1.7124e-05, + "loss": 1.2024, "step": 359500 }, { - "epoch": 0.86, - "learning_rate": 1.4288230184323573e-05, - "loss": 1.2988, + "epoch": 2.88, + "learning_rate": 1.7120000000000002e-05, + "loss": 1.1994, "step": 360000 }, { - "epoch": 0.86, - "learning_rate": 1.428029717069069e-05, - "loss": 1.307, + "epoch": 2.88, + "learning_rate": 1.7116e-05, + "loss": 1.2064, "step": 360500 }, { - "epoch": 0.86, - "learning_rate": 1.4272364157057805e-05, - "loss": 1.3014, + "epoch": 2.89, + "learning_rate": 1.7112e-05, + "loss": 1.2014, "step": 361000 }, { - "epoch": 0.86, - "learning_rate": 1.4264431143424921e-05, - "loss": 1.2961, + "epoch": 2.89, + "learning_rate": 1.7108000000000003e-05, + "loss": 1.2051, "step": 361500 }, { - "epoch": 0.86, - "learning_rate": 1.4256498129792039e-05, - "loss": 1.293, + "epoch": 2.9, + "learning_rate": 1.7104e-05, + "loss": 1.2027, "step": 362000 }, { - "epoch": 0.86, - "learning_rate": 1.4248565116159154e-05, - "loss": 1.3178, + "epoch": 2.9, + "learning_rate": 1.7100000000000002e-05, + "loss": 1.2051, "step": 362500 }, { - "epoch": 0.86, - "learning_rate": 1.4240632102526269e-05, - "loss": 1.3029, + "epoch": 2.9, + "learning_rate": 1.7096e-05, + "loss": 1.2101, "step": 363000 }, { - "epoch": 0.87, - "learning_rate": 1.4232699088893384e-05, - "loss": 1.2999, + "epoch": 2.91, + "learning_rate": 1.7092000000000002e-05, + "loss": 1.2065, "step": 363500 }, { - "epoch": 0.87, - "learning_rate": 1.42247660752605e-05, - "loss": 1.312, + "epoch": 2.91, + "learning_rate": 1.7088000000000003e-05, + "loss": 1.2112, "step": 364000 }, { - "epoch": 0.87, - "learning_rate": 1.4216833061627618e-05, - "loss": 1.3113, + "epoch": 2.92, + "learning_rate": 1.7084e-05, + "loss": 1.2, "step": 364500 }, { - "epoch": 0.87, - "learning_rate": 1.4208900047994734e-05, - "loss": 1.3054, + "epoch": 2.92, + "learning_rate": 1.7080000000000002e-05, + "loss": 1.1968, "step": 365000 }, { - "epoch": 0.87, - "learning_rate": 1.420096703436185e-05, - "loss": 1.3112, + "epoch": 2.92, + "learning_rate": 1.7076e-05, + "loss": 1.2012, "step": 365500 }, { - "epoch": 0.87, - "learning_rate": 1.4193034020728966e-05, - "loss": 1.3033, + "epoch": 2.93, + "learning_rate": 1.7072000000000002e-05, + "loss": 1.2013, "step": 366000 }, { - "epoch": 0.87, - "learning_rate": 1.4185101007096083e-05, - "loss": 1.3126, + "epoch": 2.93, + "learning_rate": 1.7068000000000003e-05, + "loss": 1.2086, "step": 366500 }, { - "epoch": 0.87, - "learning_rate": 1.4177167993463199e-05, - "loss": 1.2953, + "epoch": 2.94, + "learning_rate": 1.7064e-05, + "loss": 1.2017, "step": 367000 }, { - "epoch": 0.87, - "learning_rate": 1.4169234979830313e-05, - "loss": 1.3032, + "epoch": 2.94, + "learning_rate": 1.7060000000000003e-05, + "loss": 1.1961, "step": 367500 }, { - "epoch": 0.88, - "learning_rate": 1.4161301966197429e-05, - "loss": 1.3037, + "epoch": 2.94, + "learning_rate": 1.7056e-05, + "loss": 1.2033, "step": 368000 }, { - "epoch": 0.88, - "learning_rate": 1.4153368952564547e-05, - "loss": 1.3037, + "epoch": 2.95, + "learning_rate": 1.7052000000000002e-05, + "loss": 1.205, "step": 368500 }, { - "epoch": 0.88, - "learning_rate": 1.4145435938931662e-05, - "loss": 1.3157, + "epoch": 2.95, + "learning_rate": 1.7048000000000003e-05, + "loss": 1.1949, "step": 369000 }, { - "epoch": 0.88, - "learning_rate": 1.4137502925298778e-05, - "loss": 1.2962, + "epoch": 2.96, + "learning_rate": 1.7044e-05, + "loss": 1.2033, "step": 369500 }, { - "epoch": 0.88, - "learning_rate": 1.4129569911665894e-05, - "loss": 1.3041, + "epoch": 2.96, + "learning_rate": 1.704e-05, + "loss": 1.1956, "step": 370000 }, { - "epoch": 0.88, - "learning_rate": 1.4121636898033012e-05, - "loss": 1.3162, + "epoch": 2.96, + "learning_rate": 1.7036e-05, + "loss": 1.1983, "step": 370500 }, { - "epoch": 0.88, - "learning_rate": 1.4113703884400128e-05, - "loss": 1.3037, + "epoch": 2.97, + "learning_rate": 1.7032000000000002e-05, + "loss": 1.1971, "step": 371000 }, { - "epoch": 0.88, - "learning_rate": 1.4105770870767243e-05, - "loss": 1.3072, + "epoch": 2.97, + "learning_rate": 1.7028000000000004e-05, + "loss": 1.194, "step": 371500 }, { - "epoch": 0.89, - "learning_rate": 1.4097837857134358e-05, - "loss": 1.2938, + "epoch": 2.98, + "learning_rate": 1.7024e-05, + "loss": 1.2097, "step": 372000 }, { - "epoch": 0.89, - "learning_rate": 1.4089904843501473e-05, - "loss": 1.3014, + "epoch": 2.98, + "learning_rate": 1.702e-05, + "loss": 1.2002, "step": 372500 }, { - "epoch": 0.89, - "learning_rate": 1.4081971829868591e-05, - "loss": 1.3023, + "epoch": 2.98, + "learning_rate": 1.7016e-05, + "loss": 1.1966, "step": 373000 }, { - "epoch": 0.89, - "learning_rate": 1.4074038816235707e-05, - "loss": 1.3017, + "epoch": 2.99, + "learning_rate": 1.7012000000000002e-05, + "loss": 1.1937, "step": 373500 }, { - "epoch": 0.89, - "learning_rate": 1.4066105802602823e-05, - "loss": 1.3143, + "epoch": 2.99, + "learning_rate": 1.7008000000000004e-05, + "loss": 1.1968, "step": 374000 }, { - "epoch": 0.89, - "learning_rate": 1.4058172788969939e-05, - "loss": 1.31, + "epoch": 3.0, + "learning_rate": 1.7004000000000002e-05, + "loss": 1.195, "step": 374500 }, { - "epoch": 0.89, - "learning_rate": 1.4050239775337056e-05, - "loss": 1.3034, + "epoch": 3.0, + "learning_rate": 1.7e-05, + "loss": 1.1981, "step": 375000 }, { - "epoch": 0.89, - "learning_rate": 1.4042306761704172e-05, - "loss": 1.3064, + "epoch": 3.0, + "eval_loss": 1.144347906112671, + "eval_runtime": 81.5337, + "eval_samples_per_second": 164.509, + "eval_steps_per_second": 2.576, + "step": 375000 + }, + { + "epoch": 3.0, + "learning_rate": 1.6996e-05, + "loss": 1.1964, "step": 375500 }, { - "epoch": 0.89, - "learning_rate": 1.4034373748071288e-05, - "loss": 1.3006, + "epoch": 3.01, + "learning_rate": 1.6992000000000003e-05, + "loss": 1.1956, "step": 376000 }, { - "epoch": 0.9, - "learning_rate": 1.4026440734438402e-05, - "loss": 1.3022, + "epoch": 3.01, + "learning_rate": 1.6988e-05, + "loss": 1.197, "step": 376500 }, { - "epoch": 0.9, - "learning_rate": 1.4018507720805518e-05, - "loss": 1.2904, + "epoch": 3.02, + "learning_rate": 1.6984000000000002e-05, + "loss": 1.1958, "step": 377000 }, { - "epoch": 0.9, - "learning_rate": 1.4010574707172635e-05, - "loss": 1.3114, + "epoch": 3.02, + "learning_rate": 1.698e-05, + "loss": 1.1972, "step": 377500 }, { - "epoch": 0.9, - "learning_rate": 1.4002641693539751e-05, - "loss": 1.3129, + "epoch": 3.02, + "learning_rate": 1.6976e-05, + "loss": 1.1957, "step": 378000 }, { - "epoch": 0.9, - "learning_rate": 1.3994708679906867e-05, - "loss": 1.3128, + "epoch": 3.03, + "learning_rate": 1.6972000000000003e-05, + "loss": 1.1937, "step": 378500 }, { - "epoch": 0.9, - "learning_rate": 1.3986775666273985e-05, - "loss": 1.3276, + "epoch": 3.03, + "learning_rate": 1.6968e-05, + "loss": 1.1905, "step": 379000 }, { - "epoch": 0.9, - "learning_rate": 1.39788426526411e-05, - "loss": 1.3104, + "epoch": 3.04, + "learning_rate": 1.6964e-05, + "loss": 1.1894, "step": 379500 }, { - "epoch": 0.9, - "learning_rate": 1.3970909639008216e-05, - "loss": 1.3133, + "epoch": 3.04, + "learning_rate": 1.696e-05, + "loss": 1.2001, "step": 380000 }, { - "epoch": 0.91, - "learning_rate": 1.3962976625375332e-05, - "loss": 1.3067, + "epoch": 3.04, + "learning_rate": 1.6956e-05, + "loss": 1.1898, "step": 380500 }, { - "epoch": 0.91, - "learning_rate": 1.3955043611742447e-05, - "loss": 1.288, + "epoch": 3.05, + "learning_rate": 1.6952000000000003e-05, + "loss": 1.1942, "step": 381000 }, { - "epoch": 0.91, - "learning_rate": 1.3947110598109564e-05, - "loss": 1.2915, + "epoch": 3.05, + "learning_rate": 1.6948e-05, + "loss": 1.1924, "step": 381500 }, { - "epoch": 0.91, - "learning_rate": 1.393917758447668e-05, - "loss": 1.3016, + "epoch": 3.06, + "learning_rate": 1.6944e-05, + "loss": 1.1963, "step": 382000 }, { - "epoch": 0.91, - "learning_rate": 1.3931244570843796e-05, - "loss": 1.3032, + "epoch": 3.06, + "learning_rate": 1.694e-05, + "loss": 1.1896, "step": 382500 }, { - "epoch": 0.91, - "learning_rate": 1.3923311557210912e-05, - "loss": 1.3005, + "epoch": 3.06, + "learning_rate": 1.6936000000000002e-05, + "loss": 1.2003, "step": 383000 }, { - "epoch": 0.91, - "learning_rate": 1.391537854357803e-05, - "loss": 1.2996, + "epoch": 3.07, + "learning_rate": 1.6932000000000003e-05, + "loss": 1.1931, "step": 383500 }, { - "epoch": 0.91, - "learning_rate": 1.3907445529945145e-05, - "loss": 1.2896, + "epoch": 3.07, + "learning_rate": 1.6928e-05, + "loss": 1.1962, "step": 384000 }, { - "epoch": 0.92, - "learning_rate": 1.3899512516312261e-05, - "loss": 1.3101, + "epoch": 3.08, + "learning_rate": 1.6924e-05, + "loss": 1.1963, "step": 384500 }, { - "epoch": 0.92, - "learning_rate": 1.3891579502679375e-05, - "loss": 1.2886, + "epoch": 3.08, + "learning_rate": 1.692e-05, + "loss": 1.1862, "step": 385000 }, { - "epoch": 0.92, - "learning_rate": 1.3883646489046491e-05, - "loss": 1.295, + "epoch": 3.08, + "learning_rate": 1.6916000000000002e-05, + "loss": 1.1885, "step": 385500 }, { - "epoch": 0.92, - "learning_rate": 1.3875713475413609e-05, - "loss": 1.3001, + "epoch": 3.09, + "learning_rate": 1.6912000000000003e-05, + "loss": 1.1898, "step": 386000 }, { - "epoch": 0.92, - "learning_rate": 1.3867780461780724e-05, - "loss": 1.3173, + "epoch": 3.09, + "learning_rate": 1.6908e-05, + "loss": 1.1947, "step": 386500 }, { - "epoch": 0.92, - "learning_rate": 1.385984744814784e-05, - "loss": 1.3049, + "epoch": 3.1, + "learning_rate": 1.6904e-05, + "loss": 1.191, "step": 387000 }, { - "epoch": 0.92, - "learning_rate": 1.3851914434514956e-05, - "loss": 1.2951, + "epoch": 3.1, + "learning_rate": 1.69e-05, + "loss": 1.1958, "step": 387500 }, { - "epoch": 0.92, - "learning_rate": 1.3843981420882074e-05, - "loss": 1.3051, + "epoch": 3.1, + "learning_rate": 1.6896000000000002e-05, + "loss": 1.1936, "step": 388000 }, { - "epoch": 0.92, - "learning_rate": 1.383604840724919e-05, - "loss": 1.2801, + "epoch": 3.11, + "learning_rate": 1.6892e-05, + "loss": 1.1912, "step": 388500 }, { - "epoch": 0.93, - "learning_rate": 1.3828115393616305e-05, - "loss": 1.2945, + "epoch": 3.11, + "learning_rate": 1.6888e-05, + "loss": 1.1888, "step": 389000 }, { - "epoch": 0.93, - "learning_rate": 1.382018237998342e-05, - "loss": 1.3094, + "epoch": 3.12, + "learning_rate": 1.6884e-05, + "loss": 1.201, "step": 389500 }, { - "epoch": 0.93, - "learning_rate": 1.3812249366350537e-05, - "loss": 1.2932, + "epoch": 3.12, + "learning_rate": 1.688e-05, + "loss": 1.1902, "step": 390000 }, { - "epoch": 0.93, - "learning_rate": 1.3804316352717653e-05, - "loss": 1.3115, + "epoch": 3.12, + "learning_rate": 1.6876000000000002e-05, + "loss": 1.1854, "step": 390500 }, { - "epoch": 0.93, - "learning_rate": 1.3796383339084769e-05, - "loss": 1.2897, + "epoch": 3.13, + "learning_rate": 1.6872e-05, + "loss": 1.1942, "step": 391000 }, { - "epoch": 0.93, - "learning_rate": 1.3788450325451885e-05, - "loss": 1.3055, + "epoch": 3.13, + "learning_rate": 1.6868000000000002e-05, + "loss": 1.1923, "step": 391500 }, { - "epoch": 0.93, - "learning_rate": 1.3780517311819002e-05, - "loss": 1.2972, + "epoch": 3.14, + "learning_rate": 1.6864e-05, + "loss": 1.1982, "step": 392000 }, { - "epoch": 0.93, - "learning_rate": 1.3772584298186118e-05, - "loss": 1.306, + "epoch": 3.14, + "learning_rate": 1.686e-05, + "loss": 1.1952, "step": 392500 }, { - "epoch": 0.94, - "learning_rate": 1.3764651284553234e-05, - "loss": 1.2972, + "epoch": 3.14, + "learning_rate": 1.6856000000000003e-05, + "loss": 1.1924, "step": 393000 }, { - "epoch": 0.94, - "learning_rate": 1.375671827092035e-05, - "loss": 1.3171, + "epoch": 3.15, + "learning_rate": 1.6852e-05, + "loss": 1.1905, "step": 393500 }, { - "epoch": 0.94, - "learning_rate": 1.3748785257287464e-05, - "loss": 1.2894, + "epoch": 3.15, + "learning_rate": 1.6848000000000002e-05, + "loss": 1.1935, "step": 394000 }, { - "epoch": 0.94, - "learning_rate": 1.3740852243654582e-05, - "loss": 1.2987, + "epoch": 3.16, + "learning_rate": 1.6844e-05, + "loss": 1.1952, "step": 394500 }, { - "epoch": 0.94, - "learning_rate": 1.3732919230021698e-05, - "loss": 1.3164, + "epoch": 3.16, + "learning_rate": 1.684e-05, + "loss": 1.192, "step": 395000 }, { - "epoch": 0.94, - "learning_rate": 1.3724986216388813e-05, - "loss": 1.2989, + "epoch": 3.16, + "learning_rate": 1.6836000000000003e-05, + "loss": 1.1836, "step": 395500 }, { - "epoch": 0.94, - "learning_rate": 1.371705320275593e-05, - "loss": 1.2899, + "epoch": 3.17, + "learning_rate": 1.6832e-05, + "loss": 1.1882, "step": 396000 }, { - "epoch": 0.94, - "learning_rate": 1.3709120189123047e-05, - "loss": 1.2895, + "epoch": 3.17, + "learning_rate": 1.6828000000000002e-05, + "loss": 1.1887, "step": 396500 }, { - "epoch": 0.94, - "learning_rate": 1.3701187175490163e-05, - "loss": 1.3058, + "epoch": 3.18, + "learning_rate": 1.6824e-05, + "loss": 1.1919, "step": 397000 }, { - "epoch": 0.95, - "learning_rate": 1.3693254161857279e-05, - "loss": 1.3147, + "epoch": 3.18, + "learning_rate": 1.682e-05, + "loss": 1.187, "step": 397500 }, { - "epoch": 0.95, - "learning_rate": 1.3685321148224396e-05, - "loss": 1.3051, + "epoch": 3.18, + "learning_rate": 1.6816e-05, + "loss": 1.1905, "step": 398000 }, { - "epoch": 0.95, - "learning_rate": 1.3677388134591509e-05, - "loss": 1.3063, + "epoch": 3.19, + "learning_rate": 1.6812e-05, + "loss": 1.1838, "step": 398500 }, { - "epoch": 0.95, - "learning_rate": 1.3669455120958626e-05, - "loss": 1.2834, + "epoch": 3.19, + "learning_rate": 1.6808000000000002e-05, + "loss": 1.1948, "step": 399000 }, { - "epoch": 0.95, - "learning_rate": 1.3661522107325742e-05, - "loss": 1.3036, + "epoch": 3.2, + "learning_rate": 1.6804e-05, + "loss": 1.1851, "step": 399500 }, { - "epoch": 0.95, - "learning_rate": 1.3653589093692858e-05, - "loss": 1.2919, + "epoch": 3.2, + "learning_rate": 1.6800000000000002e-05, + "loss": 1.1852, "step": 400000 }, { - "epoch": 0.95, - "learning_rate": 1.3645656080059975e-05, - "loss": 1.2906, + "epoch": 3.2, + "learning_rate": 1.6796e-05, + "loss": 1.1913, "step": 400500 }, { - "epoch": 0.95, - "learning_rate": 1.3637723066427091e-05, - "loss": 1.3035, + "epoch": 3.21, + "learning_rate": 1.6792e-05, + "loss": 1.1823, "step": 401000 }, { - "epoch": 0.96, - "learning_rate": 1.3629790052794207e-05, - "loss": 1.3112, + "epoch": 3.21, + "learning_rate": 1.6788000000000003e-05, + "loss": 1.1902, "step": 401500 }, { - "epoch": 0.96, - "learning_rate": 1.3621857039161323e-05, - "loss": 1.3068, + "epoch": 3.22, + "learning_rate": 1.6784e-05, + "loss": 1.1918, "step": 402000 }, { - "epoch": 0.96, - "learning_rate": 1.361392402552844e-05, - "loss": 1.2963, + "epoch": 3.22, + "learning_rate": 1.6780000000000002e-05, + "loss": 1.1869, "step": 402500 }, { - "epoch": 0.96, - "learning_rate": 1.3605991011895555e-05, - "loss": 1.283, + "epoch": 3.22, + "learning_rate": 1.6776e-05, + "loss": 1.1833, "step": 403000 }, { - "epoch": 0.96, - "learning_rate": 1.359805799826267e-05, - "loss": 1.2828, + "epoch": 3.23, + "learning_rate": 1.6772e-05, + "loss": 1.1936, "step": 403500 }, { - "epoch": 0.96, - "learning_rate": 1.3590124984629786e-05, - "loss": 1.306, + "epoch": 3.23, + "learning_rate": 1.6768000000000003e-05, + "loss": 1.1898, "step": 404000 }, { - "epoch": 0.96, - "learning_rate": 1.3582191970996902e-05, - "loss": 1.2986, + "epoch": 3.24, + "learning_rate": 1.6764e-05, + "loss": 1.1863, "step": 404500 }, { - "epoch": 0.96, - "learning_rate": 1.357425895736402e-05, - "loss": 1.2872, + "epoch": 3.24, + "learning_rate": 1.6760000000000002e-05, + "loss": 1.1815, "step": 405000 }, { - "epoch": 0.97, - "learning_rate": 1.3566325943731136e-05, - "loss": 1.2923, + "epoch": 3.24, + "learning_rate": 1.6756e-05, + "loss": 1.1915, "step": 405500 }, { - "epoch": 0.97, - "learning_rate": 1.3558392930098252e-05, - "loss": 1.2967, + "epoch": 3.25, + "learning_rate": 1.6752e-05, + "loss": 1.1861, "step": 406000 }, { - "epoch": 0.97, - "learning_rate": 1.3550459916465367e-05, - "loss": 1.2779, + "epoch": 3.25, + "learning_rate": 1.6748000000000003e-05, + "loss": 1.187, "step": 406500 }, { - "epoch": 0.97, - "learning_rate": 1.3542526902832485e-05, - "loss": 1.2828, + "epoch": 3.26, + "learning_rate": 1.6744e-05, + "loss": 1.1894, "step": 407000 }, { - "epoch": 0.97, - "learning_rate": 1.35345938891996e-05, - "loss": 1.2953, + "epoch": 3.26, + "learning_rate": 1.6740000000000002e-05, + "loss": 1.191, "step": 407500 }, { - "epoch": 0.97, - "learning_rate": 1.3526660875566715e-05, - "loss": 1.2748, + "epoch": 3.26, + "learning_rate": 1.6736e-05, + "loss": 1.1859, "step": 408000 }, { - "epoch": 0.97, - "learning_rate": 1.3518727861933831e-05, - "loss": 1.2979, + "epoch": 3.27, + "learning_rate": 1.6732000000000002e-05, + "loss": 1.1908, "step": 408500 }, { - "epoch": 0.97, - "learning_rate": 1.3510794848300949e-05, - "loss": 1.2942, + "epoch": 3.27, + "learning_rate": 1.6728000000000003e-05, + "loss": 1.1944, "step": 409000 }, { - "epoch": 0.97, - "learning_rate": 1.3502861834668064e-05, - "loss": 1.3034, + "epoch": 3.28, + "learning_rate": 1.6724e-05, + "loss": 1.1877, "step": 409500 }, { - "epoch": 0.98, - "learning_rate": 1.349492882103518e-05, - "loss": 1.3038, + "epoch": 3.28, + "learning_rate": 1.672e-05, + "loss": 1.1828, "step": 410000 }, { - "epoch": 0.98, - "learning_rate": 1.3486995807402296e-05, - "loss": 1.3012, + "epoch": 3.28, + "learning_rate": 1.6716e-05, + "loss": 1.1844, "step": 410500 }, { - "epoch": 0.98, - "learning_rate": 1.3479062793769414e-05, - "loss": 1.307, + "epoch": 3.29, + "learning_rate": 1.6712000000000002e-05, + "loss": 1.1919, "step": 411000 }, { - "epoch": 0.98, - "learning_rate": 1.347112978013653e-05, - "loss": 1.296, + "epoch": 3.29, + "learning_rate": 1.6708000000000003e-05, + "loss": 1.1876, "step": 411500 }, { - "epoch": 0.98, - "learning_rate": 1.3463196766503644e-05, - "loss": 1.3014, + "epoch": 3.3, + "learning_rate": 1.6704e-05, + "loss": 1.1812, "step": 412000 }, { - "epoch": 0.98, - "learning_rate": 1.345526375287076e-05, - "loss": 1.2976, + "epoch": 3.3, + "learning_rate": 1.67e-05, + "loss": 1.1826, "step": 412500 }, { - "epoch": 0.98, - "learning_rate": 1.3447330739237875e-05, - "loss": 1.3056, + "epoch": 3.3, + "learning_rate": 1.6696e-05, + "loss": 1.1894, "step": 413000 }, { - "epoch": 0.98, - "learning_rate": 1.3439397725604993e-05, - "loss": 1.2974, + "epoch": 3.31, + "learning_rate": 1.6692000000000002e-05, + "loss": 1.1832, "step": 413500 }, { - "epoch": 0.99, - "learning_rate": 1.3431464711972109e-05, - "loss": 1.287, + "epoch": 3.31, + "learning_rate": 1.6688000000000004e-05, + "loss": 1.1883, "step": 414000 }, { - "epoch": 0.99, - "learning_rate": 1.3423531698339225e-05, - "loss": 1.2929, + "epoch": 3.32, + "learning_rate": 1.6684e-05, + "loss": 1.1826, "step": 414500 }, { - "epoch": 0.99, - "learning_rate": 1.341559868470634e-05, - "loss": 1.2903, + "epoch": 3.32, + "learning_rate": 1.668e-05, + "loss": 1.1855, "step": 415000 }, { - "epoch": 0.99, - "learning_rate": 1.3407665671073458e-05, - "loss": 1.2967, + "epoch": 3.32, + "learning_rate": 1.6676e-05, + "loss": 1.187, "step": 415500 }, { - "epoch": 0.99, - "learning_rate": 1.3399732657440574e-05, - "loss": 1.2881, + "epoch": 3.33, + "learning_rate": 1.6672000000000002e-05, + "loss": 1.1851, "step": 416000 }, { - "epoch": 0.99, - "learning_rate": 1.3391799643807688e-05, - "loss": 1.2844, + "epoch": 3.33, + "learning_rate": 1.6668e-05, + "loss": 1.1814, "step": 416500 }, { - "epoch": 0.99, - "learning_rate": 1.3383866630174804e-05, - "loss": 1.2803, + "epoch": 3.34, + "learning_rate": 1.6664000000000002e-05, + "loss": 1.1778, "step": 417000 }, { - "epoch": 0.99, - "learning_rate": 1.337593361654192e-05, - "loss": 1.2975, + "epoch": 3.34, + "learning_rate": 1.666e-05, + "loss": 1.1759, "step": 417500 }, { - "epoch": 0.99, - "learning_rate": 1.3368000602909037e-05, - "loss": 1.2977, + "epoch": 3.34, + "learning_rate": 1.6656e-05, + "loss": 1.1782, "step": 418000 }, { - "epoch": 1.0, - "learning_rate": 1.3360067589276153e-05, - "loss": 1.2968, + "epoch": 3.35, + "learning_rate": 1.6652000000000003e-05, + "loss": 1.1881, "step": 418500 }, { - "epoch": 1.0, - "learning_rate": 1.335213457564327e-05, - "loss": 1.2808, + "epoch": 3.35, + "learning_rate": 1.6648e-05, + "loss": 1.1909, "step": 419000 }, { - "epoch": 1.0, - "learning_rate": 1.3344201562010387e-05, - "loss": 1.291, + "epoch": 3.36, + "learning_rate": 1.6644000000000002e-05, + "loss": 1.1796, "step": 419500 }, { - "epoch": 1.0, - "learning_rate": 1.3336268548377503e-05, - "loss": 1.2857, + "epoch": 3.36, + "learning_rate": 1.664e-05, + "loss": 1.1754, "step": 420000 }, { - "epoch": 1.0, - "eval_loss": 1.2632273435592651, - "eval_runtime": 3622.7986, - "eval_samples_per_second": 366.442, - "eval_steps_per_second": 22.903, - "step": 420185 - }, - { - "epoch": 1.0, - "learning_rate": 1.3328335534744618e-05, - "loss": 1.2928, + "epoch": 3.36, + "learning_rate": 1.6636e-05, + "loss": 1.1875, "step": 420500 }, { - "epoch": 1.0, - "learning_rate": 1.3320402521111733e-05, - "loss": 1.274, + "epoch": 3.37, + "learning_rate": 1.6632000000000003e-05, + "loss": 1.1817, "step": 421000 }, { - "epoch": 1.0, - "learning_rate": 1.3312469507478849e-05, - "loss": 1.2829, + "epoch": 3.37, + "learning_rate": 1.6628e-05, + "loss": 1.1857, "step": 421500 }, { - "epoch": 1.0, - "learning_rate": 1.3304536493845966e-05, - "loss": 1.2867, + "epoch": 3.38, + "learning_rate": 1.6624000000000002e-05, + "loss": 1.1819, "step": 422000 }, { - "epoch": 1.01, - "learning_rate": 1.3296603480213082e-05, - "loss": 1.2886, + "epoch": 3.38, + "learning_rate": 1.662e-05, + "loss": 1.1793, "step": 422500 }, { - "epoch": 1.01, - "learning_rate": 1.3288670466580198e-05, - "loss": 1.2908, + "epoch": 3.38, + "learning_rate": 1.6616e-05, + "loss": 1.1847, "step": 423000 }, { - "epoch": 1.01, - "learning_rate": 1.3280737452947314e-05, - "loss": 1.2776, + "epoch": 3.39, + "learning_rate": 1.6612000000000003e-05, + "loss": 1.185, "step": 423500 }, { - "epoch": 1.01, - "learning_rate": 1.3272804439314431e-05, - "loss": 1.2855, + "epoch": 3.39, + "learning_rate": 1.6608e-05, + "loss": 1.1784, "step": 424000 }, { - "epoch": 1.01, - "learning_rate": 1.3264871425681547e-05, - "loss": 1.2843, + "epoch": 3.4, + "learning_rate": 1.6604000000000002e-05, + "loss": 1.1818, "step": 424500 }, { - "epoch": 1.01, - "learning_rate": 1.3256938412048663e-05, - "loss": 1.276, + "epoch": 3.4, + "learning_rate": 1.66e-05, + "loss": 1.1802, "step": 425000 }, { - "epoch": 1.01, - "learning_rate": 1.3249005398415777e-05, - "loss": 1.2771, + "epoch": 3.4, + "learning_rate": 1.6596000000000002e-05, + "loss": 1.177, "step": 425500 }, { - "epoch": 1.01, - "learning_rate": 1.3241072384782893e-05, - "loss": 1.291, + "epoch": 3.41, + "learning_rate": 1.6592000000000003e-05, + "loss": 1.1916, "step": 426000 }, { - "epoch": 1.02, - "learning_rate": 1.323313937115001e-05, - "loss": 1.2731, + "epoch": 3.41, + "learning_rate": 1.6588e-05, + "loss": 1.1828, "step": 426500 }, { - "epoch": 1.02, - "learning_rate": 1.3225206357517126e-05, - "loss": 1.2979, + "epoch": 3.42, + "learning_rate": 1.6584000000000002e-05, + "loss": 1.1867, "step": 427000 }, { - "epoch": 1.02, - "learning_rate": 1.3217273343884242e-05, - "loss": 1.3006, + "epoch": 3.42, + "learning_rate": 1.658e-05, + "loss": 1.1824, "step": 427500 }, { - "epoch": 1.02, - "learning_rate": 1.3209340330251358e-05, - "loss": 1.2872, + "epoch": 3.42, + "learning_rate": 1.6576000000000002e-05, + "loss": 1.1842, "step": 428000 }, { - "epoch": 1.02, - "learning_rate": 1.3201407316618476e-05, - "loss": 1.2708, + "epoch": 3.43, + "learning_rate": 1.6572e-05, + "loss": 1.1784, "step": 428500 }, { - "epoch": 1.02, - "learning_rate": 1.3193474302985592e-05, - "loss": 1.2794, + "epoch": 3.43, + "learning_rate": 1.6568e-05, + "loss": 1.1784, "step": 429000 }, { - "epoch": 1.02, - "learning_rate": 1.3185541289352707e-05, - "loss": 1.2916, + "epoch": 3.44, + "learning_rate": 1.6564000000000003e-05, + "loss": 1.1832, "step": 429500 }, { - "epoch": 1.02, - "learning_rate": 1.3177608275719822e-05, - "loss": 1.2796, + "epoch": 3.44, + "learning_rate": 1.656e-05, + "loss": 1.1866, "step": 430000 }, { - "epoch": 1.02, - "learning_rate": 1.316967526208694e-05, - "loss": 1.2867, + "epoch": 3.44, + "learning_rate": 1.6556000000000002e-05, + "loss": 1.1874, "step": 430500 }, { - "epoch": 1.03, - "learning_rate": 1.3161742248454055e-05, - "loss": 1.2937, + "epoch": 3.45, + "learning_rate": 1.6552e-05, + "loss": 1.1833, "step": 431000 }, { - "epoch": 1.03, - "learning_rate": 1.3153809234821171e-05, - "loss": 1.2805, + "epoch": 3.45, + "learning_rate": 1.6548e-05, + "loss": 1.177, "step": 431500 }, { - "epoch": 1.03, - "learning_rate": 1.3145876221188287e-05, - "loss": 1.2908, + "epoch": 3.46, + "learning_rate": 1.6544000000000003e-05, + "loss": 1.186, "step": 432000 }, { - "epoch": 1.03, - "learning_rate": 1.3137943207555404e-05, - "loss": 1.2829, + "epoch": 3.46, + "learning_rate": 1.654e-05, + "loss": 1.1775, "step": 432500 }, { - "epoch": 1.03, - "learning_rate": 1.313001019392252e-05, - "loss": 1.2626, + "epoch": 3.46, + "learning_rate": 1.6536000000000002e-05, + "loss": 1.1769, "step": 433000 }, { - "epoch": 1.03, - "learning_rate": 1.3122077180289636e-05, - "loss": 1.2799, + "epoch": 3.47, + "learning_rate": 1.6532e-05, + "loss": 1.1782, "step": 433500 }, { - "epoch": 1.03, - "learning_rate": 1.311414416665675e-05, - "loss": 1.282, + "epoch": 3.47, + "learning_rate": 1.6528e-05, + "loss": 1.176, "step": 434000 }, { - "epoch": 1.03, - "learning_rate": 1.3106211153023866e-05, - "loss": 1.2944, + "epoch": 3.48, + "learning_rate": 1.6524000000000003e-05, + "loss": 1.1798, "step": 434500 }, { - "epoch": 1.04, - "learning_rate": 1.3098278139390984e-05, - "loss": 1.2696, + "epoch": 3.48, + "learning_rate": 1.652e-05, + "loss": 1.1817, "step": 435000 }, { - "epoch": 1.04, - "learning_rate": 1.30903451257581e-05, - "loss": 1.2904, + "epoch": 3.48, + "learning_rate": 1.6516000000000002e-05, + "loss": 1.1744, "step": 435500 }, { - "epoch": 1.04, - "learning_rate": 1.3082412112125215e-05, - "loss": 1.2845, + "epoch": 3.49, + "learning_rate": 1.6512e-05, + "loss": 1.1763, "step": 436000 }, { - "epoch": 1.04, - "learning_rate": 1.3074479098492331e-05, - "loss": 1.2837, + "epoch": 3.49, + "learning_rate": 1.6508000000000002e-05, + "loss": 1.1829, "step": 436500 }, { - "epoch": 1.04, - "learning_rate": 1.3066546084859449e-05, - "loss": 1.2837, + "epoch": 3.5, + "learning_rate": 1.6504000000000003e-05, + "loss": 1.1812, "step": 437000 }, { - "epoch": 1.04, - "learning_rate": 1.3058613071226565e-05, - "loss": 1.2696, + "epoch": 3.5, + "learning_rate": 1.65e-05, + "loss": 1.1802, "step": 437500 }, { - "epoch": 1.04, - "learning_rate": 1.305068005759368e-05, - "loss": 1.2887, + "epoch": 3.5, + "learning_rate": 1.6496e-05, + "loss": 1.1792, "step": 438000 }, { - "epoch": 1.04, - "learning_rate": 1.3042747043960795e-05, - "loss": 1.2719, + "epoch": 3.51, + "learning_rate": 1.6492e-05, + "loss": 1.1719, "step": 438500 }, { - "epoch": 1.04, - "learning_rate": 1.303481403032791e-05, - "loss": 1.2717, + "epoch": 3.51, + "learning_rate": 1.6488000000000002e-05, + "loss": 1.1821, "step": 439000 }, { - "epoch": 1.05, - "learning_rate": 1.3026881016695028e-05, - "loss": 1.2922, + "epoch": 3.52, + "learning_rate": 1.6484000000000003e-05, + "loss": 1.1748, "step": 439500 }, { - "epoch": 1.05, - "learning_rate": 1.3018948003062144e-05, - "loss": 1.2643, + "epoch": 3.52, + "learning_rate": 1.648e-05, + "loss": 1.1721, "step": 440000 }, { - "epoch": 1.05, - "learning_rate": 1.301101498942926e-05, - "loss": 1.2637, + "epoch": 3.52, + "learning_rate": 1.6476e-05, + "loss": 1.1778, "step": 440500 }, { - "epoch": 1.05, - "learning_rate": 1.3003081975796377e-05, - "loss": 1.2732, + "epoch": 3.53, + "learning_rate": 1.6472e-05, + "loss": 1.1827, "step": 441000 }, { - "epoch": 1.05, - "learning_rate": 1.2995148962163493e-05, - "loss": 1.2595, + "epoch": 3.53, + "learning_rate": 1.6468000000000002e-05, + "loss": 1.1802, "step": 441500 }, { - "epoch": 1.05, - "learning_rate": 1.2987215948530609e-05, - "loss": 1.2757, + "epoch": 3.54, + "learning_rate": 1.6464000000000004e-05, + "loss": 1.1715, "step": 442000 }, { - "epoch": 1.05, - "learning_rate": 1.2979282934897725e-05, - "loss": 1.3007, + "epoch": 3.54, + "learning_rate": 1.646e-05, + "loss": 1.1792, "step": 442500 }, { - "epoch": 1.05, - "learning_rate": 1.297134992126484e-05, - "loss": 1.2722, + "epoch": 3.54, + "learning_rate": 1.6456e-05, + "loss": 1.1726, "step": 443000 }, { - "epoch": 1.06, - "learning_rate": 1.2963416907631957e-05, - "loss": 1.2739, + "epoch": 3.55, + "learning_rate": 1.6452e-05, + "loss": 1.1764, "step": 443500 }, { - "epoch": 1.06, - "learning_rate": 1.2955483893999073e-05, - "loss": 1.267, + "epoch": 3.55, + "learning_rate": 1.6448000000000002e-05, + "loss": 1.1704, "step": 444000 }, { - "epoch": 1.06, - "learning_rate": 1.2947550880366188e-05, - "loss": 1.2832, + "epoch": 3.56, + "learning_rate": 1.6444000000000004e-05, + "loss": 1.1757, "step": 444500 }, { - "epoch": 1.06, - "learning_rate": 1.2939617866733304e-05, - "loss": 1.2718, + "epoch": 3.56, + "learning_rate": 1.6440000000000002e-05, + "loss": 1.1678, "step": 445000 }, { - "epoch": 1.06, - "learning_rate": 1.2931684853100422e-05, - "loss": 1.2563, + "epoch": 3.56, + "learning_rate": 1.6436e-05, + "loss": 1.1785, "step": 445500 }, { - "epoch": 1.06, - "learning_rate": 1.2923751839467538e-05, - "loss": 1.28, + "epoch": 3.57, + "learning_rate": 1.6432e-05, + "loss": 1.1728, "step": 446000 }, { - "epoch": 1.06, - "learning_rate": 1.2915818825834654e-05, - "loss": 1.2692, + "epoch": 3.57, + "learning_rate": 1.6428000000000003e-05, + "loss": 1.1744, "step": 446500 }, { - "epoch": 1.06, - "learning_rate": 1.290788581220177e-05, - "loss": 1.278, + "epoch": 3.58, + "learning_rate": 1.6424e-05, + "loss": 1.1834, "step": 447000 }, { - "epoch": 1.07, - "learning_rate": 1.2899952798568884e-05, - "loss": 1.288, + "epoch": 3.58, + "learning_rate": 1.6420000000000002e-05, + "loss": 1.1728, "step": 447500 }, { - "epoch": 1.07, - "learning_rate": 1.2892019784936001e-05, - "loss": 1.2656, + "epoch": 3.58, + "learning_rate": 1.6416e-05, + "loss": 1.1737, "step": 448000 }, { - "epoch": 1.07, - "learning_rate": 1.2884086771303117e-05, - "loss": 1.28, + "epoch": 3.59, + "learning_rate": 1.6412e-05, + "loss": 1.1758, "step": 448500 }, { - "epoch": 1.07, - "learning_rate": 1.2876153757670233e-05, - "loss": 1.2734, + "epoch": 3.59, + "learning_rate": 1.6408000000000003e-05, + "loss": 1.1765, "step": 449000 }, { - "epoch": 1.07, - "learning_rate": 1.2868220744037349e-05, - "loss": 1.2654, + "epoch": 3.6, + "learning_rate": 1.6404e-05, + "loss": 1.1766, "step": 449500 }, { - "epoch": 1.07, - "learning_rate": 1.2860287730404466e-05, - "loss": 1.2963, + "epoch": 3.6, + "learning_rate": 1.64e-05, + "loss": 1.1749, "step": 450000 }, { - "epoch": 1.07, - "learning_rate": 1.2852354716771582e-05, - "loss": 1.2817, + "epoch": 3.6, + "learning_rate": 1.6396e-05, + "loss": 1.1668, "step": 450500 }, { - "epoch": 1.07, - "learning_rate": 1.2844421703138698e-05, - "loss": 1.2841, + "epoch": 3.61, + "learning_rate": 1.6392e-05, + "loss": 1.1831, "step": 451000 }, { - "epoch": 1.07, - "learning_rate": 1.2836488689505816e-05, - "loss": 1.2771, + "epoch": 3.61, + "learning_rate": 1.6388000000000003e-05, + "loss": 1.1829, "step": 451500 }, { - "epoch": 1.08, - "learning_rate": 1.282855567587293e-05, - "loss": 1.2715, + "epoch": 3.62, + "learning_rate": 1.6384e-05, + "loss": 1.1731, "step": 452000 }, { - "epoch": 1.08, - "learning_rate": 1.2820622662240046e-05, - "loss": 1.2824, + "epoch": 3.62, + "learning_rate": 1.638e-05, + "loss": 1.1816, "step": 452500 }, { - "epoch": 1.08, - "learning_rate": 1.2812689648607162e-05, - "loss": 1.2809, + "epoch": 3.62, + "learning_rate": 1.6376e-05, + "loss": 1.1754, "step": 453000 }, { - "epoch": 1.08, - "learning_rate": 1.2804756634974277e-05, - "loss": 1.2755, + "epoch": 3.63, + "learning_rate": 1.6372000000000002e-05, + "loss": 1.1797, "step": 453500 }, { - "epoch": 1.08, - "learning_rate": 1.2796823621341395e-05, - "loss": 1.2732, + "epoch": 3.63, + "learning_rate": 1.6368000000000003e-05, + "loss": 1.1721, "step": 454000 }, { - "epoch": 1.08, - "learning_rate": 1.2788890607708511e-05, - "loss": 1.3004, + "epoch": 3.64, + "learning_rate": 1.6364e-05, + "loss": 1.1729, "step": 454500 }, { - "epoch": 1.08, - "learning_rate": 1.2780957594075627e-05, - "loss": 1.268, + "epoch": 3.64, + "learning_rate": 1.636e-05, + "loss": 1.1752, "step": 455000 }, { - "epoch": 1.08, - "learning_rate": 1.2773024580442743e-05, - "loss": 1.2652, + "epoch": 3.64, + "learning_rate": 1.6356e-05, + "loss": 1.1668, "step": 455500 }, { - "epoch": 1.09, - "learning_rate": 1.276509156680986e-05, - "loss": 1.2742, + "epoch": 3.65, + "learning_rate": 1.6352000000000002e-05, + "loss": 1.17, "step": 456000 }, { - "epoch": 1.09, - "learning_rate": 1.2757158553176974e-05, - "loss": 1.2517, + "epoch": 3.65, + "learning_rate": 1.6348e-05, + "loss": 1.1812, "step": 456500 }, { - "epoch": 1.09, - "learning_rate": 1.274922553954409e-05, - "loss": 1.2721, + "epoch": 3.66, + "learning_rate": 1.6344e-05, + "loss": 1.1788, "step": 457000 }, { - "epoch": 1.09, - "learning_rate": 1.2741292525911206e-05, - "loss": 1.281, + "epoch": 3.66, + "learning_rate": 1.634e-05, + "loss": 1.1743, "step": 457500 }, { - "epoch": 1.09, - "learning_rate": 1.2733359512278322e-05, - "loss": 1.272, + "epoch": 3.66, + "learning_rate": 1.6336e-05, + "loss": 1.1749, "step": 458000 }, { - "epoch": 1.09, - "learning_rate": 1.272542649864544e-05, - "loss": 1.2756, + "epoch": 3.67, + "learning_rate": 1.6332000000000002e-05, + "loss": 1.1717, "step": 458500 }, { - "epoch": 1.09, - "learning_rate": 1.2717493485012555e-05, - "loss": 1.2691, + "epoch": 3.67, + "learning_rate": 1.6328e-05, + "loss": 1.1752, "step": 459000 }, { - "epoch": 1.09, - "learning_rate": 1.2709560471379671e-05, - "loss": 1.2635, + "epoch": 3.68, + "learning_rate": 1.6324e-05, + "loss": 1.1732, "step": 459500 }, { - "epoch": 1.09, - "learning_rate": 1.2701627457746789e-05, - "loss": 1.2589, + "epoch": 3.68, + "learning_rate": 1.632e-05, + "loss": 1.1763, "step": 460000 }, { - "epoch": 1.1, - "learning_rate": 1.2693694444113905e-05, - "loss": 1.2861, + "epoch": 3.68, + "learning_rate": 1.6316e-05, + "loss": 1.1745, "step": 460500 }, { - "epoch": 1.1, - "learning_rate": 1.2685761430481019e-05, - "loss": 1.2718, + "epoch": 3.69, + "learning_rate": 1.6312000000000002e-05, + "loss": 1.1701, "step": 461000 }, { - "epoch": 1.1, - "learning_rate": 1.2677828416848135e-05, - "loss": 1.2716, + "epoch": 3.69, + "learning_rate": 1.6308e-05, + "loss": 1.1702, "step": 461500 }, { - "epoch": 1.1, - "learning_rate": 1.266989540321525e-05, - "loss": 1.2627, + "epoch": 3.7, + "learning_rate": 1.6304000000000002e-05, + "loss": 1.1753, "step": 462000 }, { - "epoch": 1.1, - "learning_rate": 1.2661962389582368e-05, - "loss": 1.2708, + "epoch": 3.7, + "learning_rate": 1.63e-05, + "loss": 1.1708, "step": 462500 }, { - "epoch": 1.1, - "learning_rate": 1.2654029375949484e-05, - "loss": 1.2742, + "epoch": 3.7, + "learning_rate": 1.6296e-05, + "loss": 1.1715, "step": 463000 }, { - "epoch": 1.1, - "learning_rate": 1.26460963623166e-05, - "loss": 1.2576, + "epoch": 3.71, + "learning_rate": 1.6292000000000003e-05, + "loss": 1.1687, "step": 463500 }, { - "epoch": 1.1, - "learning_rate": 1.2638163348683716e-05, - "loss": 1.2793, + "epoch": 3.71, + "learning_rate": 1.6288e-05, + "loss": 1.1749, "step": 464000 }, { - "epoch": 1.11, - "learning_rate": 1.2630230335050833e-05, - "loss": 1.2698, + "epoch": 3.72, + "learning_rate": 1.6284000000000002e-05, + "loss": 1.1726, "step": 464500 }, { - "epoch": 1.11, - "learning_rate": 1.2622297321417949e-05, - "loss": 1.2602, + "epoch": 3.72, + "learning_rate": 1.628e-05, + "loss": 1.1722, "step": 465000 }, { - "epoch": 1.11, - "learning_rate": 1.2614364307785063e-05, - "loss": 1.2813, + "epoch": 3.72, + "learning_rate": 1.6276e-05, + "loss": 1.1736, "step": 465500 }, { - "epoch": 1.11, - "learning_rate": 1.2606431294152179e-05, - "loss": 1.2683, + "epoch": 3.73, + "learning_rate": 1.6272000000000003e-05, + "loss": 1.17, "step": 466000 }, { - "epoch": 1.11, - "learning_rate": 1.2598498280519295e-05, - "loss": 1.2693, + "epoch": 3.73, + "learning_rate": 1.6268e-05, + "loss": 1.1682, "step": 466500 }, { - "epoch": 1.11, - "learning_rate": 1.2590565266886413e-05, - "loss": 1.2724, + "epoch": 3.74, + "learning_rate": 1.6264000000000002e-05, + "loss": 1.1698, "step": 467000 }, { - "epoch": 1.11, - "learning_rate": 1.2582632253253528e-05, - "loss": 1.272, + "epoch": 3.74, + "learning_rate": 1.626e-05, + "loss": 1.1782, "step": 467500 }, { - "epoch": 1.11, - "learning_rate": 1.2574699239620644e-05, - "loss": 1.2723, + "epoch": 3.74, + "learning_rate": 1.6256e-05, + "loss": 1.1645, "step": 468000 }, { - "epoch": 1.11, - "learning_rate": 1.256676622598776e-05, - "loss": 1.2667, + "epoch": 3.75, + "learning_rate": 1.6252e-05, + "loss": 1.1628, "step": 468500 }, { - "epoch": 1.12, - "learning_rate": 1.2558833212354878e-05, - "loss": 1.2661, + "epoch": 3.75, + "learning_rate": 1.6248e-05, + "loss": 1.1681, "step": 469000 }, { - "epoch": 1.12, - "learning_rate": 1.2550900198721994e-05, - "loss": 1.2673, + "epoch": 3.76, + "learning_rate": 1.6244000000000002e-05, + "loss": 1.1764, "step": 469500 }, { - "epoch": 1.12, - "learning_rate": 1.2542967185089108e-05, - "loss": 1.2657, + "epoch": 3.76, + "learning_rate": 1.6240000000000004e-05, + "loss": 1.1692, "step": 470000 }, { - "epoch": 1.12, - "learning_rate": 1.2535034171456224e-05, - "loss": 1.2734, + "epoch": 3.76, + "learning_rate": 1.6236000000000002e-05, + "loss": 1.1682, "step": 470500 }, { - "epoch": 1.12, - "learning_rate": 1.2527101157823341e-05, - "loss": 1.2571, + "epoch": 3.77, + "learning_rate": 1.6232e-05, + "loss": 1.1692, "step": 471000 }, { - "epoch": 1.12, - "learning_rate": 1.2519168144190457e-05, - "loss": 1.2637, + "epoch": 3.77, + "learning_rate": 1.6228e-05, + "loss": 1.1715, "step": 471500 }, { - "epoch": 1.12, - "learning_rate": 1.2511235130557573e-05, - "loss": 1.2697, + "epoch": 3.78, + "learning_rate": 1.6224000000000003e-05, + "loss": 1.1729, "step": 472000 }, { - "epoch": 1.12, - "learning_rate": 1.2503302116924689e-05, - "loss": 1.2722, + "epoch": 3.78, + "learning_rate": 1.6220000000000004e-05, + "loss": 1.1708, "step": 472500 }, { - "epoch": 1.13, - "learning_rate": 1.2495369103291806e-05, - "loss": 1.2659, + "epoch": 3.78, + "learning_rate": 1.6216000000000002e-05, + "loss": 1.1658, "step": 473000 }, { - "epoch": 1.13, - "learning_rate": 1.2487436089658922e-05, - "loss": 1.2732, + "epoch": 3.79, + "learning_rate": 1.6212e-05, + "loss": 1.1732, "step": 473500 }, { - "epoch": 1.13, - "learning_rate": 1.2479503076026038e-05, - "loss": 1.2419, + "epoch": 3.79, + "learning_rate": 1.6208e-05, + "loss": 1.1708, "step": 474000 }, { - "epoch": 1.13, - "learning_rate": 1.2471570062393152e-05, - "loss": 1.2505, + "epoch": 3.8, + "learning_rate": 1.6204000000000003e-05, + "loss": 1.1728, "step": 474500 }, { - "epoch": 1.13, - "learning_rate": 1.2463637048760268e-05, - "loss": 1.2709, + "epoch": 3.8, + "learning_rate": 1.62e-05, + "loss": 1.1706, "step": 475000 }, { - "epoch": 1.13, - "learning_rate": 1.2455704035127386e-05, - "loss": 1.2733, + "epoch": 3.8, + "learning_rate": 1.6196000000000002e-05, + "loss": 1.1685, "step": 475500 }, { - "epoch": 1.13, - "learning_rate": 1.2447771021494502e-05, - "loss": 1.2586, + "epoch": 3.81, + "learning_rate": 1.6192e-05, + "loss": 1.1747, "step": 476000 }, { - "epoch": 1.13, - "learning_rate": 1.2439838007861617e-05, - "loss": 1.2517, + "epoch": 3.81, + "learning_rate": 1.6188e-05, + "loss": 1.1659, "step": 476500 }, { - "epoch": 1.14, - "learning_rate": 1.2431904994228733e-05, - "loss": 1.2657, + "epoch": 3.82, + "learning_rate": 1.6184000000000003e-05, + "loss": 1.1724, "step": 477000 }, { - "epoch": 1.14, - "learning_rate": 1.242397198059585e-05, - "loss": 1.2724, + "epoch": 3.82, + "learning_rate": 1.618e-05, + "loss": 1.1669, "step": 477500 }, { - "epoch": 1.14, - "learning_rate": 1.2416038966962967e-05, - "loss": 1.2481, + "epoch": 3.82, + "learning_rate": 1.6176e-05, + "loss": 1.1697, "step": 478000 }, { - "epoch": 1.14, - "learning_rate": 1.240810595333008e-05, - "loss": 1.2894, + "epoch": 3.83, + "learning_rate": 1.6172e-05, + "loss": 1.1685, "step": 478500 }, { - "epoch": 1.14, - "learning_rate": 1.2400172939697197e-05, - "loss": 1.2753, + "epoch": 3.83, + "learning_rate": 1.6168000000000002e-05, + "loss": 1.1733, "step": 479000 }, { - "epoch": 1.14, - "learning_rate": 1.2392239926064313e-05, - "loss": 1.254, + "epoch": 3.84, + "learning_rate": 1.6164000000000003e-05, + "loss": 1.1688, "step": 479500 }, { - "epoch": 1.14, - "learning_rate": 1.238430691243143e-05, - "loss": 1.2603, + "epoch": 3.84, + "learning_rate": 1.616e-05, + "loss": 1.1703, "step": 480000 }, { - "epoch": 1.14, - "learning_rate": 1.2376373898798546e-05, - "loss": 1.2692, + "epoch": 3.84, + "learning_rate": 1.6156e-05, + "loss": 1.1612, "step": 480500 }, { - "epoch": 1.14, - "learning_rate": 1.2368440885165662e-05, - "loss": 1.2742, + "epoch": 3.85, + "learning_rate": 1.6152e-05, + "loss": 1.1667, "step": 481000 }, { - "epoch": 1.15, - "learning_rate": 1.236050787153278e-05, - "loss": 1.2838, + "epoch": 3.85, + "learning_rate": 1.6148000000000002e-05, + "loss": 1.1614, "step": 481500 }, { - "epoch": 1.15, - "learning_rate": 1.2352574857899895e-05, - "loss": 1.2536, + "epoch": 3.86, + "learning_rate": 1.6144000000000003e-05, + "loss": 1.1668, "step": 482000 }, { - "epoch": 1.15, - "learning_rate": 1.2344641844267011e-05, - "loss": 1.2669, + "epoch": 3.86, + "learning_rate": 1.614e-05, + "loss": 1.1684, "step": 482500 }, { - "epoch": 1.15, - "learning_rate": 1.2336708830634125e-05, - "loss": 1.2685, + "epoch": 3.86, + "learning_rate": 1.6136e-05, + "loss": 1.1666, "step": 483000 }, { - "epoch": 1.15, - "learning_rate": 1.2328775817001241e-05, - "loss": 1.2623, + "epoch": 3.87, + "learning_rate": 1.6132e-05, + "loss": 1.1691, "step": 483500 }, { - "epoch": 1.15, - "learning_rate": 1.2320842803368359e-05, - "loss": 1.2721, + "epoch": 3.87, + "learning_rate": 1.6128000000000002e-05, + "loss": 1.1692, "step": 484000 }, { - "epoch": 1.15, - "learning_rate": 1.2312909789735475e-05, - "loss": 1.2683, + "epoch": 3.88, + "learning_rate": 1.6124000000000004e-05, + "loss": 1.1654, "step": 484500 }, { - "epoch": 1.15, - "learning_rate": 1.230497677610259e-05, - "loss": 1.2667, + "epoch": 3.88, + "learning_rate": 1.612e-05, + "loss": 1.1644, "step": 485000 }, { - "epoch": 1.16, - "learning_rate": 1.2297043762469706e-05, - "loss": 1.2757, + "epoch": 3.88, + "learning_rate": 1.6116e-05, + "loss": 1.1645, "step": 485500 }, { - "epoch": 1.16, - "learning_rate": 1.2289110748836824e-05, - "loss": 1.2731, + "epoch": 3.89, + "learning_rate": 1.6112e-05, + "loss": 1.1735, "step": 486000 }, { - "epoch": 1.16, - "learning_rate": 1.228117773520394e-05, - "loss": 1.2847, + "epoch": 3.89, + "learning_rate": 1.6108000000000002e-05, + "loss": 1.1704, "step": 486500 }, { - "epoch": 1.16, - "learning_rate": 1.2273244721571056e-05, - "loss": 1.2712, + "epoch": 3.9, + "learning_rate": 1.6104e-05, + "loss": 1.164, "step": 487000 }, { - "epoch": 1.16, - "learning_rate": 1.226531170793817e-05, - "loss": 1.2747, + "epoch": 3.9, + "learning_rate": 1.6100000000000002e-05, + "loss": 1.1728, "step": 487500 }, { - "epoch": 1.16, - "learning_rate": 1.2257378694305286e-05, - "loss": 1.2632, + "epoch": 3.9, + "learning_rate": 1.6096e-05, + "loss": 1.1637, "step": 488000 }, { - "epoch": 1.16, - "learning_rate": 1.2249445680672403e-05, - "loss": 1.2437, + "epoch": 3.91, + "learning_rate": 1.6092e-05, + "loss": 1.1682, "step": 488500 }, { - "epoch": 1.16, - "learning_rate": 1.2241512667039519e-05, - "loss": 1.2564, + "epoch": 3.91, + "learning_rate": 1.6088000000000002e-05, + "loss": 1.167, "step": 489000 }, { - "epoch": 1.16, - "learning_rate": 1.2233579653406635e-05, - "loss": 1.2496, + "epoch": 3.92, + "learning_rate": 1.6084e-05, + "loss": 1.167, "step": 489500 }, { - "epoch": 1.17, - "learning_rate": 1.222564663977375e-05, - "loss": 1.2671, + "epoch": 3.92, + "learning_rate": 1.6080000000000002e-05, + "loss": 1.1655, "step": 490000 }, { - "epoch": 1.17, - "learning_rate": 1.2217713626140868e-05, - "loss": 1.2625, + "epoch": 3.92, + "learning_rate": 1.6076e-05, + "loss": 1.1643, "step": 490500 }, { - "epoch": 1.17, - "learning_rate": 1.2209780612507984e-05, - "loss": 1.2625, + "epoch": 3.93, + "learning_rate": 1.6072e-05, + "loss": 1.1687, "step": 491000 }, { - "epoch": 1.17, - "learning_rate": 1.22018475988751e-05, - "loss": 1.2608, + "epoch": 3.93, + "learning_rate": 1.6068000000000003e-05, + "loss": 1.1636, "step": 491500 }, { - "epoch": 1.17, - "learning_rate": 1.2193914585242214e-05, - "loss": 1.2733, + "epoch": 3.94, + "learning_rate": 1.6064e-05, + "loss": 1.1609, "step": 492000 }, { - "epoch": 1.17, - "learning_rate": 1.2185981571609332e-05, - "loss": 1.269, + "epoch": 3.94, + "learning_rate": 1.6060000000000002e-05, + "loss": 1.1678, "step": 492500 }, { - "epoch": 1.17, - "learning_rate": 1.2178048557976448e-05, - "loss": 1.2539, + "epoch": 3.94, + "learning_rate": 1.6056e-05, + "loss": 1.1704, "step": 493000 }, { - "epoch": 1.17, - "learning_rate": 1.2170115544343564e-05, - "loss": 1.271, + "epoch": 3.95, + "learning_rate": 1.6052e-05, + "loss": 1.1666, "step": 493500 }, { - "epoch": 1.18, - "learning_rate": 1.216218253071068e-05, - "loss": 1.2579, + "epoch": 3.95, + "learning_rate": 1.6048000000000003e-05, + "loss": 1.1636, "step": 494000 }, { - "epoch": 1.18, - "learning_rate": 1.2154249517077797e-05, - "loss": 1.2536, + "epoch": 3.96, + "learning_rate": 1.6044e-05, + "loss": 1.1686, "step": 494500 }, { - "epoch": 1.18, - "learning_rate": 1.2146316503444913e-05, - "loss": 1.2563, + "epoch": 3.96, + "learning_rate": 1.6040000000000002e-05, + "loss": 1.1635, "step": 495000 }, { - "epoch": 1.18, - "learning_rate": 1.2138383489812029e-05, - "loss": 1.2678, + "epoch": 3.96, + "learning_rate": 1.6036e-05, + "loss": 1.1607, "step": 495500 }, { - "epoch": 1.18, - "learning_rate": 1.2130450476179145e-05, - "loss": 1.2557, + "epoch": 3.97, + "learning_rate": 1.6032e-05, + "loss": 1.1592, "step": 496000 }, { - "epoch": 1.18, - "learning_rate": 1.2122517462546259e-05, - "loss": 1.2654, + "epoch": 3.97, + "learning_rate": 1.6028e-05, + "loss": 1.1602, "step": 496500 }, { - "epoch": 1.18, - "learning_rate": 1.2114584448913376e-05, - "loss": 1.2754, + "epoch": 3.98, + "learning_rate": 1.6024e-05, + "loss": 1.1608, "step": 497000 }, { - "epoch": 1.18, - "learning_rate": 1.2106651435280492e-05, - "loss": 1.2644, + "epoch": 3.98, + "learning_rate": 1.6020000000000002e-05, + "loss": 1.1578, "step": 497500 }, { - "epoch": 1.19, - "learning_rate": 1.2098718421647608e-05, - "loss": 1.2749, + "epoch": 3.98, + "learning_rate": 1.6016e-05, + "loss": 1.1653, "step": 498000 }, { - "epoch": 1.19, - "learning_rate": 1.2090785408014724e-05, - "loss": 1.2735, + "epoch": 3.99, + "learning_rate": 1.6012000000000002e-05, + "loss": 1.1625, "step": 498500 }, { - "epoch": 1.19, - "learning_rate": 1.2082852394381841e-05, - "loss": 1.2462, + "epoch": 3.99, + "learning_rate": 1.6008e-05, + "loss": 1.1701, "step": 499000 }, { - "epoch": 1.19, - "learning_rate": 1.2074919380748957e-05, - "loss": 1.2446, + "epoch": 4.0, + "learning_rate": 1.6004e-05, + "loss": 1.1644, "step": 499500 }, { - "epoch": 1.19, - "learning_rate": 1.2066986367116073e-05, - "loss": 1.2712, + "epoch": 4.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1652, "step": 500000 }, { - "epoch": 1.19, - "learning_rate": 1.205905335348319e-05, - "loss": 1.2877, + "epoch": 4.0, + "eval_loss": 1.1185648441314697, + "eval_runtime": 90.4279, + "eval_samples_per_second": 148.328, + "eval_steps_per_second": 2.322, + "step": 500000 + }, + { + "epoch": 4.0, + "learning_rate": 1.5996e-05, + "loss": 1.1599, "step": 500500 }, { - "epoch": 1.19, - "learning_rate": 1.2051120339850303e-05, - "loss": 1.2603, + "epoch": 4.01, + "learning_rate": 1.5992000000000002e-05, + "loss": 1.1548, "step": 501000 }, { - "epoch": 1.19, - "learning_rate": 1.204318732621742e-05, - "loss": 1.2627, + "epoch": 4.01, + "learning_rate": 1.5988e-05, + "loss": 1.1586, "step": 501500 }, { - "epoch": 1.19, - "learning_rate": 1.2035254312584537e-05, - "loss": 1.2672, + "epoch": 4.02, + "learning_rate": 1.5984e-05, + "loss": 1.1593, "step": 502000 }, { - "epoch": 1.2, - "learning_rate": 1.2027321298951653e-05, - "loss": 1.2648, + "epoch": 4.02, + "learning_rate": 1.5980000000000003e-05, + "loss": 1.1563, "step": 502500 }, { - "epoch": 1.2, - "learning_rate": 1.201938828531877e-05, - "loss": 1.2715, + "epoch": 4.02, + "learning_rate": 1.5976e-05, + "loss": 1.1611, "step": 503000 }, { - "epoch": 1.2, - "learning_rate": 1.2011455271685886e-05, - "loss": 1.2591, + "epoch": 4.03, + "learning_rate": 1.5972000000000002e-05, + "loss": 1.1596, "step": 503500 }, { - "epoch": 1.2, - "learning_rate": 1.2003522258053002e-05, - "loss": 1.2527, + "epoch": 4.03, + "learning_rate": 1.5968e-05, + "loss": 1.167, "step": 504000 }, { - "epoch": 1.2, - "learning_rate": 1.1995589244420118e-05, - "loss": 1.2525, + "epoch": 4.04, + "learning_rate": 1.5964e-05, + "loss": 1.1596, "step": 504500 }, { - "epoch": 1.2, - "learning_rate": 1.1987656230787235e-05, - "loss": 1.2487, + "epoch": 4.04, + "learning_rate": 1.5960000000000003e-05, + "loss": 1.1587, "step": 505000 }, { - "epoch": 1.2, - "learning_rate": 1.197972321715435e-05, - "loss": 1.2802, + "epoch": 4.04, + "learning_rate": 1.5956e-05, + "loss": 1.1529, "step": 505500 }, { - "epoch": 1.2, - "learning_rate": 1.1971790203521465e-05, - "loss": 1.2523, + "epoch": 4.05, + "learning_rate": 1.5952000000000002e-05, + "loss": 1.1622, "step": 506000 }, { - "epoch": 1.21, - "learning_rate": 1.1963857189888581e-05, - "loss": 1.2546, + "epoch": 4.05, + "learning_rate": 1.5948e-05, + "loss": 1.1545, "step": 506500 }, { - "epoch": 1.21, - "learning_rate": 1.1955924176255697e-05, - "loss": 1.2496, + "epoch": 4.06, + "learning_rate": 1.5944000000000002e-05, + "loss": 1.1604, "step": 507000 }, { - "epoch": 1.21, - "learning_rate": 1.1947991162622815e-05, - "loss": 1.2648, + "epoch": 4.06, + "learning_rate": 1.5940000000000003e-05, + "loss": 1.1553, "step": 507500 }, { - "epoch": 1.21, - "learning_rate": 1.194005814898993e-05, - "loss": 1.2594, + "epoch": 4.06, + "learning_rate": 1.5936e-05, + "loss": 1.1561, "step": 508000 }, { - "epoch": 1.21, - "learning_rate": 1.1932125135357046e-05, - "loss": 1.2646, + "epoch": 4.07, + "learning_rate": 1.5932e-05, + "loss": 1.161, "step": 508500 }, { - "epoch": 1.21, - "learning_rate": 1.1924192121724162e-05, - "loss": 1.2682, + "epoch": 4.07, + "learning_rate": 1.5928e-05, + "loss": 1.1563, "step": 509000 }, { - "epoch": 1.21, - "learning_rate": 1.191625910809128e-05, - "loss": 1.2853, + "epoch": 4.08, + "learning_rate": 1.5924000000000002e-05, + "loss": 1.1626, "step": 509500 }, { - "epoch": 1.21, - "learning_rate": 1.1908326094458394e-05, - "loss": 1.2447, + "epoch": 4.08, + "learning_rate": 1.5920000000000003e-05, + "loss": 1.1587, "step": 510000 }, { - "epoch": 1.21, - "learning_rate": 1.190039308082551e-05, - "loss": 1.2698, + "epoch": 4.08, + "learning_rate": 1.5916e-05, + "loss": 1.1594, "step": 510500 }, { - "epoch": 1.22, - "learning_rate": 1.1892460067192626e-05, - "loss": 1.261, + "epoch": 4.09, + "learning_rate": 1.5912e-05, + "loss": 1.1605, "step": 511000 }, { - "epoch": 1.22, - "learning_rate": 1.1884527053559743e-05, - "loss": 1.2583, + "epoch": 4.09, + "learning_rate": 1.5908e-05, + "loss": 1.1605, "step": 511500 }, { - "epoch": 1.22, - "learning_rate": 1.1876594039926859e-05, - "loss": 1.2381, + "epoch": 4.1, + "learning_rate": 1.5904000000000002e-05, + "loss": 1.1573, "step": 512000 }, { - "epoch": 1.22, - "learning_rate": 1.1868661026293975e-05, - "loss": 1.2632, + "epoch": 4.1, + "learning_rate": 1.5900000000000004e-05, + "loss": 1.1549, "step": 512500 }, { - "epoch": 1.22, - "learning_rate": 1.186072801266109e-05, - "loss": 1.2616, + "epoch": 4.1, + "learning_rate": 1.5896e-05, + "loss": 1.162, "step": 513000 }, { - "epoch": 1.22, - "learning_rate": 1.1852794999028208e-05, - "loss": 1.2488, + "epoch": 4.11, + "learning_rate": 1.5892e-05, + "loss": 1.1561, "step": 513500 }, { - "epoch": 1.22, - "learning_rate": 1.1844861985395324e-05, - "loss": 1.2648, + "epoch": 4.11, + "learning_rate": 1.5888e-05, + "loss": 1.1526, "step": 514000 }, { - "epoch": 1.22, - "learning_rate": 1.1836928971762438e-05, - "loss": 1.2885, + "epoch": 4.12, + "learning_rate": 1.5884000000000002e-05, + "loss": 1.1637, "step": 514500 }, { - "epoch": 1.23, - "learning_rate": 1.1828995958129554e-05, - "loss": 1.2656, + "epoch": 4.12, + "learning_rate": 1.588e-05, + "loss": 1.1591, "step": 515000 }, { - "epoch": 1.23, - "learning_rate": 1.182106294449667e-05, - "loss": 1.2588, + "epoch": 4.12, + "learning_rate": 1.5876000000000002e-05, + "loss": 1.155, "step": 515500 }, { - "epoch": 1.23, - "learning_rate": 1.1813129930863788e-05, - "loss": 1.2548, + "epoch": 4.13, + "learning_rate": 1.5872e-05, + "loss": 1.1536, "step": 516000 }, { - "epoch": 1.23, - "learning_rate": 1.1805196917230904e-05, - "loss": 1.2548, + "epoch": 4.13, + "learning_rate": 1.5868e-05, + "loss": 1.1491, "step": 516500 }, { - "epoch": 1.23, - "learning_rate": 1.179726390359802e-05, - "loss": 1.2725, + "epoch": 4.14, + "learning_rate": 1.5864000000000003e-05, + "loss": 1.1498, "step": 517000 }, { - "epoch": 1.23, - "learning_rate": 1.1789330889965135e-05, - "loss": 1.2597, + "epoch": 4.14, + "learning_rate": 1.586e-05, + "loss": 1.1559, "step": 517500 }, { - "epoch": 1.23, - "learning_rate": 1.1781397876332253e-05, - "loss": 1.2402, + "epoch": 4.14, + "learning_rate": 1.5856e-05, + "loss": 1.159, "step": 518000 }, { - "epoch": 1.23, - "learning_rate": 1.1773464862699369e-05, - "loss": 1.2608, + "epoch": 4.15, + "learning_rate": 1.5852e-05, + "loss": 1.1526, "step": 518500 }, { - "epoch": 1.24, - "learning_rate": 1.1765531849066483e-05, - "loss": 1.2566, + "epoch": 4.15, + "learning_rate": 1.5848e-05, + "loss": 1.1584, "step": 519000 }, { - "epoch": 1.24, - "learning_rate": 1.1757598835433599e-05, - "loss": 1.2647, + "epoch": 4.16, + "learning_rate": 1.5844000000000003e-05, + "loss": 1.1558, "step": 519500 }, { - "epoch": 1.24, - "learning_rate": 1.1749665821800715e-05, - "loss": 1.2612, + "epoch": 4.16, + "learning_rate": 1.584e-05, + "loss": 1.1523, "step": 520000 }, { - "epoch": 1.24, - "learning_rate": 1.1741732808167832e-05, - "loss": 1.2745, + "epoch": 4.16, + "learning_rate": 1.5836e-05, + "loss": 1.1543, "step": 520500 }, { - "epoch": 1.24, - "learning_rate": 1.1733799794534948e-05, - "loss": 1.2398, + "epoch": 4.17, + "learning_rate": 1.5832e-05, + "loss": 1.1553, "step": 521000 }, { - "epoch": 1.24, - "learning_rate": 1.1725866780902064e-05, - "loss": 1.2673, + "epoch": 4.17, + "learning_rate": 1.5828e-05, + "loss": 1.1503, "step": 521500 }, { - "epoch": 1.24, - "learning_rate": 1.1717933767269181e-05, - "loss": 1.2562, + "epoch": 4.18, + "learning_rate": 1.5824000000000003e-05, + "loss": 1.1507, "step": 522000 }, { - "epoch": 1.24, - "learning_rate": 1.1710000753636297e-05, - "loss": 1.2438, + "epoch": 4.18, + "learning_rate": 1.582e-05, + "loss": 1.1615, "step": 522500 }, { - "epoch": 1.24, - "learning_rate": 1.1702067740003411e-05, - "loss": 1.2576, + "epoch": 4.18, + "learning_rate": 1.5816e-05, + "loss": 1.1574, "step": 523000 }, { - "epoch": 1.25, - "learning_rate": 1.1694134726370527e-05, - "loss": 1.2526, + "epoch": 4.19, + "learning_rate": 1.5812e-05, + "loss": 1.1551, "step": 523500 }, { - "epoch": 1.25, - "learning_rate": 1.1686201712737643e-05, - "loss": 1.2539, + "epoch": 4.19, + "learning_rate": 1.5808000000000002e-05, + "loss": 1.1556, "step": 524000 }, { - "epoch": 1.25, - "learning_rate": 1.167826869910476e-05, - "loss": 1.266, + "epoch": 4.2, + "learning_rate": 1.5804000000000003e-05, + "loss": 1.1576, "step": 524500 }, { - "epoch": 1.25, - "learning_rate": 1.1670335685471877e-05, - "loss": 1.2492, + "epoch": 4.2, + "learning_rate": 1.58e-05, + "loss": 1.1539, "step": 525000 }, { - "epoch": 1.25, - "learning_rate": 1.1662402671838992e-05, - "loss": 1.2492, + "epoch": 4.2, + "learning_rate": 1.5796e-05, + "loss": 1.1565, "step": 525500 }, { - "epoch": 1.25, - "learning_rate": 1.1654469658206108e-05, - "loss": 1.2582, + "epoch": 4.21, + "learning_rate": 1.5792e-05, + "loss": 1.1587, "step": 526000 }, { - "epoch": 1.25, - "learning_rate": 1.1646536644573226e-05, - "loss": 1.255, + "epoch": 4.21, + "learning_rate": 1.5788000000000002e-05, + "loss": 1.1537, "step": 526500 }, { - "epoch": 1.25, - "learning_rate": 1.1638603630940342e-05, - "loss": 1.2504, + "epoch": 4.22, + "learning_rate": 1.5784e-05, + "loss": 1.1569, "step": 527000 }, { - "epoch": 1.26, - "learning_rate": 1.1630670617307456e-05, - "loss": 1.274, + "epoch": 4.22, + "learning_rate": 1.578e-05, + "loss": 1.1536, "step": 527500 }, { - "epoch": 1.26, - "learning_rate": 1.1622737603674572e-05, - "loss": 1.2571, + "epoch": 4.22, + "learning_rate": 1.5776e-05, + "loss": 1.1576, "step": 528000 }, { - "epoch": 1.26, - "learning_rate": 1.1614804590041688e-05, - "loss": 1.2645, + "epoch": 4.23, + "learning_rate": 1.5772e-05, + "loss": 1.1559, "step": 528500 }, { - "epoch": 1.26, - "learning_rate": 1.1606871576408805e-05, - "loss": 1.2595, + "epoch": 4.23, + "learning_rate": 1.5768000000000002e-05, + "loss": 1.1601, "step": 529000 }, { - "epoch": 1.26, - "learning_rate": 1.1598938562775921e-05, - "loss": 1.2425, + "epoch": 4.24, + "learning_rate": 1.5764e-05, + "loss": 1.1544, "step": 529500 }, { - "epoch": 1.26, - "learning_rate": 1.1591005549143037e-05, - "loss": 1.2575, + "epoch": 4.24, + "learning_rate": 1.576e-05, + "loss": 1.1554, "step": 530000 }, { - "epoch": 1.26, - "learning_rate": 1.1583072535510153e-05, - "loss": 1.2421, + "epoch": 4.24, + "learning_rate": 1.5756e-05, + "loss": 1.155, "step": 530500 }, { - "epoch": 1.26, - "learning_rate": 1.157513952187727e-05, - "loss": 1.2479, + "epoch": 4.25, + "learning_rate": 1.5752e-05, + "loss": 1.159, "step": 531000 }, { - "epoch": 1.26, - "learning_rate": 1.1567206508244386e-05, - "loss": 1.2727, + "epoch": 4.25, + "learning_rate": 1.5748000000000002e-05, + "loss": 1.1549, "step": 531500 }, { - "epoch": 1.27, - "learning_rate": 1.15592734946115e-05, - "loss": 1.2378, + "epoch": 4.26, + "learning_rate": 1.5744e-05, + "loss": 1.1496, "step": 532000 }, { - "epoch": 1.27, - "learning_rate": 1.1551340480978616e-05, - "loss": 1.2603, + "epoch": 4.26, + "learning_rate": 1.5740000000000002e-05, + "loss": 1.1525, "step": 532500 }, { - "epoch": 1.27, - "learning_rate": 1.1543407467345734e-05, - "loss": 1.2565, + "epoch": 4.26, + "learning_rate": 1.5736000000000003e-05, + "loss": 1.154, "step": 533000 }, { - "epoch": 1.27, - "learning_rate": 1.153547445371285e-05, - "loss": 1.2526, + "epoch": 4.27, + "learning_rate": 1.5732e-05, + "loss": 1.157, "step": 533500 }, { - "epoch": 1.27, - "learning_rate": 1.1527541440079966e-05, - "loss": 1.2603, + "epoch": 4.27, + "learning_rate": 1.5728000000000003e-05, + "loss": 1.1542, "step": 534000 }, { - "epoch": 1.27, - "learning_rate": 1.1519608426447081e-05, - "loss": 1.2542, + "epoch": 4.28, + "learning_rate": 1.5724e-05, + "loss": 1.1529, "step": 534500 }, { - "epoch": 1.27, - "learning_rate": 1.1511675412814199e-05, - "loss": 1.2574, + "epoch": 4.28, + "learning_rate": 1.5720000000000002e-05, + "loss": 1.1508, "step": 535000 }, { - "epoch": 1.27, - "learning_rate": 1.1503742399181315e-05, - "loss": 1.258, + "epoch": 4.28, + "learning_rate": 1.5716000000000003e-05, + "loss": 1.1584, "step": 535500 }, { - "epoch": 1.28, - "learning_rate": 1.149580938554843e-05, - "loss": 1.2556, + "epoch": 4.29, + "learning_rate": 1.5712e-05, + "loss": 1.1536, "step": 536000 }, { - "epoch": 1.28, - "learning_rate": 1.1487876371915545e-05, - "loss": 1.2515, + "epoch": 4.29, + "learning_rate": 1.5708e-05, + "loss": 1.1577, "step": 536500 }, { - "epoch": 1.28, - "learning_rate": 1.147994335828266e-05, - "loss": 1.2427, + "epoch": 4.3, + "learning_rate": 1.5704e-05, + "loss": 1.1519, "step": 537000 }, { - "epoch": 1.28, - "learning_rate": 1.1472010344649778e-05, - "loss": 1.2583, + "epoch": 4.3, + "learning_rate": 1.5700000000000002e-05, + "loss": 1.1511, "step": 537500 }, { - "epoch": 1.28, - "learning_rate": 1.1464077331016894e-05, - "loss": 1.2782, + "epoch": 4.3, + "learning_rate": 1.5696000000000004e-05, + "loss": 1.1562, "step": 538000 }, { - "epoch": 1.28, - "learning_rate": 1.145614431738401e-05, - "loss": 1.2524, + "epoch": 4.31, + "learning_rate": 1.5692e-05, + "loss": 1.1563, "step": 538500 }, { - "epoch": 1.28, - "learning_rate": 1.1448211303751126e-05, - "loss": 1.2447, + "epoch": 4.31, + "learning_rate": 1.5688e-05, + "loss": 1.1557, "step": 539000 }, { - "epoch": 1.28, - "learning_rate": 1.1440278290118243e-05, - "loss": 1.2417, + "epoch": 4.32, + "learning_rate": 1.5684e-05, + "loss": 1.1491, "step": 539500 }, { - "epoch": 1.29, - "learning_rate": 1.143234527648536e-05, - "loss": 1.2527, + "epoch": 4.32, + "learning_rate": 1.5680000000000002e-05, + "loss": 1.1512, "step": 540000 }, { - "epoch": 1.29, - "learning_rate": 1.1424412262852475e-05, - "loss": 1.2551, + "epoch": 4.32, + "learning_rate": 1.5676000000000004e-05, + "loss": 1.1544, "step": 540500 }, { - "epoch": 1.29, - "learning_rate": 1.141647924921959e-05, - "loss": 1.2426, + "epoch": 4.33, + "learning_rate": 1.5672000000000002e-05, + "loss": 1.1547, "step": 541000 }, { - "epoch": 1.29, - "learning_rate": 1.1408546235586705e-05, - "loss": 1.2493, + "epoch": 4.33, + "learning_rate": 1.5668e-05, + "loss": 1.1499, "step": 541500 }, { - "epoch": 1.29, - "learning_rate": 1.1400613221953823e-05, - "loss": 1.2722, + "epoch": 4.34, + "learning_rate": 1.5664e-05, + "loss": 1.1503, "step": 542000 }, { - "epoch": 1.29, - "learning_rate": 1.1392680208320939e-05, - "loss": 1.2572, + "epoch": 4.34, + "learning_rate": 1.5660000000000003e-05, + "loss": 1.1546, "step": 542500 }, { - "epoch": 1.29, - "learning_rate": 1.1384747194688055e-05, - "loss": 1.2443, + "epoch": 4.34, + "learning_rate": 1.5656000000000004e-05, + "loss": 1.1511, "step": 543000 }, { - "epoch": 1.29, - "learning_rate": 1.1376814181055172e-05, - "loss": 1.2579, + "epoch": 4.35, + "learning_rate": 1.5652000000000002e-05, + "loss": 1.1508, "step": 543500 }, { - "epoch": 1.29, - "learning_rate": 1.1368881167422288e-05, - "loss": 1.2437, + "epoch": 4.35, + "learning_rate": 1.5648e-05, + "loss": 1.15, "step": 544000 }, { - "epoch": 1.3, - "learning_rate": 1.1360948153789404e-05, - "loss": 1.2388, + "epoch": 4.36, + "learning_rate": 1.5644e-05, + "loss": 1.15, "step": 544500 }, { - "epoch": 1.3, - "learning_rate": 1.135301514015652e-05, - "loss": 1.2611, + "epoch": 4.36, + "learning_rate": 1.5640000000000003e-05, + "loss": 1.1526, "step": 545000 }, { - "epoch": 1.3, - "learning_rate": 1.1345082126523634e-05, - "loss": 1.2557, + "epoch": 4.36, + "learning_rate": 1.5636e-05, + "loss": 1.1565, "step": 545500 }, { - "epoch": 1.3, - "learning_rate": 1.1337149112890751e-05, - "loss": 1.2445, + "epoch": 4.37, + "learning_rate": 1.5632000000000002e-05, + "loss": 1.149, "step": 546000 }, { - "epoch": 1.3, - "learning_rate": 1.1329216099257867e-05, - "loss": 1.2309, + "epoch": 4.37, + "learning_rate": 1.5628e-05, + "loss": 1.1605, "step": 546500 }, { - "epoch": 1.3, - "learning_rate": 1.1321283085624983e-05, - "loss": 1.2554, + "epoch": 4.38, + "learning_rate": 1.5624e-05, + "loss": 1.1538, "step": 547000 }, { - "epoch": 1.3, - "learning_rate": 1.1313350071992099e-05, - "loss": 1.2679, + "epoch": 4.38, + "learning_rate": 1.5620000000000003e-05, + "loss": 1.148, "step": 547500 }, { - "epoch": 1.3, - "learning_rate": 1.1305417058359217e-05, - "loss": 1.2626, + "epoch": 4.38, + "learning_rate": 1.5616e-05, + "loss": 1.1501, "step": 548000 }, { - "epoch": 1.31, - "learning_rate": 1.1297484044726332e-05, - "loss": 1.2426, + "epoch": 4.39, + "learning_rate": 1.5612e-05, + "loss": 1.1526, "step": 548500 }, { - "epoch": 1.31, - "learning_rate": 1.1289551031093448e-05, - "loss": 1.2694, + "epoch": 4.39, + "learning_rate": 1.5608e-05, + "loss": 1.155, "step": 549000 }, { - "epoch": 1.31, - "learning_rate": 1.1281618017460564e-05, - "loss": 1.2539, + "epoch": 4.4, + "learning_rate": 1.5604000000000002e-05, + "loss": 1.1529, "step": 549500 }, { - "epoch": 1.31, - "learning_rate": 1.1273685003827678e-05, - "loss": 1.2498, + "epoch": 4.4, + "learning_rate": 1.5600000000000003e-05, + "loss": 1.1506, "step": 550000 }, { - "epoch": 1.31, - "learning_rate": 1.1265751990194796e-05, - "loss": 1.2407, + "epoch": 4.4, + "learning_rate": 1.5596e-05, + "loss": 1.1557, "step": 550500 }, { - "epoch": 1.31, - "learning_rate": 1.1257818976561912e-05, - "loss": 1.2472, + "epoch": 4.41, + "learning_rate": 1.5592e-05, + "loss": 1.1498, "step": 551000 }, { - "epoch": 1.31, - "learning_rate": 1.1249885962929028e-05, - "loss": 1.2337, + "epoch": 4.41, + "learning_rate": 1.5588e-05, + "loss": 1.153, "step": 551500 }, { - "epoch": 1.31, - "learning_rate": 1.1241952949296145e-05, - "loss": 1.2631, + "epoch": 4.42, + "learning_rate": 1.5584000000000002e-05, + "loss": 1.1534, "step": 552000 }, { - "epoch": 1.31, - "learning_rate": 1.1234019935663261e-05, - "loss": 1.2417, + "epoch": 4.42, + "learning_rate": 1.5580000000000003e-05, + "loss": 1.1564, "step": 552500 }, { - "epoch": 1.32, - "learning_rate": 1.1226086922030377e-05, - "loss": 1.2538, + "epoch": 4.42, + "learning_rate": 1.5576e-05, + "loss": 1.1463, "step": 553000 }, { - "epoch": 1.32, - "learning_rate": 1.1218153908397493e-05, - "loss": 1.2546, + "epoch": 4.43, + "learning_rate": 1.5572e-05, + "loss": 1.1479, "step": 553500 }, { - "epoch": 1.32, - "learning_rate": 1.121022089476461e-05, - "loss": 1.253, + "epoch": 4.43, + "learning_rate": 1.5568e-05, + "loss": 1.15, "step": 554000 }, { - "epoch": 1.32, - "learning_rate": 1.1202287881131724e-05, - "loss": 1.2454, + "epoch": 4.44, + "learning_rate": 1.5564000000000002e-05, + "loss": 1.1564, "step": 554500 }, { - "epoch": 1.32, - "learning_rate": 1.119435486749884e-05, - "loss": 1.2478, + "epoch": 4.44, + "learning_rate": 1.556e-05, + "loss": 1.146, "step": 555000 }, { - "epoch": 1.32, - "learning_rate": 1.1186421853865956e-05, - "loss": 1.2628, + "epoch": 4.44, + "learning_rate": 1.5556e-05, + "loss": 1.1495, "step": 555500 }, { - "epoch": 1.32, - "learning_rate": 1.1178488840233072e-05, - "loss": 1.2592, + "epoch": 4.45, + "learning_rate": 1.5552e-05, + "loss": 1.1479, "step": 556000 }, { - "epoch": 1.32, - "learning_rate": 1.117055582660019e-05, - "loss": 1.258, + "epoch": 4.45, + "learning_rate": 1.5548e-05, + "loss": 1.1542, "step": 556500 }, { - "epoch": 1.33, - "learning_rate": 1.1162622812967306e-05, - "loss": 1.2573, + "epoch": 4.46, + "learning_rate": 1.5544000000000002e-05, + "loss": 1.1566, "step": 557000 }, { - "epoch": 1.33, - "learning_rate": 1.1154689799334421e-05, - "loss": 1.2372, + "epoch": 4.46, + "learning_rate": 1.554e-05, + "loss": 1.1505, "step": 557500 }, { - "epoch": 1.33, - "learning_rate": 1.1146756785701537e-05, - "loss": 1.2416, + "epoch": 4.46, + "learning_rate": 1.5536e-05, + "loss": 1.149, "step": 558000 }, { - "epoch": 1.33, - "learning_rate": 1.1138823772068655e-05, - "loss": 1.2381, + "epoch": 4.47, + "learning_rate": 1.5532e-05, + "loss": 1.1447, "step": 558500 }, { - "epoch": 1.33, - "learning_rate": 1.1130890758435769e-05, - "loss": 1.2492, + "epoch": 4.47, + "learning_rate": 1.5528e-05, + "loss": 1.1472, "step": 559000 }, { - "epoch": 1.33, - "learning_rate": 1.1122957744802885e-05, - "loss": 1.259, + "epoch": 4.48, + "learning_rate": 1.5524000000000002e-05, + "loss": 1.1476, "step": 559500 }, { - "epoch": 1.33, - "learning_rate": 1.111502473117e-05, - "loss": 1.2353, + "epoch": 4.48, + "learning_rate": 1.552e-05, + "loss": 1.1506, "step": 560000 }, { - "epoch": 1.33, - "learning_rate": 1.1107091717537117e-05, - "loss": 1.2314, + "epoch": 4.48, + "learning_rate": 1.5516000000000002e-05, + "loss": 1.1446, "step": 560500 }, { - "epoch": 1.34, - "learning_rate": 1.1099158703904234e-05, - "loss": 1.2596, + "epoch": 4.49, + "learning_rate": 1.5512e-05, + "loss": 1.1546, "step": 561000 }, { - "epoch": 1.34, - "learning_rate": 1.109122569027135e-05, - "loss": 1.2457, + "epoch": 4.49, + "learning_rate": 1.5508e-05, + "loss": 1.1502, "step": 561500 }, { - "epoch": 1.34, - "learning_rate": 1.1083292676638466e-05, - "loss": 1.2315, + "epoch": 4.5, + "learning_rate": 1.5504000000000003e-05, + "loss": 1.148, "step": 562000 }, { - "epoch": 1.34, - "learning_rate": 1.1075359663005583e-05, - "loss": 1.2322, + "epoch": 4.5, + "learning_rate": 1.55e-05, + "loss": 1.1537, "step": 562500 }, { - "epoch": 1.34, - "learning_rate": 1.10674266493727e-05, - "loss": 1.2474, + "epoch": 4.5, + "learning_rate": 1.5496000000000002e-05, + "loss": 1.1527, "step": 563000 }, { - "epoch": 1.34, - "learning_rate": 1.1059493635739813e-05, - "loss": 1.251, + "epoch": 4.51, + "learning_rate": 1.5492e-05, + "loss": 1.1474, "step": 563500 }, { - "epoch": 1.34, - "learning_rate": 1.105156062210693e-05, - "loss": 1.2431, + "epoch": 4.51, + "learning_rate": 1.5488e-05, + "loss": 1.1475, "step": 564000 }, { - "epoch": 1.34, - "learning_rate": 1.1043627608474045e-05, - "loss": 1.2544, + "epoch": 4.52, + "learning_rate": 1.5484000000000003e-05, + "loss": 1.1504, "step": 564500 }, { - "epoch": 1.34, - "learning_rate": 1.1035694594841163e-05, - "loss": 1.2439, + "epoch": 4.52, + "learning_rate": 1.548e-05, + "loss": 1.1429, "step": 565000 }, { - "epoch": 1.35, - "learning_rate": 1.1027761581208279e-05, - "loss": 1.2488, + "epoch": 4.52, + "learning_rate": 1.5476000000000002e-05, + "loss": 1.1501, "step": 565500 }, { - "epoch": 1.35, - "learning_rate": 1.1019828567575394e-05, - "loss": 1.2515, + "epoch": 4.53, + "learning_rate": 1.5472e-05, + "loss": 1.1445, "step": 566000 }, { - "epoch": 1.35, - "learning_rate": 1.101189555394251e-05, - "loss": 1.2324, + "epoch": 4.53, + "learning_rate": 1.5468e-05, + "loss": 1.1389, "step": 566500 }, { - "epoch": 1.35, - "learning_rate": 1.1003962540309628e-05, - "loss": 1.2294, + "epoch": 4.54, + "learning_rate": 1.5464e-05, + "loss": 1.1516, "step": 567000 }, { - "epoch": 1.35, - "learning_rate": 1.0996029526676742e-05, - "loss": 1.2316, + "epoch": 4.54, + "learning_rate": 1.546e-05, + "loss": 1.1445, "step": 567500 }, { - "epoch": 1.35, - "learning_rate": 1.0988096513043858e-05, - "loss": 1.219, + "epoch": 4.54, + "learning_rate": 1.5456000000000002e-05, + "loss": 1.1443, "step": 568000 }, { - "epoch": 1.35, - "learning_rate": 1.0980163499410974e-05, - "loss": 1.2251, + "epoch": 4.55, + "learning_rate": 1.5452e-05, + "loss": 1.1487, "step": 568500 }, { - "epoch": 1.35, - "learning_rate": 1.097223048577809e-05, - "loss": 1.2195, + "epoch": 4.55, + "learning_rate": 1.5448000000000002e-05, + "loss": 1.1455, "step": 569000 }, { - "epoch": 1.36, - "learning_rate": 1.0964297472145207e-05, - "loss": 1.2508, + "epoch": 4.56, + "learning_rate": 1.5444e-05, + "loss": 1.1459, "step": 569500 }, { - "epoch": 1.36, - "learning_rate": 1.0956364458512323e-05, - "loss": 1.2491, + "epoch": 4.56, + "learning_rate": 1.544e-05, + "loss": 1.1431, "step": 570000 }, { - "epoch": 1.36, - "learning_rate": 1.0948431444879439e-05, - "loss": 1.2378, + "epoch": 4.56, + "learning_rate": 1.5436000000000003e-05, + "loss": 1.1438, "step": 570500 }, { - "epoch": 1.36, - "learning_rate": 1.0940498431246555e-05, - "loss": 1.2378, + "epoch": 4.57, + "learning_rate": 1.5432e-05, + "loss": 1.1398, "step": 571000 }, { - "epoch": 1.36, - "learning_rate": 1.0932565417613672e-05, - "loss": 1.2436, + "epoch": 4.57, + "learning_rate": 1.5428000000000002e-05, + "loss": 1.1496, "step": 571500 }, { - "epoch": 1.36, - "learning_rate": 1.0924632403980787e-05, - "loss": 1.2443, + "epoch": 4.58, + "learning_rate": 1.5424e-05, + "loss": 1.1434, "step": 572000 }, { - "epoch": 1.36, - "learning_rate": 1.0916699390347902e-05, - "loss": 1.2383, + "epoch": 4.58, + "learning_rate": 1.542e-05, + "loss": 1.1453, "step": 572500 }, { - "epoch": 1.36, - "learning_rate": 1.0908766376715018e-05, - "loss": 1.2309, + "epoch": 4.58, + "learning_rate": 1.5416000000000003e-05, + "loss": 1.1447, "step": 573000 }, { - "epoch": 1.36, - "learning_rate": 1.0900833363082136e-05, - "loss": 1.2478, + "epoch": 4.59, + "learning_rate": 1.5412e-05, + "loss": 1.1413, "step": 573500 }, { - "epoch": 1.37, - "learning_rate": 1.0892900349449252e-05, - "loss": 1.2505, + "epoch": 4.59, + "learning_rate": 1.5408000000000002e-05, + "loss": 1.1439, "step": 574000 }, { - "epoch": 1.37, - "learning_rate": 1.0884967335816368e-05, - "loss": 1.2439, + "epoch": 4.6, + "learning_rate": 1.5404e-05, + "loss": 1.142, "step": 574500 }, { - "epoch": 1.37, - "learning_rate": 1.0877034322183483e-05, - "loss": 1.2237, + "epoch": 4.6, + "learning_rate": 1.54e-05, + "loss": 1.1379, "step": 575000 }, { - "epoch": 1.37, - "learning_rate": 1.0869101308550601e-05, - "loss": 1.2199, + "epoch": 4.6, + "learning_rate": 1.5396000000000003e-05, + "loss": 1.1464, "step": 575500 }, { - "epoch": 1.37, - "learning_rate": 1.0861168294917717e-05, - "loss": 1.2324, + "epoch": 4.61, + "learning_rate": 1.5392e-05, + "loss": 1.1433, "step": 576000 }, { - "epoch": 1.37, - "learning_rate": 1.0853235281284831e-05, - "loss": 1.2398, + "epoch": 4.61, + "learning_rate": 1.5388e-05, + "loss": 1.1473, "step": 576500 }, { - "epoch": 1.37, - "learning_rate": 1.0845302267651947e-05, - "loss": 1.2273, + "epoch": 4.62, + "learning_rate": 1.5384e-05, + "loss": 1.1487, "step": 577000 }, { - "epoch": 1.37, - "learning_rate": 1.0837369254019063e-05, - "loss": 1.2428, + "epoch": 4.62, + "learning_rate": 1.5380000000000002e-05, + "loss": 1.1462, "step": 577500 }, { - "epoch": 1.38, - "learning_rate": 1.082943624038618e-05, - "loss": 1.255, + "epoch": 4.62, + "learning_rate": 1.5376000000000003e-05, + "loss": 1.1419, "step": 578000 }, { - "epoch": 1.38, - "learning_rate": 1.0821503226753296e-05, - "loss": 1.2402, + "epoch": 4.63, + "learning_rate": 1.5372e-05, + "loss": 1.143, "step": 578500 }, { - "epoch": 1.38, - "learning_rate": 1.0813570213120412e-05, - "loss": 1.2374, + "epoch": 4.63, + "learning_rate": 1.5368e-05, + "loss": 1.1429, "step": 579000 }, { - "epoch": 1.38, - "learning_rate": 1.0805637199487528e-05, - "loss": 1.2142, + "epoch": 4.64, + "learning_rate": 1.5364e-05, + "loss": 1.1469, "step": 579500 }, { - "epoch": 1.38, - "learning_rate": 1.0797704185854645e-05, - "loss": 1.2368, + "epoch": 4.64, + "learning_rate": 1.5360000000000002e-05, + "loss": 1.1418, "step": 580000 }, { - "epoch": 1.38, - "learning_rate": 1.0789771172221761e-05, - "loss": 1.2328, + "epoch": 4.64, + "learning_rate": 1.5356000000000003e-05, + "loss": 1.1427, "step": 580500 }, { - "epoch": 1.38, - "learning_rate": 1.0781838158588875e-05, - "loss": 1.2355, + "epoch": 4.65, + "learning_rate": 1.5352e-05, + "loss": 1.1446, "step": 581000 }, { - "epoch": 1.38, - "learning_rate": 1.0773905144955991e-05, - "loss": 1.2521, + "epoch": 4.65, + "learning_rate": 1.5348e-05, + "loss": 1.1447, "step": 581500 }, { - "epoch": 1.39, - "learning_rate": 1.0765972131323107e-05, - "loss": 1.234, + "epoch": 4.66, + "learning_rate": 1.5344e-05, + "loss": 1.1427, "step": 582000 }, { - "epoch": 1.39, - "learning_rate": 1.0758039117690225e-05, - "loss": 1.2303, + "epoch": 4.66, + "learning_rate": 1.5340000000000002e-05, + "loss": 1.1451, "step": 582500 }, { - "epoch": 1.39, - "learning_rate": 1.075010610405734e-05, - "loss": 1.2484, + "epoch": 4.66, + "learning_rate": 1.5336000000000004e-05, + "loss": 1.1531, "step": 583000 }, { - "epoch": 1.39, - "learning_rate": 1.0742173090424457e-05, - "loss": 1.2394, + "epoch": 4.67, + "learning_rate": 1.5332e-05, + "loss": 1.1482, "step": 583500 }, { - "epoch": 1.39, - "learning_rate": 1.0734240076791574e-05, - "loss": 1.2242, + "epoch": 4.67, + "learning_rate": 1.5328e-05, + "loss": 1.1402, "step": 584000 }, { - "epoch": 1.39, - "learning_rate": 1.072630706315869e-05, - "loss": 1.2562, + "epoch": 4.68, + "learning_rate": 1.5324e-05, + "loss": 1.1497, "step": 584500 }, { - "epoch": 1.39, - "learning_rate": 1.0718374049525806e-05, - "loss": 1.2375, + "epoch": 4.68, + "learning_rate": 1.5320000000000002e-05, + "loss": 1.1405, "step": 585000 }, { - "epoch": 1.39, - "learning_rate": 1.071044103589292e-05, - "loss": 1.2331, + "epoch": 4.68, + "learning_rate": 1.5316e-05, + "loss": 1.1484, "step": 585500 }, { - "epoch": 1.39, - "learning_rate": 1.0702508022260036e-05, - "loss": 1.2309, + "epoch": 4.69, + "learning_rate": 1.5312000000000002e-05, + "loss": 1.1422, "step": 586000 }, { - "epoch": 1.4, - "learning_rate": 1.0694575008627153e-05, - "loss": 1.2495, + "epoch": 4.69, + "learning_rate": 1.5308e-05, + "loss": 1.1462, "step": 586500 }, { - "epoch": 1.4, - "learning_rate": 1.068664199499427e-05, - "loss": 1.2242, + "epoch": 4.7, + "learning_rate": 1.5304e-05, + "loss": 1.1398, "step": 587000 }, { - "epoch": 1.4, - "learning_rate": 1.0678708981361385e-05, - "loss": 1.2333, + "epoch": 4.7, + "learning_rate": 1.5300000000000003e-05, + "loss": 1.1404, "step": 587500 }, { - "epoch": 1.4, - "learning_rate": 1.0670775967728501e-05, - "loss": 1.2456, + "epoch": 4.7, + "learning_rate": 1.5296e-05, + "loss": 1.1431, "step": 588000 }, { - "epoch": 1.4, - "learning_rate": 1.0662842954095619e-05, - "loss": 1.2451, + "epoch": 4.71, + "learning_rate": 1.5292e-05, + "loss": 1.1457, "step": 588500 }, { - "epoch": 1.4, - "learning_rate": 1.0654909940462734e-05, - "loss": 1.2471, + "epoch": 4.71, + "learning_rate": 1.5288e-05, + "loss": 1.1436, "step": 589000 }, { - "epoch": 1.4, - "learning_rate": 1.064697692682985e-05, - "loss": 1.2343, + "epoch": 4.72, + "learning_rate": 1.5284e-05, + "loss": 1.1406, "step": 589500 }, { - "epoch": 1.4, - "learning_rate": 1.0639043913196964e-05, - "loss": 1.225, + "epoch": 4.72, + "learning_rate": 1.5280000000000003e-05, + "loss": 1.1449, "step": 590000 }, { - "epoch": 1.41, - "learning_rate": 1.063111089956408e-05, - "loss": 1.2223, + "epoch": 4.72, + "learning_rate": 1.5276e-05, + "loss": 1.1451, "step": 590500 }, { - "epoch": 1.41, - "learning_rate": 1.0623177885931198e-05, - "loss": 1.2502, + "epoch": 4.73, + "learning_rate": 1.5272e-05, + "loss": 1.1442, "step": 591000 }, { - "epoch": 1.41, - "learning_rate": 1.0615244872298314e-05, - "loss": 1.2455, + "epoch": 4.73, + "learning_rate": 1.5268e-05, + "loss": 1.1447, "step": 591500 }, { - "epoch": 1.41, - "learning_rate": 1.060731185866543e-05, - "loss": 1.2542, + "epoch": 4.74, + "learning_rate": 1.5264e-05, + "loss": 1.1363, "step": 592000 }, { - "epoch": 1.41, - "learning_rate": 1.0599378845032547e-05, - "loss": 1.2274, + "epoch": 4.74, + "learning_rate": 1.5260000000000003e-05, + "loss": 1.1394, "step": 592500 }, { - "epoch": 1.41, - "learning_rate": 1.0591445831399663e-05, - "loss": 1.2338, + "epoch": 4.74, + "learning_rate": 1.5256000000000003e-05, + "loss": 1.1427, "step": 593000 }, { - "epoch": 1.41, - "learning_rate": 1.0583512817766779e-05, - "loss": 1.2195, + "epoch": 4.75, + "learning_rate": 1.5252e-05, + "loss": 1.1433, "step": 593500 }, { - "epoch": 1.41, - "learning_rate": 1.0575579804133895e-05, - "loss": 1.2531, + "epoch": 4.75, + "learning_rate": 1.5248e-05, + "loss": 1.1434, "step": 594000 }, { - "epoch": 1.41, - "learning_rate": 1.0567646790501009e-05, - "loss": 1.2461, + "epoch": 4.76, + "learning_rate": 1.5244000000000002e-05, + "loss": 1.1395, "step": 594500 }, { - "epoch": 1.42, - "learning_rate": 1.0559713776868126e-05, - "loss": 1.2358, + "epoch": 4.76, + "learning_rate": 1.5240000000000001e-05, + "loss": 1.1394, "step": 595000 }, { - "epoch": 1.42, - "learning_rate": 1.0551780763235242e-05, - "loss": 1.2296, + "epoch": 4.76, + "learning_rate": 1.5236000000000001e-05, + "loss": 1.1442, "step": 595500 }, { - "epoch": 1.42, - "learning_rate": 1.0543847749602358e-05, - "loss": 1.228, + "epoch": 4.77, + "learning_rate": 1.5232000000000003e-05, + "loss": 1.1438, "step": 596000 }, { - "epoch": 1.42, - "learning_rate": 1.0535914735969474e-05, - "loss": 1.2335, + "epoch": 4.77, + "learning_rate": 1.5228e-05, + "loss": 1.1381, "step": 596500 }, { - "epoch": 1.42, - "learning_rate": 1.0527981722336592e-05, - "loss": 1.2414, + "epoch": 4.78, + "learning_rate": 1.5224e-05, + "loss": 1.1445, "step": 597000 }, { - "epoch": 1.42, - "learning_rate": 1.0520048708703708e-05, - "loss": 1.2255, + "epoch": 4.78, + "learning_rate": 1.5220000000000002e-05, + "loss": 1.1399, "step": 597500 }, { - "epoch": 1.42, - "learning_rate": 1.0512115695070823e-05, - "loss": 1.2497, + "epoch": 4.78, + "learning_rate": 1.5216000000000001e-05, + "loss": 1.1416, "step": 598000 }, { - "epoch": 1.42, - "learning_rate": 1.050418268143794e-05, - "loss": 1.2509, + "epoch": 4.79, + "learning_rate": 1.5212000000000003e-05, + "loss": 1.1322, "step": 598500 }, { - "epoch": 1.43, - "learning_rate": 1.0496249667805053e-05, - "loss": 1.2435, + "epoch": 4.79, + "learning_rate": 1.5208e-05, + "loss": 1.1481, "step": 599000 }, { - "epoch": 1.43, - "learning_rate": 1.0488316654172171e-05, - "loss": 1.2341, + "epoch": 4.8, + "learning_rate": 1.5204e-05, + "loss": 1.1438, "step": 599500 }, { - "epoch": 1.43, - "learning_rate": 1.0480383640539287e-05, - "loss": 1.2228, + "epoch": 4.8, + "learning_rate": 1.5200000000000002e-05, + "loss": 1.1459, "step": 600000 }, { - "epoch": 1.43, - "learning_rate": 1.0472450626906403e-05, - "loss": 1.24, + "epoch": 4.8, + "learning_rate": 1.5196000000000002e-05, + "loss": 1.1441, "step": 600500 }, { - "epoch": 1.43, - "learning_rate": 1.0464517613273519e-05, - "loss": 1.2094, + "epoch": 4.81, + "learning_rate": 1.5192000000000003e-05, + "loss": 1.1437, "step": 601000 }, { - "epoch": 1.43, - "learning_rate": 1.0456584599640636e-05, - "loss": 1.2489, + "epoch": 4.81, + "learning_rate": 1.5188000000000001e-05, + "loss": 1.1386, "step": 601500 }, { - "epoch": 1.43, - "learning_rate": 1.0448651586007752e-05, - "loss": 1.2388, + "epoch": 4.82, + "learning_rate": 1.5184e-05, + "loss": 1.1394, "step": 602000 }, { - "epoch": 1.43, - "learning_rate": 1.0440718572374868e-05, - "loss": 1.2268, + "epoch": 4.82, + "learning_rate": 1.5180000000000002e-05, + "loss": 1.1445, "step": 602500 }, { - "epoch": 1.44, - "learning_rate": 1.0432785558741985e-05, - "loss": 1.2429, + "epoch": 4.82, + "learning_rate": 1.5176000000000002e-05, + "loss": 1.1377, "step": 603000 }, { - "epoch": 1.44, - "learning_rate": 1.0424852545109098e-05, - "loss": 1.2336, + "epoch": 4.83, + "learning_rate": 1.5172000000000001e-05, + "loss": 1.1405, "step": 603500 }, { - "epoch": 1.44, - "learning_rate": 1.0416919531476215e-05, - "loss": 1.2319, + "epoch": 4.83, + "learning_rate": 1.5168000000000001e-05, + "loss": 1.1404, "step": 604000 }, { - "epoch": 1.44, - "learning_rate": 1.0408986517843331e-05, - "loss": 1.231, + "epoch": 4.84, + "learning_rate": 1.5164e-05, + "loss": 1.1472, "step": 604500 }, { - "epoch": 1.44, - "learning_rate": 1.0401053504210447e-05, - "loss": 1.2306, + "epoch": 4.84, + "learning_rate": 1.516e-05, + "loss": 1.1384, "step": 605000 }, { - "epoch": 1.44, - "learning_rate": 1.0393120490577565e-05, - "loss": 1.2165, + "epoch": 4.84, + "learning_rate": 1.5156000000000002e-05, + "loss": 1.1396, "step": 605500 }, { - "epoch": 1.44, - "learning_rate": 1.038518747694468e-05, - "loss": 1.2401, + "epoch": 4.85, + "learning_rate": 1.5152000000000002e-05, + "loss": 1.1337, "step": 606000 }, { - "epoch": 1.44, - "learning_rate": 1.0377254463311796e-05, - "loss": 1.2291, + "epoch": 4.85, + "learning_rate": 1.5148e-05, + "loss": 1.1399, "step": 606500 }, { - "epoch": 1.44, - "learning_rate": 1.0369321449678912e-05, - "loss": 1.2372, + "epoch": 4.86, + "learning_rate": 1.5144000000000001e-05, + "loss": 1.1407, "step": 607000 }, { - "epoch": 1.45, - "learning_rate": 1.036138843604603e-05, - "loss": 1.2341, + "epoch": 4.86, + "learning_rate": 1.514e-05, + "loss": 1.1419, "step": 607500 }, { - "epoch": 1.45, - "learning_rate": 1.0353455422413144e-05, - "loss": 1.2404, + "epoch": 4.86, + "learning_rate": 1.5136000000000002e-05, + "loss": 1.1408, "step": 608000 }, { - "epoch": 1.45, - "learning_rate": 1.034552240878026e-05, - "loss": 1.2312, + "epoch": 4.87, + "learning_rate": 1.5132000000000002e-05, + "loss": 1.1376, "step": 608500 }, { - "epoch": 1.45, - "learning_rate": 1.0337589395147376e-05, - "loss": 1.2478, + "epoch": 4.87, + "learning_rate": 1.5128e-05, + "loss": 1.1439, "step": 609000 }, { - "epoch": 1.45, - "learning_rate": 1.0329656381514492e-05, - "loss": 1.2271, + "epoch": 4.88, + "learning_rate": 1.5124000000000001e-05, + "loss": 1.1368, "step": 609500 }, { - "epoch": 1.45, - "learning_rate": 1.032172336788161e-05, - "loss": 1.2301, + "epoch": 4.88, + "learning_rate": 1.5120000000000001e-05, + "loss": 1.1404, "step": 610000 }, { - "epoch": 1.45, - "learning_rate": 1.0313790354248725e-05, - "loss": 1.219, + "epoch": 4.88, + "learning_rate": 1.5116000000000002e-05, + "loss": 1.138, "step": 610500 }, { - "epoch": 1.45, - "learning_rate": 1.0305857340615841e-05, - "loss": 1.2283, + "epoch": 4.89, + "learning_rate": 1.5112000000000002e-05, + "loss": 1.1406, "step": 611000 }, { - "epoch": 1.46, - "learning_rate": 1.0297924326982957e-05, - "loss": 1.2512, + "epoch": 4.89, + "learning_rate": 1.5108e-05, + "loss": 1.1373, "step": 611500 }, { - "epoch": 1.46, - "learning_rate": 1.0289991313350071e-05, - "loss": 1.2383, + "epoch": 4.9, + "learning_rate": 1.5104000000000001e-05, + "loss": 1.1421, "step": 612000 }, { - "epoch": 1.46, - "learning_rate": 1.0282058299717189e-05, - "loss": 1.2406, + "epoch": 4.9, + "learning_rate": 1.5100000000000001e-05, + "loss": 1.1362, "step": 612500 }, { - "epoch": 1.46, - "learning_rate": 1.0274125286084304e-05, - "loss": 1.2417, + "epoch": 4.9, + "learning_rate": 1.5096000000000003e-05, + "loss": 1.1476, "step": 613000 }, { - "epoch": 1.46, - "learning_rate": 1.026619227245142e-05, - "loss": 1.2424, + "epoch": 4.91, + "learning_rate": 1.5092000000000002e-05, + "loss": 1.1333, "step": 613500 }, { - "epoch": 1.46, - "learning_rate": 1.0258259258818538e-05, - "loss": 1.2309, + "epoch": 4.91, + "learning_rate": 1.5088e-05, + "loss": 1.1306, "step": 614000 }, { - "epoch": 1.46, - "learning_rate": 1.0250326245185654e-05, - "loss": 1.2327, + "epoch": 4.92, + "learning_rate": 1.5084000000000002e-05, + "loss": 1.1361, "step": 614500 }, { - "epoch": 1.46, - "learning_rate": 1.024239323155277e-05, - "loss": 1.2523, + "epoch": 4.92, + "learning_rate": 1.5080000000000001e-05, + "loss": 1.1351, "step": 615000 }, { - "epoch": 1.46, - "learning_rate": 1.0234460217919885e-05, - "loss": 1.2256, + "epoch": 4.92, + "learning_rate": 1.5076000000000001e-05, + "loss": 1.1363, "step": 615500 }, { - "epoch": 1.47, - "learning_rate": 1.0226527204287003e-05, - "loss": 1.2333, + "epoch": 4.93, + "learning_rate": 1.5072000000000002e-05, + "loss": 1.1414, "step": 616000 }, { - "epoch": 1.47, - "learning_rate": 1.0218594190654117e-05, - "loss": 1.237, + "epoch": 4.93, + "learning_rate": 1.5068e-05, + "loss": 1.1449, "step": 616500 }, { - "epoch": 1.47, - "learning_rate": 1.0210661177021233e-05, - "loss": 1.2365, + "epoch": 4.94, + "learning_rate": 1.5064e-05, + "loss": 1.137, "step": 617000 }, { - "epoch": 1.47, - "learning_rate": 1.0202728163388349e-05, - "loss": 1.2267, + "epoch": 4.94, + "learning_rate": 1.5060000000000001e-05, + "loss": 1.1387, "step": 617500 }, { - "epoch": 1.47, - "learning_rate": 1.0194795149755465e-05, - "loss": 1.2259, + "epoch": 4.94, + "learning_rate": 1.5056000000000001e-05, + "loss": 1.1336, "step": 618000 }, { - "epoch": 1.47, - "learning_rate": 1.0186862136122582e-05, - "loss": 1.2367, + "epoch": 4.95, + "learning_rate": 1.5052000000000003e-05, + "loss": 1.1365, "step": 618500 }, { - "epoch": 1.47, - "learning_rate": 1.0178929122489698e-05, - "loss": 1.2462, + "epoch": 4.95, + "learning_rate": 1.5048e-05, + "loss": 1.1387, "step": 619000 }, { - "epoch": 1.47, - "learning_rate": 1.0170996108856814e-05, - "loss": 1.2328, + "epoch": 4.96, + "learning_rate": 1.5044e-05, + "loss": 1.1352, "step": 619500 }, { - "epoch": 1.48, - "learning_rate": 1.016306309522393e-05, - "loss": 1.2295, + "epoch": 4.96, + "learning_rate": 1.5040000000000002e-05, + "loss": 1.1321, "step": 620000 }, { - "epoch": 1.48, - "learning_rate": 1.0155130081591047e-05, - "loss": 1.2077, + "epoch": 4.96, + "learning_rate": 1.5036000000000001e-05, + "loss": 1.1341, "step": 620500 }, { - "epoch": 1.48, - "learning_rate": 1.0147197067958162e-05, - "loss": 1.2405, + "epoch": 4.97, + "learning_rate": 1.5032000000000003e-05, + "loss": 1.1283, "step": 621000 }, { - "epoch": 1.48, - "learning_rate": 1.0139264054325277e-05, - "loss": 1.236, + "epoch": 4.97, + "learning_rate": 1.5028e-05, + "loss": 1.135, "step": 621500 }, { - "epoch": 1.48, - "learning_rate": 1.0131331040692393e-05, - "loss": 1.2219, + "epoch": 4.98, + "learning_rate": 1.5024e-05, + "loss": 1.1357, "step": 622000 }, { - "epoch": 1.48, - "learning_rate": 1.012339802705951e-05, - "loss": 1.2249, + "epoch": 4.98, + "learning_rate": 1.5020000000000002e-05, + "loss": 1.1351, "step": 622500 }, { - "epoch": 1.48, - "learning_rate": 1.0115465013426627e-05, - "loss": 1.2262, + "epoch": 4.98, + "learning_rate": 1.5016000000000002e-05, + "loss": 1.1347, "step": 623000 }, { - "epoch": 1.48, - "learning_rate": 1.0107531999793743e-05, - "loss": 1.2271, + "epoch": 4.99, + "learning_rate": 1.5012000000000001e-05, + "loss": 1.1358, "step": 623500 }, { - "epoch": 1.49, - "learning_rate": 1.0099598986160859e-05, - "loss": 1.2369, + "epoch": 4.99, + "learning_rate": 1.5008000000000001e-05, + "loss": 1.1409, "step": 624000 }, { - "epoch": 1.49, - "learning_rate": 1.0091665972527976e-05, - "loss": 1.2349, + "epoch": 5.0, + "learning_rate": 1.5004e-05, + "loss": 1.138, "step": 624500 }, { - "epoch": 1.49, - "learning_rate": 1.0083732958895092e-05, - "loss": 1.2275, + "epoch": 5.0, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.135, "step": 625000 }, { - "epoch": 1.49, - "learning_rate": 1.0075799945262206e-05, - "loss": 1.2324, - "step": 625500 - }, - { - "epoch": 1.49, - "learning_rate": 1.0067866931629322e-05, - "loss": 1.2136, - "step": 626000 - }, - { - "epoch": 1.49, - "learning_rate": 1.0059933917996438e-05, - "loss": 1.2246, - "step": 626500 - }, - { - "epoch": 1.49, - "learning_rate": 1.0052000904363555e-05, - "loss": 1.2136, - "step": 627000 - }, - { - "epoch": 1.49, - "learning_rate": 1.0044067890730671e-05, - "loss": 1.2296, - "step": 627500 - }, - { - "epoch": 1.49, - "learning_rate": 1.0036134877097787e-05, - "loss": 1.2207, - "step": 628000 - }, - { - "epoch": 1.5, - "learning_rate": 1.0028201863464903e-05, - "loss": 1.225, - "step": 628500 - }, - { - "epoch": 1.5, - "learning_rate": 1.002026884983202e-05, - "loss": 1.2222, - "step": 629000 - }, - { - "epoch": 1.5, - "learning_rate": 1.0012335836199136e-05, - "loss": 1.227, - "step": 629500 - }, - { - "epoch": 1.5, - "learning_rate": 1.000440282256625e-05, - "loss": 1.2424, - "step": 630000 - }, - { - "epoch": 1.5, - "learning_rate": 9.996469808933368e-06, - "loss": 1.2185, - "step": 630500 - }, - { - "epoch": 1.5, - "learning_rate": 9.988536795300482e-06, - "loss": 1.2252, - "step": 631000 - }, - { - "epoch": 1.5, - "learning_rate": 9.9806037816676e-06, - "loss": 1.2155, - "step": 631500 - }, - { - "epoch": 1.5, - "learning_rate": 9.972670768034716e-06, - "loss": 1.2481, - "step": 632000 - }, - { - "epoch": 1.51, - "learning_rate": 9.964737754401832e-06, - "loss": 1.2385, - "step": 632500 - }, - { - "epoch": 1.51, - "learning_rate": 9.95680474076895e-06, - "loss": 1.2393, - "step": 633000 - }, - { - "epoch": 1.51, - "learning_rate": 9.948871727136063e-06, - "loss": 1.2289, - "step": 633500 - }, - { - "epoch": 1.51, - "learning_rate": 9.94093871350318e-06, - "loss": 1.2412, - "step": 634000 - }, - { - "epoch": 1.51, - "learning_rate": 9.933005699870297e-06, - "loss": 1.2337, - "step": 634500 - }, - { - "epoch": 1.51, - "learning_rate": 9.925072686237413e-06, - "loss": 1.2283, - "step": 635000 - }, - { - "epoch": 1.51, - "learning_rate": 9.917139672604528e-06, - "loss": 1.2191, - "step": 635500 - }, - { - "epoch": 1.51, - "learning_rate": 9.909206658971644e-06, - "loss": 1.2281, - "step": 636000 - }, - { - "epoch": 1.51, - "learning_rate": 9.90127364533876e-06, - "loss": 1.2253, - "step": 636500 - }, - { - "epoch": 1.52, - "learning_rate": 9.893340631705876e-06, - "loss": 1.2368, - "step": 637000 - }, - { - "epoch": 1.52, - "learning_rate": 9.885407618072994e-06, - "loss": 1.221, - "step": 637500 - }, - { - "epoch": 1.52, - "learning_rate": 9.877474604440108e-06, - "loss": 1.2251, - "step": 638000 - }, - { - "epoch": 1.52, - "learning_rate": 9.869541590807224e-06, - "loss": 1.2191, - "step": 638500 - }, - { - "epoch": 1.52, - "learning_rate": 9.861608577174341e-06, - "loss": 1.2252, - "step": 639000 - }, - { - "epoch": 1.52, - "learning_rate": 9.853675563541457e-06, - "loss": 1.2376, - "step": 639500 - }, - { - "epoch": 1.52, - "learning_rate": 9.845742549908573e-06, - "loss": 1.2339, - "step": 640000 - }, - { - "epoch": 1.52, - "learning_rate": 9.837809536275689e-06, - "loss": 1.2308, - "step": 640500 - }, - { - "epoch": 1.53, - "learning_rate": 9.829876522642805e-06, - "loss": 1.2171, - "step": 641000 - }, - { - "epoch": 1.53, - "learning_rate": 9.82194350900992e-06, - "loss": 1.224, - "step": 641500 - }, - { - "epoch": 1.53, - "learning_rate": 9.814010495377038e-06, - "loss": 1.2415, - "step": 642000 - }, - { - "epoch": 1.53, - "learning_rate": 9.806077481744152e-06, - "loss": 1.2177, - "step": 642500 - }, - { - "epoch": 1.53, - "learning_rate": 9.79814446811127e-06, - "loss": 1.2285, - "step": 643000 - }, - { - "epoch": 1.53, - "learning_rate": 9.790211454478386e-06, - "loss": 1.2279, - "step": 643500 - }, - { - "epoch": 1.53, - "learning_rate": 9.782278440845502e-06, - "loss": 1.223, - "step": 644000 - }, - { - "epoch": 1.53, - "learning_rate": 9.774345427212617e-06, - "loss": 1.2278, - "step": 644500 - }, - { - "epoch": 1.54, - "learning_rate": 9.766412413579733e-06, - "loss": 1.2226, - "step": 645000 - }, - { - "epoch": 1.54, - "learning_rate": 9.75847939994685e-06, - "loss": 1.2169, - "step": 645500 - }, - { - "epoch": 1.54, - "learning_rate": 9.750546386313967e-06, - "loss": 1.216, - "step": 646000 - }, - { - "epoch": 1.54, - "learning_rate": 9.742613372681081e-06, - "loss": 1.2381, - "step": 646500 - }, - { - "epoch": 1.54, - "learning_rate": 9.734680359048197e-06, - "loss": 1.2404, - "step": 647000 - }, - { - "epoch": 1.54, - "learning_rate": 9.726747345415314e-06, - "loss": 1.2249, - "step": 647500 - }, - { - "epoch": 1.54, - "learning_rate": 9.71881433178243e-06, - "loss": 1.2213, - "step": 648000 - }, - { - "epoch": 1.54, - "learning_rate": 9.710881318149546e-06, - "loss": 1.2233, - "step": 648500 - }, - { - "epoch": 1.54, - "learning_rate": 9.702948304516662e-06, - "loss": 1.2286, - "step": 649000 - }, - { - "epoch": 1.55, - "learning_rate": 9.695015290883778e-06, - "loss": 1.2079, - "step": 649500 - }, - { - "epoch": 1.55, - "learning_rate": 9.687082277250894e-06, - "loss": 1.2364, - "step": 650000 - }, - { - "epoch": 1.55, - "learning_rate": 9.679149263618011e-06, - "loss": 1.2281, - "step": 650500 - }, - { - "epoch": 1.55, - "learning_rate": 9.671216249985125e-06, - "loss": 1.2287, - "step": 651000 - }, - { - "epoch": 1.55, - "learning_rate": 9.663283236352243e-06, - "loss": 1.2199, - "step": 651500 - }, - { - "epoch": 1.55, - "learning_rate": 9.655350222719359e-06, - "loss": 1.2197, - "step": 652000 - }, - { - "epoch": 1.55, - "learning_rate": 9.647417209086475e-06, - "loss": 1.2189, - "step": 652500 - }, - { - "epoch": 1.55, - "learning_rate": 9.63948419545359e-06, - "loss": 1.2235, - "step": 653000 - }, - { - "epoch": 1.56, - "learning_rate": 9.631551181820706e-06, - "loss": 1.2138, - "step": 653500 - }, - { - "epoch": 1.56, - "learning_rate": 9.623618168187822e-06, - "loss": 1.2133, - "step": 654000 - }, - { - "epoch": 1.56, - "learning_rate": 9.61568515455494e-06, - "loss": 1.2297, - "step": 654500 - }, - { - "epoch": 1.56, - "learning_rate": 9.607752140922056e-06, - "loss": 1.222, - "step": 655000 - }, - { - "epoch": 1.56, - "learning_rate": 9.59981912728917e-06, - "loss": 1.2225, - "step": 655500 - }, - { - "epoch": 1.56, - "learning_rate": 9.591886113656287e-06, - "loss": 1.214, - "step": 656000 - }, - { - "epoch": 1.56, - "learning_rate": 9.583953100023403e-06, - "loss": 1.2258, - "step": 656500 - }, - { - "epoch": 1.56, - "learning_rate": 9.576020086390519e-06, - "loss": 1.2062, - "step": 657000 - }, - { - "epoch": 1.56, - "learning_rate": 9.568087072757635e-06, - "loss": 1.2271, - "step": 657500 - }, - { - "epoch": 1.57, - "learning_rate": 9.560154059124751e-06, - "loss": 1.2179, - "step": 658000 - }, - { - "epoch": 1.57, - "learning_rate": 9.552221045491867e-06, - "loss": 1.2252, - "step": 658500 - }, - { - "epoch": 1.57, - "learning_rate": 9.544288031858984e-06, - "loss": 1.2442, - "step": 659000 - }, - { - "epoch": 1.57, - "learning_rate": 9.5363550182261e-06, - "loss": 1.2145, - "step": 659500 - }, - { - "epoch": 1.57, - "learning_rate": 9.528422004593216e-06, - "loss": 1.2347, - "step": 660000 - }, - { - "epoch": 1.57, - "learning_rate": 9.520488990960332e-06, - "loss": 1.2252, - "step": 660500 - }, - { - "epoch": 1.57, - "learning_rate": 9.512555977327448e-06, - "loss": 1.2191, - "step": 661000 - }, - { - "epoch": 1.57, - "learning_rate": 9.504622963694564e-06, - "loss": 1.2274, - "step": 661500 - }, - { - "epoch": 1.58, - "learning_rate": 9.496689950061681e-06, - "loss": 1.2219, - "step": 662000 - }, - { - "epoch": 1.58, - "learning_rate": 9.488756936428795e-06, - "loss": 1.2181, - "step": 662500 - }, - { - "epoch": 1.58, - "learning_rate": 9.480823922795911e-06, - "loss": 1.2131, - "step": 663000 - }, - { - "epoch": 1.58, - "learning_rate": 9.472890909163029e-06, - "loss": 1.2294, - "step": 663500 - }, - { - "epoch": 1.58, - "learning_rate": 9.464957895530145e-06, - "loss": 1.2429, - "step": 664000 - }, - { - "epoch": 1.58, - "learning_rate": 9.45702488189726e-06, - "loss": 1.2208, - "step": 664500 - }, - { - "epoch": 1.58, - "learning_rate": 9.449091868264376e-06, - "loss": 1.2231, - "step": 665000 - }, - { - "epoch": 1.58, - "learning_rate": 9.441158854631492e-06, - "loss": 1.2108, - "step": 665500 - }, - { - "epoch": 1.59, - "learning_rate": 9.433225840998608e-06, - "loss": 1.2158, - "step": 666000 - }, - { - "epoch": 1.59, - "learning_rate": 9.425292827365726e-06, - "loss": 1.2191, - "step": 666500 - }, - { - "epoch": 1.59, - "learning_rate": 9.41735981373284e-06, - "loss": 1.2236, - "step": 667000 - }, - { - "epoch": 1.59, - "learning_rate": 9.409426800099957e-06, - "loss": 1.2326, - "step": 667500 - }, - { - "epoch": 1.59, - "learning_rate": 9.401493786467073e-06, - "loss": 1.2071, - "step": 668000 - }, - { - "epoch": 1.59, - "learning_rate": 9.393560772834189e-06, - "loss": 1.2211, - "step": 668500 - }, - { - "epoch": 1.59, - "learning_rate": 9.385627759201305e-06, - "loss": 1.2259, - "step": 669000 - }, - { - "epoch": 1.59, - "learning_rate": 9.377694745568421e-06, - "loss": 1.2159, - "step": 669500 - }, - { - "epoch": 1.59, - "learning_rate": 9.369761731935537e-06, - "loss": 1.2172, - "step": 670000 - }, - { - "epoch": 1.6, - "learning_rate": 9.361828718302654e-06, - "loss": 1.2237, - "step": 670500 - }, - { - "epoch": 1.6, - "learning_rate": 9.353895704669768e-06, - "loss": 1.2084, - "step": 671000 - }, - { - "epoch": 1.6, - "learning_rate": 9.345962691036884e-06, - "loss": 1.2226, - "step": 671500 - }, - { - "epoch": 1.6, - "learning_rate": 9.338029677404002e-06, - "loss": 1.2274, - "step": 672000 - }, - { - "epoch": 1.6, - "learning_rate": 9.330096663771118e-06, - "loss": 1.2333, - "step": 672500 - }, - { - "epoch": 1.6, - "learning_rate": 9.322163650138234e-06, - "loss": 1.2306, - "step": 673000 - }, - { - "epoch": 1.6, - "learning_rate": 9.31423063650535e-06, - "loss": 1.2133, - "step": 673500 - }, - { - "epoch": 1.6, - "learning_rate": 9.306297622872465e-06, - "loss": 1.2164, - "step": 674000 - }, - { - "epoch": 1.61, - "learning_rate": 9.298364609239581e-06, - "loss": 1.2292, - "step": 674500 - }, - { - "epoch": 1.61, - "learning_rate": 9.290431595606699e-06, - "loss": 1.2242, - "step": 675000 - }, - { - "epoch": 1.61, - "learning_rate": 9.282498581973813e-06, - "loss": 1.2099, - "step": 675500 - }, - { - "epoch": 1.61, - "learning_rate": 9.27456556834093e-06, - "loss": 1.234, - "step": 676000 - }, - { - "epoch": 1.61, - "learning_rate": 9.266632554708046e-06, - "loss": 1.216, - "step": 676500 - }, - { - "epoch": 1.61, - "learning_rate": 9.258699541075162e-06, - "loss": 1.2322, - "step": 677000 - }, - { - "epoch": 1.61, - "learning_rate": 9.250766527442278e-06, - "loss": 1.2198, - "step": 677500 - }, - { - "epoch": 1.61, - "learning_rate": 9.242833513809394e-06, - "loss": 1.217, - "step": 678000 - }, - { - "epoch": 1.61, - "learning_rate": 9.23490050017651e-06, - "loss": 1.2182, - "step": 678500 - }, - { - "epoch": 1.62, - "learning_rate": 9.226967486543626e-06, - "loss": 1.2326, - "step": 679000 - }, - { - "epoch": 1.62, - "learning_rate": 9.219034472910743e-06, - "loss": 1.2211, - "step": 679500 - }, - { - "epoch": 1.62, - "learning_rate": 9.211101459277857e-06, - "loss": 1.2206, - "step": 680000 - }, - { - "epoch": 1.62, - "learning_rate": 9.203168445644975e-06, - "loss": 1.2312, - "step": 680500 - }, - { - "epoch": 1.62, - "learning_rate": 9.19523543201209e-06, - "loss": 1.2136, - "step": 681000 - }, - { - "epoch": 1.62, - "learning_rate": 9.187302418379207e-06, - "loss": 1.2123, - "step": 681500 - }, - { - "epoch": 1.62, - "learning_rate": 9.179369404746323e-06, - "loss": 1.2244, - "step": 682000 - }, - { - "epoch": 1.62, - "learning_rate": 9.171436391113438e-06, - "loss": 1.2188, - "step": 682500 - }, - { - "epoch": 1.63, - "learning_rate": 9.163503377480554e-06, - "loss": 1.2209, - "step": 683000 - }, - { - "epoch": 1.63, - "learning_rate": 9.155570363847672e-06, - "loss": 1.2239, - "step": 683500 - }, - { - "epoch": 1.63, - "learning_rate": 9.147637350214788e-06, - "loss": 1.2193, - "step": 684000 - }, - { - "epoch": 1.63, - "learning_rate": 9.139704336581902e-06, - "loss": 1.2076, - "step": 684500 - }, - { - "epoch": 1.63, - "learning_rate": 9.13177132294902e-06, - "loss": 1.2177, - "step": 685000 - }, - { - "epoch": 1.63, - "learning_rate": 9.123838309316135e-06, - "loss": 1.2086, - "step": 685500 - }, - { - "epoch": 1.63, - "learning_rate": 9.115905295683251e-06, - "loss": 1.2029, - "step": 686000 - }, - { - "epoch": 1.63, - "learning_rate": 9.107972282050369e-06, - "loss": 1.2213, - "step": 686500 - }, - { - "epoch": 1.63, - "learning_rate": 9.100039268417483e-06, - "loss": 1.2117, - "step": 687000 - }, - { - "epoch": 1.64, - "learning_rate": 9.092106254784599e-06, - "loss": 1.2133, - "step": 687500 - }, - { - "epoch": 1.64, - "learning_rate": 9.084173241151716e-06, - "loss": 1.2118, - "step": 688000 - }, - { - "epoch": 1.64, - "learning_rate": 9.076240227518832e-06, - "loss": 1.2065, - "step": 688500 - }, - { - "epoch": 1.64, - "learning_rate": 9.068307213885948e-06, - "loss": 1.2298, - "step": 689000 - }, - { - "epoch": 1.64, - "learning_rate": 9.060374200253064e-06, - "loss": 1.2306, - "step": 689500 - }, - { - "epoch": 1.64, - "learning_rate": 9.05244118662018e-06, - "loss": 1.2198, - "step": 690000 - }, - { - "epoch": 1.64, - "learning_rate": 9.044508172987296e-06, - "loss": 1.1961, - "step": 690500 - }, - { - "epoch": 1.64, - "learning_rate": 9.036575159354412e-06, - "loss": 1.2213, - "step": 691000 - }, - { - "epoch": 1.65, - "learning_rate": 9.028642145721527e-06, - "loss": 1.2063, - "step": 691500 - }, - { - "epoch": 1.65, - "learning_rate": 9.020709132088645e-06, - "loss": 1.2194, - "step": 692000 - }, - { - "epoch": 1.65, - "learning_rate": 9.01277611845576e-06, - "loss": 1.2069, - "step": 692500 - }, - { - "epoch": 1.65, - "learning_rate": 9.004843104822877e-06, - "loss": 1.2097, - "step": 693000 - }, - { - "epoch": 1.65, - "learning_rate": 8.996910091189993e-06, - "loss": 1.239, - "step": 693500 - }, - { - "epoch": 1.65, - "learning_rate": 8.988977077557108e-06, - "loss": 1.2312, - "step": 694000 - }, - { - "epoch": 1.65, - "learning_rate": 8.981044063924224e-06, - "loss": 1.2149, - "step": 694500 - }, - { - "epoch": 1.65, - "learning_rate": 8.973111050291342e-06, - "loss": 1.2192, - "step": 695000 - }, - { - "epoch": 1.66, - "learning_rate": 8.965178036658456e-06, - "loss": 1.212, - "step": 695500 - }, - { - "epoch": 1.66, - "learning_rate": 8.957245023025572e-06, - "loss": 1.2026, - "step": 696000 - }, - { - "epoch": 1.66, - "learning_rate": 8.94931200939269e-06, - "loss": 1.2288, - "step": 696500 - }, - { - "epoch": 1.66, - "learning_rate": 8.941378995759805e-06, - "loss": 1.2238, - "step": 697000 - }, - { - "epoch": 1.66, - "learning_rate": 8.933445982126921e-06, - "loss": 1.2216, - "step": 697500 - }, - { - "epoch": 1.66, - "learning_rate": 8.925512968494037e-06, - "loss": 1.2042, - "step": 698000 - }, - { - "epoch": 1.66, - "learning_rate": 8.917579954861153e-06, - "loss": 1.2156, - "step": 698500 - }, - { - "epoch": 1.66, - "learning_rate": 8.909646941228269e-06, - "loss": 1.2029, - "step": 699000 - }, - { - "epoch": 1.66, - "learning_rate": 8.901713927595386e-06, - "loss": 1.2186, - "step": 699500 - }, - { - "epoch": 1.67, - "learning_rate": 8.8937809139625e-06, - "loss": 1.2116, - "step": 700000 - }, - { - "epoch": 1.67, - "learning_rate": 8.885847900329618e-06, - "loss": 1.2051, - "step": 700500 - }, - { - "epoch": 1.67, - "learning_rate": 8.877914886696734e-06, - "loss": 1.2052, - "step": 701000 - }, - { - "epoch": 1.67, - "learning_rate": 8.86998187306385e-06, - "loss": 1.2049, - "step": 701500 - }, - { - "epoch": 1.67, - "learning_rate": 8.862048859430966e-06, - "loss": 1.2096, - "step": 702000 - }, - { - "epoch": 1.67, - "learning_rate": 8.854115845798081e-06, - "loss": 1.1989, - "step": 702500 - }, - { - "epoch": 1.67, - "learning_rate": 8.846182832165197e-06, - "loss": 1.2356, - "step": 703000 - }, - { - "epoch": 1.67, - "learning_rate": 8.838249818532313e-06, - "loss": 1.2042, - "step": 703500 - }, - { - "epoch": 1.68, - "learning_rate": 8.83031680489943e-06, - "loss": 1.2152, - "step": 704000 - }, - { - "epoch": 1.68, - "learning_rate": 8.822383791266545e-06, - "loss": 1.2113, - "step": 704500 - }, - { - "epoch": 1.68, - "learning_rate": 8.814450777633663e-06, - "loss": 1.218, - "step": 705000 - }, - { - "epoch": 1.68, - "learning_rate": 8.806517764000778e-06, - "loss": 1.2144, - "step": 705500 - }, - { - "epoch": 1.68, - "learning_rate": 8.798584750367894e-06, - "loss": 1.1974, - "step": 706000 - }, - { - "epoch": 1.68, - "learning_rate": 8.79065173673501e-06, - "loss": 1.2119, - "step": 706500 - }, - { - "epoch": 1.68, - "learning_rate": 8.782718723102126e-06, - "loss": 1.2106, - "step": 707000 - }, - { - "epoch": 1.68, - "learning_rate": 8.774785709469242e-06, - "loss": 1.2054, - "step": 707500 - }, - { - "epoch": 1.68, - "learning_rate": 8.76685269583636e-06, - "loss": 1.2112, - "step": 708000 - }, - { - "epoch": 1.69, - "learning_rate": 8.758919682203475e-06, - "loss": 1.1947, - "step": 708500 - }, - { - "epoch": 1.69, - "learning_rate": 8.75098666857059e-06, - "loss": 1.1973, - "step": 709000 - }, - { - "epoch": 1.69, - "learning_rate": 8.743053654937707e-06, - "loss": 1.2104, - "step": 709500 - }, - { - "epoch": 1.69, - "learning_rate": 8.735120641304823e-06, - "loss": 1.2197, - "step": 710000 - }, - { - "epoch": 1.69, - "learning_rate": 8.727187627671939e-06, - "loss": 1.2159, - "step": 710500 - }, - { - "epoch": 1.69, - "learning_rate": 8.719254614039056e-06, - "loss": 1.2197, - "step": 711000 - }, - { - "epoch": 1.69, - "learning_rate": 8.71132160040617e-06, - "loss": 1.2245, - "step": 711500 - }, - { - "epoch": 1.69, - "learning_rate": 8.703388586773286e-06, - "loss": 1.209, - "step": 712000 - }, - { - "epoch": 1.7, - "learning_rate": 8.695455573140404e-06, - "loss": 1.2146, - "step": 712500 - }, - { - "epoch": 1.7, - "learning_rate": 8.68752255950752e-06, - "loss": 1.2209, - "step": 713000 - }, - { - "epoch": 1.7, - "learning_rate": 8.679589545874636e-06, - "loss": 1.2176, - "step": 713500 - }, - { - "epoch": 1.7, - "learning_rate": 8.671656532241751e-06, - "loss": 1.1941, - "step": 714000 - }, - { - "epoch": 1.7, - "learning_rate": 8.663723518608867e-06, - "loss": 1.2102, - "step": 714500 - }, - { - "epoch": 1.7, - "learning_rate": 8.655790504975983e-06, - "loss": 1.211, - "step": 715000 - }, - { - "epoch": 1.7, - "learning_rate": 8.647857491343099e-06, - "loss": 1.2402, - "step": 715500 - }, - { - "epoch": 1.7, - "learning_rate": 8.639924477710215e-06, - "loss": 1.2039, - "step": 716000 - }, - { - "epoch": 1.71, - "learning_rate": 8.631991464077332e-06, - "loss": 1.2133, - "step": 716500 - }, - { - "epoch": 1.71, - "learning_rate": 8.624058450444448e-06, - "loss": 1.2174, - "step": 717000 - }, - { - "epoch": 1.71, - "learning_rate": 8.616125436811564e-06, - "loss": 1.2189, - "step": 717500 - }, - { - "epoch": 1.71, - "learning_rate": 8.60819242317868e-06, - "loss": 1.2109, - "step": 718000 - }, - { - "epoch": 1.71, - "learning_rate": 8.600259409545796e-06, - "loss": 1.2058, - "step": 718500 - }, - { - "epoch": 1.71, - "learning_rate": 8.592326395912912e-06, - "loss": 1.2209, - "step": 719000 - }, - { - "epoch": 1.71, - "learning_rate": 8.584393382280028e-06, - "loss": 1.2154, - "step": 719500 - }, - { - "epoch": 1.71, - "learning_rate": 8.576460368647144e-06, - "loss": 1.2254, - "step": 720000 - }, - { - "epoch": 1.71, - "learning_rate": 8.56852735501426e-06, - "loss": 1.2102, - "step": 720500 - }, - { - "epoch": 1.72, - "learning_rate": 8.560594341381377e-06, - "loss": 1.2163, - "step": 721000 - }, - { - "epoch": 1.72, - "learning_rate": 8.552661327748493e-06, - "loss": 1.2094, - "step": 721500 - }, - { - "epoch": 1.72, - "learning_rate": 8.544728314115609e-06, - "loss": 1.1982, - "step": 722000 - }, - { - "epoch": 1.72, - "learning_rate": 8.536795300482725e-06, - "loss": 1.2143, - "step": 722500 - }, - { - "epoch": 1.72, - "learning_rate": 8.52886228684984e-06, - "loss": 1.2127, - "step": 723000 - }, - { - "epoch": 1.72, - "learning_rate": 8.520929273216956e-06, - "loss": 1.2106, - "step": 723500 - }, - { - "epoch": 1.72, - "learning_rate": 8.512996259584074e-06, - "loss": 1.2127, - "step": 724000 - }, - { - "epoch": 1.72, - "learning_rate": 8.505063245951188e-06, - "loss": 1.2172, - "step": 724500 - }, - { - "epoch": 1.73, - "learning_rate": 8.497130232318304e-06, - "loss": 1.2115, - "step": 725000 - }, - { - "epoch": 1.73, - "learning_rate": 8.489197218685421e-06, - "loss": 1.1987, - "step": 725500 - }, - { - "epoch": 1.73, - "learning_rate": 8.481264205052537e-06, - "loss": 1.217, - "step": 726000 - }, - { - "epoch": 1.73, - "learning_rate": 8.473331191419653e-06, - "loss": 1.2072, - "step": 726500 - }, - { - "epoch": 1.73, - "learning_rate": 8.465398177786769e-06, - "loss": 1.2258, - "step": 727000 - }, - { - "epoch": 1.73, - "learning_rate": 8.457465164153885e-06, - "loss": 1.197, - "step": 727500 - }, - { - "epoch": 1.73, - "learning_rate": 8.449532150521e-06, - "loss": 1.2187, - "step": 728000 - }, - { - "epoch": 1.73, - "learning_rate": 8.441599136888118e-06, - "loss": 1.1952, - "step": 728500 - }, - { - "epoch": 1.73, - "learning_rate": 8.433666123255232e-06, - "loss": 1.1982, - "step": 729000 - }, - { - "epoch": 1.74, - "learning_rate": 8.42573310962235e-06, - "loss": 1.2218, - "step": 729500 - }, - { - "epoch": 1.74, - "learning_rate": 8.417800095989466e-06, - "loss": 1.214, - "step": 730000 - }, - { - "epoch": 1.74, - "learning_rate": 8.409867082356582e-06, - "loss": 1.2218, - "step": 730500 - }, - { - "epoch": 1.74, - "learning_rate": 8.401934068723698e-06, - "loss": 1.1943, - "step": 731000 - }, - { - "epoch": 1.74, - "learning_rate": 8.394001055090814e-06, - "loss": 1.2029, - "step": 731500 - }, - { - "epoch": 1.74, - "learning_rate": 8.38606804145793e-06, - "loss": 1.1989, - "step": 732000 - }, - { - "epoch": 1.74, - "learning_rate": 8.378135027825047e-06, - "loss": 1.2144, - "step": 732500 - }, - { - "epoch": 1.74, - "learning_rate": 8.370202014192163e-06, - "loss": 1.2149, - "step": 733000 - }, - { - "epoch": 1.75, - "learning_rate": 8.362269000559277e-06, - "loss": 1.1873, - "step": 733500 - }, - { - "epoch": 1.75, - "learning_rate": 8.354335986926395e-06, - "loss": 1.1982, - "step": 734000 - }, - { - "epoch": 1.75, - "learning_rate": 8.34640297329351e-06, - "loss": 1.1941, - "step": 734500 - }, - { - "epoch": 1.75, - "learning_rate": 8.338469959660626e-06, - "loss": 1.201, - "step": 735000 - }, - { - "epoch": 1.75, - "learning_rate": 8.330536946027742e-06, - "loss": 1.2132, - "step": 735500 - }, - { - "epoch": 1.75, - "learning_rate": 8.322603932394858e-06, - "loss": 1.2072, - "step": 736000 - }, - { - "epoch": 1.75, - "learning_rate": 8.314670918761974e-06, - "loss": 1.2003, - "step": 736500 - }, - { - "epoch": 1.75, - "learning_rate": 8.306737905129091e-06, - "loss": 1.2099, - "step": 737000 - }, - { - "epoch": 1.76, - "learning_rate": 8.298804891496207e-06, - "loss": 1.2126, - "step": 737500 - }, - { - "epoch": 1.76, - "learning_rate": 8.290871877863323e-06, - "loss": 1.2085, - "step": 738000 - }, - { - "epoch": 1.76, - "learning_rate": 8.282938864230439e-06, - "loss": 1.208, - "step": 738500 - }, - { - "epoch": 1.76, - "learning_rate": 8.275005850597555e-06, - "loss": 1.2003, - "step": 739000 - }, - { - "epoch": 1.76, - "learning_rate": 8.26707283696467e-06, - "loss": 1.2053, - "step": 739500 - }, - { - "epoch": 1.76, - "learning_rate": 8.259139823331787e-06, - "loss": 1.2025, - "step": 740000 - }, - { - "epoch": 1.76, - "learning_rate": 8.251206809698902e-06, - "loss": 1.2121, - "step": 740500 - }, - { - "epoch": 1.76, - "learning_rate": 8.24327379606602e-06, - "loss": 1.2019, - "step": 741000 - }, - { - "epoch": 1.76, - "learning_rate": 8.235340782433136e-06, - "loss": 1.2121, - "step": 741500 - }, - { - "epoch": 1.77, - "learning_rate": 8.227407768800252e-06, - "loss": 1.2208, - "step": 742000 - }, - { - "epoch": 1.77, - "learning_rate": 8.219474755167368e-06, - "loss": 1.2161, - "step": 742500 - }, - { - "epoch": 1.77, - "learning_rate": 8.211541741534483e-06, - "loss": 1.2203, - "step": 743000 - }, - { - "epoch": 1.77, - "learning_rate": 8.2036087279016e-06, - "loss": 1.2102, - "step": 743500 - }, - { - "epoch": 1.77, - "learning_rate": 8.195675714268715e-06, - "loss": 1.2069, - "step": 744000 - }, - { - "epoch": 1.77, - "learning_rate": 8.187742700635831e-06, - "loss": 1.2107, - "step": 744500 - }, - { - "epoch": 1.77, - "learning_rate": 8.179809687002947e-06, - "loss": 1.196, - "step": 745000 - }, - { - "epoch": 1.77, - "learning_rate": 8.171876673370064e-06, - "loss": 1.1884, - "step": 745500 - }, - { - "epoch": 1.78, - "learning_rate": 8.16394365973718e-06, - "loss": 1.2029, - "step": 746000 - }, - { - "epoch": 1.78, - "learning_rate": 8.156010646104296e-06, - "loss": 1.2077, - "step": 746500 - }, - { - "epoch": 1.78, - "learning_rate": 8.148077632471412e-06, - "loss": 1.2021, - "step": 747000 - }, - { - "epoch": 1.78, - "learning_rate": 8.140144618838528e-06, - "loss": 1.2097, - "step": 747500 - }, - { - "epoch": 1.78, - "learning_rate": 8.132211605205644e-06, - "loss": 1.2107, - "step": 748000 - }, - { - "epoch": 1.78, - "learning_rate": 8.124278591572761e-06, - "loss": 1.2107, - "step": 748500 - }, - { - "epoch": 1.78, - "learning_rate": 8.116345577939876e-06, - "loss": 1.2031, - "step": 749000 - }, - { - "epoch": 1.78, - "learning_rate": 8.108412564306991e-06, - "loss": 1.215, - "step": 749500 - }, - { - "epoch": 1.78, - "learning_rate": 8.100479550674109e-06, - "loss": 1.2002, - "step": 750000 - }, - { - "epoch": 1.79, - "learning_rate": 8.092546537041225e-06, - "loss": 1.2135, - "step": 750500 - }, - { - "epoch": 1.79, - "learning_rate": 8.08461352340834e-06, - "loss": 1.196, - "step": 751000 - }, - { - "epoch": 1.79, - "learning_rate": 8.076680509775457e-06, - "loss": 1.2067, - "step": 751500 - }, - { - "epoch": 1.79, - "learning_rate": 8.068747496142572e-06, - "loss": 1.2048, - "step": 752000 - }, - { - "epoch": 1.79, - "learning_rate": 8.060814482509688e-06, - "loss": 1.2176, - "step": 752500 - }, - { - "epoch": 1.79, - "learning_rate": 8.052881468876806e-06, - "loss": 1.2126, - "step": 753000 - }, - { - "epoch": 1.79, - "learning_rate": 8.04494845524392e-06, - "loss": 1.2093, - "step": 753500 - }, - { - "epoch": 1.79, - "learning_rate": 8.037015441611038e-06, - "loss": 1.206, - "step": 754000 - }, - { - "epoch": 1.8, - "learning_rate": 8.029082427978153e-06, - "loss": 1.1969, - "step": 754500 - }, - { - "epoch": 1.8, - "learning_rate": 8.02114941434527e-06, - "loss": 1.2162, - "step": 755000 - }, - { - "epoch": 1.8, - "learning_rate": 8.013216400712385e-06, - "loss": 1.1944, - "step": 755500 - }, - { - "epoch": 1.8, - "learning_rate": 8.005283387079501e-06, - "loss": 1.2028, - "step": 756000 - }, - { - "epoch": 1.8, - "learning_rate": 7.997350373446617e-06, - "loss": 1.1856, - "step": 756500 - }, - { - "epoch": 1.8, - "learning_rate": 7.989417359813734e-06, - "loss": 1.221, - "step": 757000 - }, - { - "epoch": 1.8, - "learning_rate": 7.98148434618085e-06, - "loss": 1.1955, - "step": 757500 - }, - { - "epoch": 1.8, - "learning_rate": 7.973551332547965e-06, - "loss": 1.1967, - "step": 758000 - }, - { - "epoch": 1.81, - "learning_rate": 7.965618318915082e-06, - "loss": 1.2161, - "step": 758500 - }, - { - "epoch": 1.81, - "learning_rate": 7.957685305282198e-06, - "loss": 1.203, - "step": 759000 - }, - { - "epoch": 1.81, - "learning_rate": 7.949752291649314e-06, - "loss": 1.2127, - "step": 759500 - }, - { - "epoch": 1.81, - "learning_rate": 7.94181927801643e-06, - "loss": 1.2106, - "step": 760000 - }, - { - "epoch": 1.81, - "learning_rate": 7.933886264383546e-06, - "loss": 1.2082, - "step": 760500 - }, - { - "epoch": 1.81, - "learning_rate": 7.925953250750661e-06, - "loss": 1.2047, - "step": 761000 - }, - { - "epoch": 1.81, - "learning_rate": 7.918020237117779e-06, - "loss": 1.1979, - "step": 761500 - }, - { - "epoch": 1.81, - "learning_rate": 7.910087223484895e-06, - "loss": 1.1888, - "step": 762000 - }, - { - "epoch": 1.81, - "learning_rate": 7.90215420985201e-06, - "loss": 1.2056, - "step": 762500 - }, - { - "epoch": 1.82, - "learning_rate": 7.894221196219127e-06, - "loss": 1.2081, - "step": 763000 - }, - { - "epoch": 1.82, - "learning_rate": 7.886288182586242e-06, - "loss": 1.183, - "step": 763500 - }, - { - "epoch": 1.82, - "learning_rate": 7.878355168953358e-06, - "loss": 1.1933, - "step": 764000 - }, - { - "epoch": 1.82, - "learning_rate": 7.870422155320474e-06, - "loss": 1.1922, - "step": 764500 - }, - { - "epoch": 1.82, - "learning_rate": 7.86248914168759e-06, - "loss": 1.2201, - "step": 765000 - }, - { - "epoch": 1.82, - "learning_rate": 7.854556128054706e-06, - "loss": 1.1971, - "step": 765500 - }, - { - "epoch": 1.82, - "learning_rate": 7.846623114421823e-06, - "loss": 1.205, - "step": 766000 - }, - { - "epoch": 1.82, - "learning_rate": 7.83869010078894e-06, - "loss": 1.2127, - "step": 766500 - }, - { - "epoch": 1.83, - "learning_rate": 7.830757087156055e-06, - "loss": 1.1925, - "step": 767000 - }, - { - "epoch": 1.83, - "learning_rate": 7.822824073523171e-06, - "loss": 1.2001, - "step": 767500 - }, - { - "epoch": 1.83, - "learning_rate": 7.814891059890287e-06, - "loss": 1.2059, - "step": 768000 - }, - { - "epoch": 1.83, - "learning_rate": 7.806958046257403e-06, - "loss": 1.1868, - "step": 768500 - }, - { - "epoch": 1.83, - "learning_rate": 7.799025032624519e-06, - "loss": 1.1982, - "step": 769000 - }, - { - "epoch": 1.83, - "learning_rate": 7.791092018991634e-06, - "loss": 1.1957, - "step": 769500 - }, - { - "epoch": 1.83, - "learning_rate": 7.783159005358752e-06, - "loss": 1.2069, - "step": 770000 - }, - { - "epoch": 1.83, - "learning_rate": 7.775225991725868e-06, - "loss": 1.191, - "step": 770500 - }, - { - "epoch": 1.83, - "learning_rate": 7.767292978092984e-06, - "loss": 1.2078, - "step": 771000 - }, - { - "epoch": 1.84, - "learning_rate": 7.7593599644601e-06, - "loss": 1.1973, - "step": 771500 - }, - { - "epoch": 1.84, - "learning_rate": 7.751426950827216e-06, - "loss": 1.1992, - "step": 772000 - }, - { - "epoch": 1.84, - "learning_rate": 7.743493937194331e-06, - "loss": 1.1951, - "step": 772500 - }, - { - "epoch": 1.84, - "learning_rate": 7.735560923561449e-06, - "loss": 1.1879, - "step": 773000 - }, - { - "epoch": 1.84, - "learning_rate": 7.727627909928563e-06, - "loss": 1.1847, - "step": 773500 - }, - { - "epoch": 1.84, - "learning_rate": 7.719694896295679e-06, - "loss": 1.2002, - "step": 774000 - }, - { - "epoch": 1.84, - "learning_rate": 7.711761882662797e-06, - "loss": 1.193, - "step": 774500 - }, - { - "epoch": 1.84, - "learning_rate": 7.703828869029912e-06, - "loss": 1.1985, - "step": 775000 - }, - { - "epoch": 1.85, - "learning_rate": 7.695895855397028e-06, - "loss": 1.1993, - "step": 775500 - }, - { - "epoch": 1.85, - "learning_rate": 7.687962841764144e-06, - "loss": 1.2016, - "step": 776000 - }, - { - "epoch": 1.85, - "learning_rate": 7.68002982813126e-06, - "loss": 1.2017, - "step": 776500 - }, - { - "epoch": 1.85, - "learning_rate": 7.672096814498376e-06, - "loss": 1.2132, - "step": 777000 - }, - { - "epoch": 1.85, - "learning_rate": 7.664163800865493e-06, - "loss": 1.2184, - "step": 777500 - }, - { - "epoch": 1.85, - "learning_rate": 7.656230787232608e-06, - "loss": 1.1996, - "step": 778000 - }, - { - "epoch": 1.85, - "learning_rate": 7.648297773599725e-06, - "loss": 1.2067, - "step": 778500 - }, - { - "epoch": 1.85, - "learning_rate": 7.640364759966841e-06, - "loss": 1.1858, - "step": 779000 - }, - { - "epoch": 1.86, - "learning_rate": 7.632431746333957e-06, - "loss": 1.1925, - "step": 779500 - }, - { - "epoch": 1.86, - "learning_rate": 7.6244987327010736e-06, - "loss": 1.1994, - "step": 780000 - }, - { - "epoch": 1.86, - "learning_rate": 7.616565719068189e-06, - "loss": 1.2115, - "step": 780500 - }, - { - "epoch": 1.86, - "learning_rate": 7.6086327054353045e-06, - "loss": 1.2029, - "step": 781000 - }, - { - "epoch": 1.86, - "learning_rate": 7.600699691802421e-06, - "loss": 1.202, - "step": 781500 - }, - { - "epoch": 1.86, - "learning_rate": 7.592766678169538e-06, - "loss": 1.1985, - "step": 782000 - }, - { - "epoch": 1.86, - "learning_rate": 7.584833664536653e-06, - "loss": 1.1924, - "step": 782500 - }, - { - "epoch": 1.86, - "learning_rate": 7.576900650903769e-06, - "loss": 1.199, - "step": 783000 - }, - { - "epoch": 1.86, - "learning_rate": 7.5689676372708855e-06, - "loss": 1.21, - "step": 783500 - }, - { - "epoch": 1.87, - "learning_rate": 7.561034623638001e-06, - "loss": 1.2042, - "step": 784000 - }, - { - "epoch": 1.87, - "learning_rate": 7.553101610005117e-06, - "loss": 1.202, - "step": 784500 - }, - { - "epoch": 1.87, - "learning_rate": 7.545168596372233e-06, - "loss": 1.1943, - "step": 785000 - }, - { - "epoch": 1.87, - "learning_rate": 7.53723558273935e-06, - "loss": 1.1947, - "step": 785500 - }, - { - "epoch": 1.87, - "learning_rate": 7.529302569106466e-06, - "loss": 1.2078, - "step": 786000 - }, - { - "epoch": 1.87, - "learning_rate": 7.521369555473582e-06, - "loss": 1.1961, - "step": 786500 - }, - { - "epoch": 1.87, - "learning_rate": 7.513436541840697e-06, - "loss": 1.1909, - "step": 787000 - }, - { - "epoch": 1.87, - "learning_rate": 7.505503528207814e-06, - "loss": 1.1897, - "step": 787500 - }, - { - "epoch": 1.88, - "learning_rate": 7.49757051457493e-06, - "loss": 1.1851, - "step": 788000 - }, - { - "epoch": 1.88, - "learning_rate": 7.489637500942047e-06, - "loss": 1.1985, - "step": 788500 - }, - { - "epoch": 1.88, - "learning_rate": 7.481704487309162e-06, - "loss": 1.2052, - "step": 789000 - }, - { - "epoch": 1.88, - "learning_rate": 7.4737714736762775e-06, - "loss": 1.1853, - "step": 789500 - }, - { - "epoch": 1.88, - "learning_rate": 7.465838460043394e-06, - "loss": 1.2105, - "step": 790000 - }, - { - "epoch": 1.88, - "learning_rate": 7.45790544641051e-06, - "loss": 1.2175, - "step": 790500 - }, - { - "epoch": 1.88, - "learning_rate": 7.449972432777627e-06, - "loss": 1.1808, - "step": 791000 - }, - { - "epoch": 1.88, - "learning_rate": 7.442039419144742e-06, - "loss": 1.1983, - "step": 791500 - }, - { - "epoch": 1.88, - "learning_rate": 7.4341064055118586e-06, - "loss": 1.2002, - "step": 792000 - }, - { - "epoch": 1.89, - "learning_rate": 7.4261733918789744e-06, - "loss": 1.2018, - "step": 792500 - }, - { - "epoch": 1.89, - "learning_rate": 7.418240378246091e-06, - "loss": 1.2046, - "step": 793000 - }, - { - "epoch": 1.89, - "learning_rate": 7.410307364613206e-06, - "loss": 1.2096, - "step": 793500 - }, - { - "epoch": 1.89, - "learning_rate": 7.402374350980323e-06, - "loss": 1.1899, - "step": 794000 - }, - { - "epoch": 1.89, - "learning_rate": 7.394441337347439e-06, - "loss": 1.2129, - "step": 794500 - }, - { - "epoch": 1.89, - "learning_rate": 7.3865083237145554e-06, - "loss": 1.2009, - "step": 795000 - }, - { - "epoch": 1.89, - "learning_rate": 7.378575310081671e-06, - "loss": 1.1873, - "step": 795500 - }, - { - "epoch": 1.89, - "learning_rate": 7.370642296448786e-06, - "loss": 1.1951, - "step": 796000 - }, - { - "epoch": 1.9, - "learning_rate": 7.362709282815903e-06, - "loss": 1.189, - "step": 796500 - }, - { - "epoch": 1.9, - "learning_rate": 7.35477626918302e-06, - "loss": 1.196, - "step": 797000 - }, - { - "epoch": 1.9, - "learning_rate": 7.346843255550136e-06, - "loss": 1.1971, - "step": 797500 - }, - { - "epoch": 1.9, - "learning_rate": 7.338910241917251e-06, - "loss": 1.1967, - "step": 798000 - }, - { - "epoch": 1.9, - "learning_rate": 7.330977228284367e-06, - "loss": 1.1899, - "step": 798500 - }, - { - "epoch": 1.9, - "learning_rate": 7.323044214651483e-06, - "loss": 1.1989, - "step": 799000 - }, - { - "epoch": 1.9, - "learning_rate": 7.3151112010186e-06, - "loss": 1.2093, - "step": 799500 - }, - { - "epoch": 1.9, - "learning_rate": 7.307178187385716e-06, - "loss": 1.1961, - "step": 800000 - }, - { - "epoch": 1.91, - "learning_rate": 7.299245173752832e-06, - "loss": 1.1913, - "step": 800500 - }, - { - "epoch": 1.91, - "learning_rate": 7.2913121601199475e-06, - "loss": 1.187, - "step": 801000 - }, - { - "epoch": 1.91, - "learning_rate": 7.283379146487064e-06, - "loss": 1.182, - "step": 801500 - }, - { - "epoch": 1.91, - "learning_rate": 7.27544613285418e-06, - "loss": 1.1837, - "step": 802000 - }, - { - "epoch": 1.91, - "learning_rate": 7.267513119221296e-06, - "loss": 1.1905, - "step": 802500 - }, - { - "epoch": 1.91, - "learning_rate": 7.259580105588412e-06, - "loss": 1.1965, - "step": 803000 - }, - { - "epoch": 1.91, - "learning_rate": 7.2516470919555285e-06, - "loss": 1.1928, - "step": 803500 - }, - { - "epoch": 1.91, - "learning_rate": 7.243714078322644e-06, - "loss": 1.1874, - "step": 804000 - }, - { - "epoch": 1.91, - "learning_rate": 7.235781064689759e-06, - "loss": 1.2034, - "step": 804500 - }, - { - "epoch": 1.92, - "learning_rate": 7.227848051056876e-06, - "loss": 1.1998, - "step": 805000 - }, - { - "epoch": 1.92, - "learning_rate": 7.219915037423992e-06, - "loss": 1.1794, - "step": 805500 - }, - { - "epoch": 1.92, - "learning_rate": 7.211982023791109e-06, - "loss": 1.2004, - "step": 806000 - }, - { - "epoch": 1.92, - "learning_rate": 7.204049010158225e-06, - "loss": 1.1982, - "step": 806500 - }, - { - "epoch": 1.92, - "learning_rate": 7.1961159965253404e-06, - "loss": 1.1876, - "step": 807000 - }, - { - "epoch": 1.92, - "learning_rate": 7.188182982892456e-06, - "loss": 1.2054, - "step": 807500 - }, - { - "epoch": 1.92, - "learning_rate": 7.180249969259573e-06, - "loss": 1.2039, - "step": 808000 - }, - { - "epoch": 1.92, - "learning_rate": 7.172316955626689e-06, - "loss": 1.1919, - "step": 808500 - }, - { - "epoch": 1.93, - "learning_rate": 7.164383941993805e-06, - "loss": 1.1786, - "step": 809000 - }, - { - "epoch": 1.93, - "learning_rate": 7.156450928360921e-06, - "loss": 1.1874, - "step": 809500 - }, - { - "epoch": 1.93, - "learning_rate": 7.148517914728037e-06, - "loss": 1.1925, - "step": 810000 - }, - { - "epoch": 1.93, - "learning_rate": 7.140584901095153e-06, - "loss": 1.2053, - "step": 810500 - }, - { - "epoch": 1.93, - "learning_rate": 7.13265188746227e-06, - "loss": 1.2005, - "step": 811000 - }, - { - "epoch": 1.93, - "learning_rate": 7.124718873829385e-06, - "loss": 1.204, - "step": 811500 - }, - { - "epoch": 1.93, - "learning_rate": 7.116785860196501e-06, - "loss": 1.214, - "step": 812000 - }, - { - "epoch": 1.93, - "learning_rate": 7.1088528465636175e-06, - "loss": 1.1993, - "step": 812500 - }, - { - "epoch": 1.93, - "learning_rate": 7.100919832930734e-06, - "loss": 1.1895, - "step": 813000 - }, - { - "epoch": 1.94, - "learning_rate": 7.092986819297849e-06, - "loss": 1.1815, - "step": 813500 - }, - { - "epoch": 1.94, - "learning_rate": 7.085053805664965e-06, - "loss": 1.1974, - "step": 814000 - }, - { - "epoch": 1.94, - "learning_rate": 7.077120792032082e-06, - "loss": 1.1966, - "step": 814500 - }, - { - "epoch": 1.94, - "learning_rate": 7.069187778399198e-06, - "loss": 1.2029, - "step": 815000 - }, - { - "epoch": 1.94, - "learning_rate": 7.061254764766314e-06, - "loss": 1.2016, - "step": 815500 - }, - { - "epoch": 1.94, - "learning_rate": 7.053321751133429e-06, - "loss": 1.191, - "step": 816000 - }, - { - "epoch": 1.94, - "learning_rate": 7.045388737500546e-06, - "loss": 1.1918, - "step": 816500 - }, - { - "epoch": 1.94, - "learning_rate": 7.037455723867662e-06, - "loss": 1.1887, - "step": 817000 - }, - { - "epoch": 1.95, - "learning_rate": 7.029522710234779e-06, - "loss": 1.2043, - "step": 817500 - }, - { - "epoch": 1.95, - "learning_rate": 7.021589696601894e-06, - "loss": 1.1876, - "step": 818000 - }, - { - "epoch": 1.95, - "learning_rate": 7.01365668296901e-06, - "loss": 1.1809, - "step": 818500 - }, - { - "epoch": 1.95, - "learning_rate": 7.005723669336126e-06, - "loss": 1.1993, - "step": 819000 - }, - { - "epoch": 1.95, - "learning_rate": 6.997790655703243e-06, - "loss": 1.1977, - "step": 819500 - }, - { - "epoch": 1.95, - "learning_rate": 6.989857642070359e-06, - "loss": 1.1895, - "step": 820000 - }, - { - "epoch": 1.95, - "learning_rate": 6.981924628437474e-06, - "loss": 1.1873, - "step": 820500 - }, - { - "epoch": 1.95, - "learning_rate": 6.973991614804591e-06, - "loss": 1.1772, - "step": 821000 - }, - { - "epoch": 1.96, - "learning_rate": 6.9660586011717065e-06, - "loss": 1.1851, - "step": 821500 - }, - { - "epoch": 1.96, - "learning_rate": 6.958125587538823e-06, - "loss": 1.1978, - "step": 822000 - }, - { - "epoch": 1.96, - "learning_rate": 6.950192573905938e-06, - "loss": 1.1879, - "step": 822500 - }, - { - "epoch": 1.96, - "learning_rate": 6.942259560273055e-06, - "loss": 1.1934, - "step": 823000 - }, - { - "epoch": 1.96, - "learning_rate": 6.934326546640171e-06, - "loss": 1.1992, - "step": 823500 - }, - { - "epoch": 1.96, - "learning_rate": 6.9263935330072875e-06, - "loss": 1.1991, - "step": 824000 - }, - { - "epoch": 1.96, - "learning_rate": 6.918460519374403e-06, - "loss": 1.2042, - "step": 824500 - }, - { - "epoch": 1.96, - "learning_rate": 6.910527505741519e-06, - "loss": 1.1911, - "step": 825000 - }, - { - "epoch": 1.96, - "learning_rate": 6.902594492108635e-06, - "loss": 1.1863, - "step": 825500 - }, - { - "epoch": 1.97, - "learning_rate": 6.894661478475752e-06, - "loss": 1.1915, - "step": 826000 - }, - { - "epoch": 1.97, - "learning_rate": 6.886728464842868e-06, - "loss": 1.1803, - "step": 826500 - }, - { - "epoch": 1.97, - "learning_rate": 6.878795451209983e-06, - "loss": 1.1952, - "step": 827000 - }, - { - "epoch": 1.97, - "learning_rate": 6.870862437577099e-06, - "loss": 1.178, - "step": 827500 - }, - { - "epoch": 1.97, - "learning_rate": 6.862929423944216e-06, - "loss": 1.1832, - "step": 828000 - }, - { - "epoch": 1.97, - "learning_rate": 6.854996410311332e-06, - "loss": 1.2055, - "step": 828500 - }, - { - "epoch": 1.97, - "learning_rate": 6.847063396678447e-06, - "loss": 1.1833, - "step": 829000 - }, - { - "epoch": 1.97, - "learning_rate": 6.839130383045564e-06, - "loss": 1.1813, - "step": 829500 - }, - { - "epoch": 1.98, - "learning_rate": 6.8311973694126795e-06, - "loss": 1.1941, - "step": 830000 - }, - { - "epoch": 1.98, - "learning_rate": 6.823264355779796e-06, - "loss": 1.1762, - "step": 830500 - }, - { - "epoch": 1.98, - "learning_rate": 6.815331342146912e-06, - "loss": 1.1858, - "step": 831000 - }, - { - "epoch": 1.98, - "learning_rate": 6.807398328514028e-06, - "loss": 1.1913, - "step": 831500 - }, - { - "epoch": 1.98, - "learning_rate": 6.799465314881144e-06, - "loss": 1.1903, - "step": 832000 - }, - { - "epoch": 1.98, - "learning_rate": 6.7915323012482606e-06, - "loss": 1.2029, - "step": 832500 - }, - { - "epoch": 1.98, - "learning_rate": 6.7835992876153764e-06, - "loss": 1.175, - "step": 833000 - }, - { - "epoch": 1.98, - "learning_rate": 6.775666273982492e-06, - "loss": 1.2037, - "step": 833500 - }, - { - "epoch": 1.98, - "learning_rate": 6.767733260349608e-06, - "loss": 1.204, - "step": 834000 - }, - { - "epoch": 1.99, - "learning_rate": 6.759800246716725e-06, - "loss": 1.1814, - "step": 834500 - }, - { - "epoch": 1.99, - "learning_rate": 6.751867233083841e-06, - "loss": 1.1863, - "step": 835000 - }, - { - "epoch": 1.99, - "learning_rate": 6.7439342194509574e-06, - "loss": 1.1878, - "step": 835500 - }, - { - "epoch": 1.99, - "learning_rate": 6.7360012058180725e-06, - "loss": 1.1881, - "step": 836000 - }, - { - "epoch": 1.99, - "learning_rate": 6.728068192185188e-06, - "loss": 1.1876, - "step": 836500 - }, - { - "epoch": 1.99, - "learning_rate": 6.720135178552305e-06, - "loss": 1.1892, - "step": 837000 - }, - { - "epoch": 1.99, - "learning_rate": 6.712202164919422e-06, - "loss": 1.1922, - "step": 837500 - }, - { - "epoch": 1.99, - "learning_rate": 6.704269151286537e-06, - "loss": 1.1981, - "step": 838000 - }, - { - "epoch": 2.0, - "learning_rate": 6.696336137653653e-06, - "loss": 1.1833, - "step": 838500 - }, - { - "epoch": 2.0, - "learning_rate": 6.688403124020769e-06, - "loss": 1.1869, - "step": 839000 - }, - { - "epoch": 2.0, - "learning_rate": 6.680470110387885e-06, - "loss": 1.1834, - "step": 839500 - }, - { - "epoch": 2.0, - "learning_rate": 6.672537096755002e-06, - "loss": 1.1937, - "step": 840000 - }, - { - "epoch": 2.0, - "eval_loss": 1.169049859046936, - "eval_runtime": 3623.1597, - "eval_samples_per_second": 366.405, - "eval_steps_per_second": 22.9, - "step": 840370 - }, - { - "epoch": 2.0, - "learning_rate": 6.664604083122117e-06, - "loss": 1.1996, - "step": 840500 - }, - { - "epoch": 2.0, - "learning_rate": 6.656671069489234e-06, - "loss": 1.1678, - "step": 841000 - }, - { - "epoch": 2.0, - "learning_rate": 6.6487380558563495e-06, - "loss": 1.1902, - "step": 841500 - }, - { - "epoch": 2.0, - "learning_rate": 6.640805042223466e-06, - "loss": 1.1885, - "step": 842000 - }, - { - "epoch": 2.01, - "learning_rate": 6.632872028590581e-06, - "loss": 1.1788, - "step": 842500 - }, - { - "epoch": 2.01, - "learning_rate": 6.624939014957697e-06, - "loss": 1.1884, - "step": 843000 - }, - { - "epoch": 2.01, - "learning_rate": 6.617006001324814e-06, - "loss": 1.1748, - "step": 843500 - }, - { - "epoch": 2.01, - "learning_rate": 6.6090729876919305e-06, - "loss": 1.1931, - "step": 844000 - }, - { - "epoch": 2.01, - "learning_rate": 6.601139974059046e-06, - "loss": 1.1791, - "step": 844500 - }, - { - "epoch": 2.01, - "learning_rate": 6.593206960426161e-06, - "loss": 1.1721, - "step": 845000 - }, - { - "epoch": 2.01, - "learning_rate": 6.585273946793278e-06, - "loss": 1.1864, - "step": 845500 - }, - { - "epoch": 2.01, - "learning_rate": 6.577340933160394e-06, - "loss": 1.1891, - "step": 846000 - }, - { - "epoch": 2.01, - "learning_rate": 6.569407919527511e-06, - "loss": 1.1789, - "step": 846500 - }, - { - "epoch": 2.02, - "learning_rate": 6.561474905894626e-06, - "loss": 1.1773, - "step": 847000 - }, - { - "epoch": 2.02, - "learning_rate": 6.5535418922617424e-06, - "loss": 1.1967, - "step": 847500 - }, - { - "epoch": 2.02, - "learning_rate": 6.545608878628858e-06, - "loss": 1.1974, - "step": 848000 - }, - { - "epoch": 2.02, - "learning_rate": 6.537675864995975e-06, - "loss": 1.1686, - "step": 848500 - }, - { - "epoch": 2.02, - "learning_rate": 6.52974285136309e-06, - "loss": 1.1928, - "step": 849000 - }, - { - "epoch": 2.02, - "learning_rate": 6.521809837730207e-06, - "loss": 1.1865, - "step": 849500 - }, - { - "epoch": 2.02, - "learning_rate": 6.513876824097323e-06, - "loss": 1.1695, - "step": 850000 - }, - { - "epoch": 2.02, - "learning_rate": 6.505943810464439e-06, - "loss": 1.1982, - "step": 850500 - }, - { - "epoch": 2.03, - "learning_rate": 6.498010796831555e-06, - "loss": 1.1748, - "step": 851000 - }, - { - "epoch": 2.03, - "learning_rate": 6.49007778319867e-06, - "loss": 1.1837, - "step": 851500 - }, - { - "epoch": 2.03, - "learning_rate": 6.482144769565787e-06, - "loss": 1.1869, - "step": 852000 - }, - { - "epoch": 2.03, - "learning_rate": 6.474211755932903e-06, - "loss": 1.178, - "step": 852500 - }, - { - "epoch": 2.03, - "learning_rate": 6.4662787423000195e-06, - "loss": 1.1664, - "step": 853000 - }, - { - "epoch": 2.03, - "learning_rate": 6.4583457286671345e-06, - "loss": 1.1672, - "step": 853500 - }, - { - "epoch": 2.03, - "learning_rate": 6.450412715034251e-06, - "loss": 1.1839, - "step": 854000 - }, - { - "epoch": 2.03, - "learning_rate": 6.442479701401367e-06, - "loss": 1.1904, - "step": 854500 - }, - { - "epoch": 2.03, - "learning_rate": 6.434546687768484e-06, - "loss": 1.1913, - "step": 855000 - }, - { - "epoch": 2.04, - "learning_rate": 6.4266136741356e-06, - "loss": 1.1941, - "step": 855500 - }, - { - "epoch": 2.04, - "learning_rate": 6.4186806605027155e-06, - "loss": 1.1875, - "step": 856000 - }, - { - "epoch": 2.04, - "learning_rate": 6.410747646869831e-06, - "loss": 1.1772, - "step": 856500 - }, - { - "epoch": 2.04, - "learning_rate": 6.402814633236948e-06, - "loss": 1.2005, - "step": 857000 - }, - { - "epoch": 2.04, - "learning_rate": 6.394881619604064e-06, - "loss": 1.1729, - "step": 857500 - }, - { - "epoch": 2.04, - "learning_rate": 6.386948605971179e-06, - "loss": 1.163, - "step": 858000 - }, - { - "epoch": 2.04, - "learning_rate": 6.379015592338296e-06, - "loss": 1.1781, - "step": 858500 - }, - { - "epoch": 2.04, - "learning_rate": 6.371082578705412e-06, - "loss": 1.1802, - "step": 859000 - }, - { - "epoch": 2.05, - "learning_rate": 6.363149565072528e-06, - "loss": 1.1739, - "step": 859500 - }, - { - "epoch": 2.05, - "learning_rate": 6.355216551439645e-06, - "loss": 1.1935, - "step": 860000 - }, - { - "epoch": 2.05, - "learning_rate": 6.34728353780676e-06, - "loss": 1.1873, - "step": 860500 - }, - { - "epoch": 2.05, - "learning_rate": 6.339350524173876e-06, - "loss": 1.1752, - "step": 861000 - }, - { - "epoch": 2.05, - "learning_rate": 6.331417510540993e-06, - "loss": 1.1839, - "step": 861500 - }, - { - "epoch": 2.05, - "learning_rate": 6.3234844969081084e-06, - "loss": 1.185, - "step": 862000 - }, - { - "epoch": 2.05, - "learning_rate": 6.315551483275224e-06, - "loss": 1.1716, - "step": 862500 - }, - { - "epoch": 2.05, - "learning_rate": 6.30761846964234e-06, - "loss": 1.1794, - "step": 863000 - }, - { - "epoch": 2.06, - "learning_rate": 6.299685456009457e-06, - "loss": 1.1826, - "step": 863500 - }, - { - "epoch": 2.06, - "learning_rate": 6.291752442376573e-06, - "loss": 1.1728, - "step": 864000 - }, - { - "epoch": 2.06, - "learning_rate": 6.2838194287436895e-06, - "loss": 1.1873, - "step": 864500 - }, - { - "epoch": 2.06, - "learning_rate": 6.2758864151108045e-06, - "loss": 1.1739, - "step": 865000 - }, - { - "epoch": 2.06, - "learning_rate": 6.267953401477921e-06, - "loss": 1.1873, - "step": 865500 - }, - { - "epoch": 2.06, - "learning_rate": 6.260020387845037e-06, - "loss": 1.1794, - "step": 866000 - }, - { - "epoch": 2.06, - "learning_rate": 6.252087374212154e-06, - "loss": 1.175, - "step": 866500 - }, - { - "epoch": 2.06, - "learning_rate": 6.244154360579269e-06, - "loss": 1.1816, - "step": 867000 - }, - { - "epoch": 2.06, - "learning_rate": 6.236221346946385e-06, - "loss": 1.1868, - "step": 867500 - }, - { - "epoch": 2.07, - "learning_rate": 6.228288333313501e-06, - "loss": 1.1741, - "step": 868000 - }, - { - "epoch": 2.07, - "learning_rate": 6.220355319680618e-06, - "loss": 1.1915, - "step": 868500 - }, - { - "epoch": 2.07, - "learning_rate": 6.212422306047734e-06, - "loss": 1.1641, - "step": 869000 - }, - { - "epoch": 2.07, - "learning_rate": 6.204489292414849e-06, - "loss": 1.1909, - "step": 869500 - }, - { - "epoch": 2.07, - "learning_rate": 6.196556278781966e-06, - "loss": 1.181, - "step": 870000 - }, - { - "epoch": 2.07, - "learning_rate": 6.1886232651490815e-06, - "loss": 1.1886, - "step": 870500 - }, - { - "epoch": 2.07, - "learning_rate": 6.180690251516198e-06, - "loss": 1.194, - "step": 871000 - }, - { - "epoch": 2.07, - "learning_rate": 6.172757237883313e-06, - "loss": 1.1887, - "step": 871500 - }, - { - "epoch": 2.08, - "learning_rate": 6.16482422425043e-06, - "loss": 1.1794, - "step": 872000 - }, - { - "epoch": 2.08, - "learning_rate": 6.156891210617546e-06, - "loss": 1.1913, - "step": 872500 - }, - { - "epoch": 2.08, - "learning_rate": 6.1489581969846626e-06, - "loss": 1.1855, - "step": 873000 - }, - { - "epoch": 2.08, - "learning_rate": 6.1410251833517776e-06, - "loss": 1.1831, - "step": 873500 - }, - { - "epoch": 2.08, - "learning_rate": 6.133092169718894e-06, - "loss": 1.1839, - "step": 874000 - }, - { - "epoch": 2.08, - "learning_rate": 6.12515915608601e-06, - "loss": 1.1855, - "step": 874500 - }, - { - "epoch": 2.08, - "learning_rate": 6.117226142453127e-06, - "loss": 1.1661, - "step": 875000 - }, - { - "epoch": 2.08, - "learning_rate": 6.109293128820243e-06, - "loss": 1.1778, - "step": 875500 - }, - { - "epoch": 2.08, - "learning_rate": 6.101360115187358e-06, - "loss": 1.1805, - "step": 876000 - }, - { - "epoch": 2.09, - "learning_rate": 6.0934271015544745e-06, - "loss": 1.1852, - "step": 876500 - }, - { - "epoch": 2.09, - "learning_rate": 6.08549408792159e-06, - "loss": 1.1708, - "step": 877000 - }, - { - "epoch": 2.09, - "learning_rate": 6.077561074288707e-06, - "loss": 1.1756, - "step": 877500 - }, - { - "epoch": 2.09, - "learning_rate": 6.069628060655822e-06, - "loss": 1.1901, - "step": 878000 - }, - { - "epoch": 2.09, - "learning_rate": 6.061695047022939e-06, - "loss": 1.1923, - "step": 878500 - }, - { - "epoch": 2.09, - "learning_rate": 6.053762033390055e-06, - "loss": 1.1793, - "step": 879000 - }, - { - "epoch": 2.09, - "learning_rate": 6.045829019757171e-06, - "loss": 1.1748, - "step": 879500 - }, - { - "epoch": 2.09, - "learning_rate": 6.037896006124287e-06, - "loss": 1.1789, - "step": 880000 - }, - { - "epoch": 2.1, - "learning_rate": 6.029962992491403e-06, - "loss": 1.1759, - "step": 880500 - }, - { - "epoch": 2.1, - "learning_rate": 6.022029978858519e-06, - "loss": 1.1656, - "step": 881000 - }, - { - "epoch": 2.1, - "learning_rate": 6.014096965225636e-06, - "loss": 1.1951, - "step": 881500 - }, - { - "epoch": 2.1, - "learning_rate": 6.0061639515927515e-06, - "loss": 1.1889, - "step": 882000 - }, - { - "epoch": 2.1, - "learning_rate": 5.9982309379598665e-06, - "loss": 1.1804, - "step": 882500 - }, - { - "epoch": 2.1, - "learning_rate": 5.990297924326983e-06, - "loss": 1.1807, - "step": 883000 - }, - { - "epoch": 2.1, - "learning_rate": 5.982364910694099e-06, - "loss": 1.1788, - "step": 883500 - }, - { - "epoch": 2.1, - "learning_rate": 5.974431897061216e-06, - "loss": 1.1859, - "step": 884000 - }, - { - "epoch": 2.11, - "learning_rate": 5.9664988834283325e-06, - "loss": 1.1857, - "step": 884500 - }, - { - "epoch": 2.11, - "learning_rate": 5.9585658697954476e-06, - "loss": 1.1705, - "step": 885000 - }, - { - "epoch": 2.11, - "learning_rate": 5.950632856162563e-06, - "loss": 1.1769, - "step": 885500 - }, - { - "epoch": 2.11, - "learning_rate": 5.94269984252968e-06, - "loss": 1.1752, - "step": 886000 - }, - { - "epoch": 2.11, - "learning_rate": 5.934766828896796e-06, - "loss": 1.172, - "step": 886500 - }, - { - "epoch": 2.11, - "learning_rate": 5.926833815263912e-06, - "loss": 1.1983, - "step": 887000 - }, - { - "epoch": 2.11, - "learning_rate": 5.918900801631028e-06, - "loss": 1.1735, - "step": 887500 - }, - { - "epoch": 2.11, - "learning_rate": 5.9109677879981444e-06, - "loss": 1.1638, - "step": 888000 - }, - { - "epoch": 2.11, - "learning_rate": 5.90303477436526e-06, - "loss": 1.1754, - "step": 888500 - }, - { - "epoch": 2.12, - "learning_rate": 5.895101760732377e-06, - "loss": 1.1755, - "step": 889000 - }, - { - "epoch": 2.12, - "learning_rate": 5.887168747099492e-06, - "loss": 1.1851, - "step": 889500 - }, - { - "epoch": 2.12, - "learning_rate": 5.879235733466609e-06, - "loss": 1.184, - "step": 890000 - }, - { - "epoch": 2.12, - "learning_rate": 5.871302719833725e-06, - "loss": 1.1843, - "step": 890500 - }, - { - "epoch": 2.12, - "learning_rate": 5.863369706200841e-06, - "loss": 1.1807, - "step": 891000 - }, - { - "epoch": 2.12, - "learning_rate": 5.855436692567956e-06, - "loss": 1.1601, - "step": 891500 - }, - { - "epoch": 2.12, - "learning_rate": 5.847503678935072e-06, - "loss": 1.185, - "step": 892000 - }, - { - "epoch": 2.12, - "learning_rate": 5.839570665302189e-06, - "loss": 1.1869, - "step": 892500 - }, - { - "epoch": 2.13, - "learning_rate": 5.831637651669305e-06, - "loss": 1.1678, - "step": 893000 - }, - { - "epoch": 2.13, - "learning_rate": 5.823704638036421e-06, - "loss": 1.1667, - "step": 893500 - }, - { - "epoch": 2.13, - "learning_rate": 5.8157716244035365e-06, - "loss": 1.1733, - "step": 894000 - }, - { - "epoch": 2.13, - "learning_rate": 5.807838610770653e-06, - "loss": 1.1803, - "step": 894500 - }, - { - "epoch": 2.13, - "learning_rate": 5.799905597137769e-06, - "loss": 1.177, - "step": 895000 - }, - { - "epoch": 2.13, - "learning_rate": 5.791972583504886e-06, - "loss": 1.1802, - "step": 895500 - }, - { - "epoch": 2.13, - "learning_rate": 5.784039569872001e-06, - "loss": 1.1784, - "step": 896000 - }, - { - "epoch": 2.13, - "learning_rate": 5.7761065562391175e-06, - "loss": 1.181, - "step": 896500 - }, - { - "epoch": 2.13, - "learning_rate": 5.768173542606233e-06, - "loss": 1.1664, - "step": 897000 - }, - { - "epoch": 2.14, - "learning_rate": 5.76024052897335e-06, - "loss": 1.1781, - "step": 897500 - }, - { - "epoch": 2.14, - "learning_rate": 5.752307515340465e-06, - "loss": 1.1857, - "step": 898000 - }, - { - "epoch": 2.14, - "learning_rate": 5.744374501707581e-06, - "loss": 1.1846, - "step": 898500 - }, - { - "epoch": 2.14, - "learning_rate": 5.736441488074698e-06, - "loss": 1.1582, - "step": 899000 - }, - { - "epoch": 2.14, - "learning_rate": 5.728508474441814e-06, - "loss": 1.1878, - "step": 899500 - }, - { - "epoch": 2.14, - "learning_rate": 5.72057546080893e-06, - "loss": 1.1752, - "step": 900000 - }, - { - "epoch": 2.14, - "learning_rate": 5.712642447176045e-06, - "loss": 1.1841, - "step": 900500 - }, - { - "epoch": 2.14, - "learning_rate": 5.704709433543162e-06, - "loss": 1.1902, - "step": 901000 - }, - { - "epoch": 2.15, - "learning_rate": 5.696776419910278e-06, - "loss": 1.1786, - "step": 901500 - }, - { - "epoch": 2.15, - "learning_rate": 5.688843406277395e-06, - "loss": 1.1815, - "step": 902000 - }, - { - "epoch": 2.15, - "learning_rate": 5.68091039264451e-06, - "loss": 1.193, - "step": 902500 - }, - { - "epoch": 2.15, - "learning_rate": 5.672977379011626e-06, - "loss": 1.1724, - "step": 903000 - }, - { - "epoch": 2.15, - "learning_rate": 5.665044365378742e-06, - "loss": 1.1818, - "step": 903500 - }, - { - "epoch": 2.15, - "learning_rate": 5.657111351745859e-06, - "loss": 1.1881, - "step": 904000 - }, - { - "epoch": 2.15, - "learning_rate": 5.649178338112975e-06, - "loss": 1.1733, - "step": 904500 - }, - { - "epoch": 2.15, - "learning_rate": 5.641245324480091e-06, - "loss": 1.1959, - "step": 905000 - }, - { - "epoch": 2.16, - "learning_rate": 5.6333123108472065e-06, - "loss": 1.1908, - "step": 905500 - }, - { - "epoch": 2.16, - "learning_rate": 5.625379297214323e-06, - "loss": 1.1697, - "step": 906000 - }, - { - "epoch": 2.16, - "learning_rate": 5.617446283581439e-06, - "loss": 1.1973, - "step": 906500 - }, - { - "epoch": 2.16, - "learning_rate": 5.609513269948554e-06, - "loss": 1.1845, - "step": 907000 - }, - { - "epoch": 2.16, - "learning_rate": 5.601580256315671e-06, - "loss": 1.1933, - "step": 907500 - }, - { - "epoch": 2.16, - "learning_rate": 5.593647242682787e-06, - "loss": 1.1832, - "step": 908000 - }, - { - "epoch": 2.16, - "learning_rate": 5.585714229049903e-06, - "loss": 1.1727, - "step": 908500 - }, - { - "epoch": 2.16, - "learning_rate": 5.57778121541702e-06, - "loss": 1.1759, - "step": 909000 - }, - { - "epoch": 2.16, - "learning_rate": 5.569848201784135e-06, - "loss": 1.162, - "step": 909500 - }, - { - "epoch": 2.17, - "learning_rate": 5.561915188151251e-06, - "loss": 1.1837, - "step": 910000 - }, - { - "epoch": 2.17, - "learning_rate": 5.553982174518368e-06, - "loss": 1.176, - "step": 910500 - }, - { - "epoch": 2.17, - "learning_rate": 5.5460491608854835e-06, - "loss": 1.1852, - "step": 911000 - }, - { - "epoch": 2.17, - "learning_rate": 5.538116147252599e-06, - "loss": 1.1746, - "step": 911500 - }, - { - "epoch": 2.17, - "learning_rate": 5.530183133619715e-06, - "loss": 1.1885, - "step": 912000 - }, - { - "epoch": 2.17, - "learning_rate": 5.522250119986832e-06, - "loss": 1.1591, - "step": 912500 - }, - { - "epoch": 2.17, - "learning_rate": 5.514317106353948e-06, - "loss": 1.1838, - "step": 913000 - }, - { - "epoch": 2.17, - "learning_rate": 5.5063840927210646e-06, - "loss": 1.1825, - "step": 913500 - }, - { - "epoch": 2.18, - "learning_rate": 5.4984510790881796e-06, - "loss": 1.1765, - "step": 914000 - }, - { - "epoch": 2.18, - "learning_rate": 5.490518065455296e-06, - "loss": 1.182, - "step": 914500 - }, - { - "epoch": 2.18, - "learning_rate": 5.482585051822412e-06, - "loss": 1.1725, - "step": 915000 - }, - { - "epoch": 2.18, - "learning_rate": 5.474652038189529e-06, - "loss": 1.1765, - "step": 915500 - }, - { - "epoch": 2.18, - "learning_rate": 5.466719024556644e-06, - "loss": 1.1798, - "step": 916000 - }, - { - "epoch": 2.18, - "learning_rate": 5.45878601092376e-06, - "loss": 1.1747, - "step": 916500 - }, - { - "epoch": 2.18, - "learning_rate": 5.4508529972908765e-06, - "loss": 1.1802, - "step": 917000 - }, - { - "epoch": 2.18, - "learning_rate": 5.442919983657992e-06, - "loss": 1.1751, - "step": 917500 - }, - { - "epoch": 2.18, - "learning_rate": 5.434986970025108e-06, - "loss": 1.1739, - "step": 918000 - }, - { - "epoch": 2.19, - "learning_rate": 5.427053956392224e-06, - "loss": 1.1917, - "step": 918500 - }, - { - "epoch": 2.19, - "learning_rate": 5.419120942759341e-06, - "loss": 1.177, - "step": 919000 - }, - { - "epoch": 2.19, - "learning_rate": 5.411187929126457e-06, - "loss": 1.1866, - "step": 919500 - }, - { - "epoch": 2.19, - "learning_rate": 5.403254915493573e-06, - "loss": 1.1553, - "step": 920000 - }, - { - "epoch": 2.19, - "learning_rate": 5.395321901860688e-06, - "loss": 1.1725, - "step": 920500 - }, - { - "epoch": 2.19, - "learning_rate": 5.387388888227805e-06, - "loss": 1.1702, - "step": 921000 - }, - { - "epoch": 2.19, - "learning_rate": 5.379455874594921e-06, - "loss": 1.1863, - "step": 921500 - }, - { - "epoch": 2.19, - "learning_rate": 5.371522860962038e-06, - "loss": 1.178, - "step": 922000 - }, - { - "epoch": 2.2, - "learning_rate": 5.363589847329153e-06, - "loss": 1.1789, - "step": 922500 - }, - { - "epoch": 2.2, - "learning_rate": 5.3556568336962685e-06, - "loss": 1.167, - "step": 923000 - }, - { - "epoch": 2.2, - "learning_rate": 5.347723820063385e-06, - "loss": 1.1732, - "step": 923500 - }, - { - "epoch": 2.2, - "learning_rate": 5.339790806430501e-06, - "loss": 1.1585, - "step": 924000 - }, - { - "epoch": 2.2, - "learning_rate": 5.331857792797618e-06, - "loss": 1.1692, - "step": 924500 - }, - { - "epoch": 2.2, - "learning_rate": 5.323924779164733e-06, - "loss": 1.1929, - "step": 925000 - }, - { - "epoch": 2.2, - "learning_rate": 5.3159917655318495e-06, - "loss": 1.1731, - "step": 925500 - }, - { - "epoch": 2.2, - "learning_rate": 5.308058751898965e-06, - "loss": 1.1743, - "step": 926000 - }, - { - "epoch": 2.2, - "learning_rate": 5.300125738266082e-06, - "loss": 1.1681, - "step": 926500 - }, - { - "epoch": 2.21, - "learning_rate": 5.292192724633197e-06, - "loss": 1.1739, - "step": 927000 - }, - { - "epoch": 2.21, - "learning_rate": 5.284259711000314e-06, - "loss": 1.1711, - "step": 927500 - }, - { - "epoch": 2.21, - "learning_rate": 5.27632669736743e-06, - "loss": 1.1751, - "step": 928000 - }, - { - "epoch": 2.21, - "learning_rate": 5.2683936837345464e-06, - "loss": 1.1826, - "step": 928500 - }, - { - "epoch": 2.21, - "learning_rate": 5.260460670101662e-06, - "loss": 1.1761, - "step": 929000 - }, - { - "epoch": 2.21, - "learning_rate": 5.252527656468777e-06, - "loss": 1.1812, - "step": 929500 - }, - { - "epoch": 2.21, - "learning_rate": 5.244594642835894e-06, - "loss": 1.1699, - "step": 930000 - }, - { - "epoch": 2.21, - "learning_rate": 5.236661629203011e-06, - "loss": 1.1799, - "step": 930500 - }, - { - "epoch": 2.22, - "learning_rate": 5.228728615570127e-06, - "loss": 1.1826, - "step": 931000 - }, - { - "epoch": 2.22, - "learning_rate": 5.220795601937242e-06, - "loss": 1.1762, - "step": 931500 - }, - { - "epoch": 2.22, - "learning_rate": 5.212862588304358e-06, - "loss": 1.1777, - "step": 932000 - }, - { - "epoch": 2.22, - "learning_rate": 5.204929574671474e-06, - "loss": 1.1801, - "step": 932500 - }, - { - "epoch": 2.22, - "learning_rate": 5.196996561038591e-06, - "loss": 1.1674, - "step": 933000 - }, - { - "epoch": 2.22, - "learning_rate": 5.189063547405707e-06, - "loss": 1.1584, - "step": 933500 - }, - { - "epoch": 2.22, - "learning_rate": 5.181130533772823e-06, - "loss": 1.1535, - "step": 934000 - }, - { - "epoch": 2.22, - "learning_rate": 5.1731975201399385e-06, - "loss": 1.1742, - "step": 934500 - }, - { - "epoch": 2.23, - "learning_rate": 5.165264506507055e-06, - "loss": 1.1859, - "step": 935000 - }, - { - "epoch": 2.23, - "learning_rate": 5.157331492874171e-06, - "loss": 1.1815, - "step": 935500 - }, - { - "epoch": 2.23, - "learning_rate": 5.149398479241287e-06, - "loss": 1.1693, - "step": 936000 - }, - { - "epoch": 2.23, - "learning_rate": 5.141465465608403e-06, - "loss": 1.169, - "step": 936500 - }, - { - "epoch": 2.23, - "learning_rate": 5.1335324519755195e-06, - "loss": 1.1595, - "step": 937000 - }, - { - "epoch": 2.23, - "learning_rate": 5.125599438342635e-06, - "loss": 1.1743, - "step": 937500 - }, - { - "epoch": 2.23, - "learning_rate": 5.117666424709752e-06, - "loss": 1.1576, - "step": 938000 - }, - { - "epoch": 2.23, - "learning_rate": 5.109733411076867e-06, - "loss": 1.1905, - "step": 938500 - }, - { - "epoch": 2.23, - "learning_rate": 5.101800397443983e-06, - "loss": 1.1705, - "step": 939000 - }, - { - "epoch": 2.24, - "learning_rate": 5.0938673838111e-06, - "loss": 1.1682, - "step": 939500 - }, - { - "epoch": 2.24, - "learning_rate": 5.085934370178216e-06, - "loss": 1.1637, - "step": 940000 - }, - { - "epoch": 2.24, - "learning_rate": 5.078001356545331e-06, - "loss": 1.1584, - "step": 940500 - }, - { - "epoch": 2.24, - "learning_rate": 5.070068342912447e-06, - "loss": 1.1787, - "step": 941000 - }, - { - "epoch": 2.24, - "learning_rate": 5.062135329279564e-06, - "loss": 1.1805, - "step": 941500 - }, - { - "epoch": 2.24, - "learning_rate": 5.05420231564668e-06, - "loss": 1.1684, - "step": 942000 - }, - { - "epoch": 2.24, - "learning_rate": 5.046269302013796e-06, - "loss": 1.1735, - "step": 942500 - }, - { - "epoch": 2.24, - "learning_rate": 5.038336288380912e-06, - "loss": 1.1674, - "step": 943000 - }, - { - "epoch": 2.25, - "learning_rate": 5.030403274748028e-06, - "loss": 1.1652, - "step": 943500 - }, - { - "epoch": 2.25, - "learning_rate": 5.022470261115144e-06, - "loss": 1.171, - "step": 944000 - }, - { - "epoch": 2.25, - "learning_rate": 5.014537247482261e-06, - "loss": 1.1813, - "step": 944500 - }, - { - "epoch": 2.25, - "learning_rate": 5.006604233849376e-06, - "loss": 1.1689, - "step": 945000 - }, - { - "epoch": 2.25, - "learning_rate": 4.998671220216493e-06, - "loss": 1.1807, - "step": 945500 - }, - { - "epoch": 2.25, - "learning_rate": 4.9907382065836085e-06, - "loss": 1.1766, - "step": 946000 - }, - { - "epoch": 2.25, - "learning_rate": 4.982805192950724e-06, - "loss": 1.1727, - "step": 946500 - }, - { - "epoch": 2.25, - "learning_rate": 4.97487217931784e-06, - "loss": 1.1717, - "step": 947000 - }, - { - "epoch": 2.25, - "learning_rate": 4.966939165684956e-06, - "loss": 1.1744, - "step": 947500 - }, - { - "epoch": 2.26, - "learning_rate": 4.959006152052073e-06, - "loss": 1.1776, - "step": 948000 - }, - { - "epoch": 2.26, - "learning_rate": 4.951073138419189e-06, - "loss": 1.1788, - "step": 948500 - }, - { - "epoch": 2.26, - "learning_rate": 4.9431401247863045e-06, - "loss": 1.1792, - "step": 949000 - }, - { - "epoch": 2.26, - "learning_rate": 4.935207111153421e-06, - "loss": 1.1628, - "step": 949500 - }, - { - "epoch": 2.26, - "learning_rate": 4.927274097520537e-06, - "loss": 1.176, - "step": 950000 - }, - { - "epoch": 2.26, - "learning_rate": 4.919341083887653e-06, - "loss": 1.1735, - "step": 950500 - }, - { - "epoch": 2.26, - "learning_rate": 4.911408070254769e-06, - "loss": 1.175, - "step": 951000 - }, - { - "epoch": 2.26, - "learning_rate": 4.9034750566218855e-06, - "loss": 1.1689, - "step": 951500 - }, - { - "epoch": 2.27, - "learning_rate": 4.895542042989001e-06, - "loss": 1.1726, - "step": 952000 - }, - { - "epoch": 2.27, - "learning_rate": 4.887609029356117e-06, - "loss": 1.1653, - "step": 952500 - }, - { - "epoch": 2.27, - "learning_rate": 4.879676015723234e-06, - "loss": 1.1516, - "step": 953000 - }, - { - "epoch": 2.27, - "learning_rate": 4.87174300209035e-06, - "loss": 1.1582, - "step": 953500 - }, - { - "epoch": 2.27, - "learning_rate": 4.863809988457466e-06, - "loss": 1.1732, - "step": 954000 - }, - { - "epoch": 2.27, - "learning_rate": 4.8558769748245816e-06, - "loss": 1.164, - "step": 954500 - }, - { - "epoch": 2.27, - "learning_rate": 4.8479439611916974e-06, - "loss": 1.1866, - "step": 955000 - }, - { - "epoch": 2.27, - "learning_rate": 4.840010947558813e-06, - "loss": 1.1639, - "step": 955500 - }, - { - "epoch": 2.28, - "learning_rate": 4.83207793392593e-06, - "loss": 1.1672, - "step": 956000 - }, - { - "epoch": 2.28, - "learning_rate": 4.824144920293046e-06, - "loss": 1.1631, - "step": 956500 - }, - { - "epoch": 2.28, - "learning_rate": 4.816211906660162e-06, - "loss": 1.1629, - "step": 957000 - }, - { - "epoch": 2.28, - "learning_rate": 4.808278893027278e-06, - "loss": 1.1709, - "step": 957500 - }, - { - "epoch": 2.28, - "learning_rate": 4.800345879394394e-06, - "loss": 1.1668, - "step": 958000 - }, - { - "epoch": 2.28, - "learning_rate": 4.79241286576151e-06, - "loss": 1.1825, - "step": 958500 - }, - { - "epoch": 2.28, - "learning_rate": 4.784479852128626e-06, - "loss": 1.1643, - "step": 959000 - }, - { - "epoch": 2.28, - "learning_rate": 4.776546838495743e-06, - "loss": 1.176, - "step": 959500 - }, - { - "epoch": 2.28, - "learning_rate": 4.768613824862859e-06, - "loss": 1.1738, - "step": 960000 - }, - { - "epoch": 2.29, - "learning_rate": 4.7606808112299745e-06, - "loss": 1.1737, - "step": 960500 - }, - { - "epoch": 2.29, - "learning_rate": 4.75274779759709e-06, - "loss": 1.1634, - "step": 961000 - }, - { - "epoch": 2.29, - "learning_rate": 4.744814783964207e-06, - "loss": 1.1711, - "step": 961500 - }, - { - "epoch": 2.29, - "learning_rate": 4.736881770331322e-06, - "loss": 1.1768, - "step": 962000 - }, - { - "epoch": 2.29, - "learning_rate": 4.728948756698439e-06, - "loss": 1.1614, - "step": 962500 - }, - { - "epoch": 2.29, - "learning_rate": 4.7210157430655555e-06, - "loss": 1.168, - "step": 963000 - }, - { - "epoch": 2.29, - "learning_rate": 4.7130827294326705e-06, - "loss": 1.1608, - "step": 963500 - }, - { - "epoch": 2.29, - "learning_rate": 4.705149715799787e-06, - "loss": 1.177, - "step": 964000 - }, - { - "epoch": 2.3, - "learning_rate": 4.697216702166903e-06, - "loss": 1.1692, - "step": 964500 - }, - { - "epoch": 2.3, - "learning_rate": 4.689283688534019e-06, - "loss": 1.1644, - "step": 965000 - }, - { - "epoch": 2.3, - "learning_rate": 4.681350674901135e-06, - "loss": 1.1563, - "step": 965500 - }, - { - "epoch": 2.3, - "learning_rate": 4.6734176612682515e-06, - "loss": 1.1629, - "step": 966000 - }, - { - "epoch": 2.3, - "learning_rate": 4.665484647635367e-06, - "loss": 1.1587, - "step": 966500 - }, - { - "epoch": 2.3, - "learning_rate": 4.657551634002483e-06, - "loss": 1.1762, - "step": 967000 - }, - { - "epoch": 2.3, - "learning_rate": 4.649618620369599e-06, - "loss": 1.1616, - "step": 967500 - }, - { - "epoch": 2.3, - "learning_rate": 4.641685606736716e-06, - "loss": 1.1682, - "step": 968000 - }, - { - "epoch": 2.3, - "learning_rate": 4.633752593103832e-06, - "loss": 1.1561, - "step": 968500 - }, - { - "epoch": 2.31, - "learning_rate": 4.625819579470948e-06, - "loss": 1.1736, - "step": 969000 - }, - { - "epoch": 2.31, - "learning_rate": 4.617886565838064e-06, - "loss": 1.161, - "step": 969500 - }, - { - "epoch": 2.31, - "learning_rate": 4.609953552205179e-06, - "loss": 1.1766, - "step": 970000 - }, - { - "epoch": 2.31, - "learning_rate": 4.602020538572296e-06, - "loss": 1.1677, - "step": 970500 - }, - { - "epoch": 2.31, - "learning_rate": 4.594087524939412e-06, - "loss": 1.1856, - "step": 971000 - }, - { - "epoch": 2.31, - "learning_rate": 4.586154511306528e-06, - "loss": 1.1703, - "step": 971500 - }, - { - "epoch": 2.31, - "learning_rate": 4.578221497673644e-06, - "loss": 1.1735, - "step": 972000 - }, - { - "epoch": 2.31, - "learning_rate": 4.57028848404076e-06, - "loss": 1.1751, - "step": 972500 - }, - { - "epoch": 2.32, - "learning_rate": 4.562355470407876e-06, - "loss": 1.1728, - "step": 973000 - }, - { - "epoch": 2.32, - "learning_rate": 4.554422456774992e-06, - "loss": 1.1845, - "step": 973500 - }, - { - "epoch": 2.32, - "learning_rate": 4.546489443142109e-06, - "loss": 1.1732, - "step": 974000 - }, - { - "epoch": 2.32, - "learning_rate": 4.538556429509225e-06, - "loss": 1.1657, - "step": 974500 - }, - { - "epoch": 2.32, - "learning_rate": 4.5306234158763405e-06, - "loss": 1.1675, - "step": 975000 - }, - { - "epoch": 2.32, - "learning_rate": 4.522690402243456e-06, - "loss": 1.1611, - "step": 975500 - }, - { - "epoch": 2.32, - "learning_rate": 4.514757388610573e-06, - "loss": 1.1631, - "step": 976000 - }, - { - "epoch": 2.32, - "learning_rate": 4.506824374977689e-06, - "loss": 1.1663, - "step": 976500 - }, - { - "epoch": 2.33, - "learning_rate": 4.498891361344805e-06, - "loss": 1.164, - "step": 977000 - }, - { - "epoch": 2.33, - "learning_rate": 4.4909583477119215e-06, - "loss": 1.1705, - "step": 977500 - }, - { - "epoch": 2.33, - "learning_rate": 4.4830253340790365e-06, - "loss": 1.1648, - "step": 978000 - }, - { - "epoch": 2.33, - "learning_rate": 4.475092320446153e-06, - "loss": 1.1627, - "step": 978500 - }, - { - "epoch": 2.33, - "learning_rate": 4.467159306813269e-06, - "loss": 1.1514, - "step": 979000 - }, - { - "epoch": 2.33, - "learning_rate": 4.459226293180385e-06, - "loss": 1.156, - "step": 979500 - }, - { - "epoch": 2.33, - "learning_rate": 4.451293279547501e-06, - "loss": 1.1699, - "step": 980000 - }, - { - "epoch": 2.33, - "learning_rate": 4.4433602659146176e-06, - "loss": 1.1801, - "step": 980500 - }, - { - "epoch": 2.33, - "learning_rate": 4.435427252281733e-06, - "loss": 1.1514, - "step": 981000 - }, - { - "epoch": 2.34, - "learning_rate": 4.427494238648849e-06, - "loss": 1.1701, - "step": 981500 - }, - { - "epoch": 2.34, - "learning_rate": 4.419561225015965e-06, - "loss": 1.1529, - "step": 982000 - }, - { - "epoch": 2.34, - "learning_rate": 4.411628211383082e-06, - "loss": 1.16, - "step": 982500 - }, - { - "epoch": 2.34, - "learning_rate": 4.403695197750198e-06, - "loss": 1.1811, - "step": 983000 - }, - { - "epoch": 2.34, - "learning_rate": 4.395762184117314e-06, - "loss": 1.1824, - "step": 983500 - }, - { - "epoch": 2.34, - "learning_rate": 4.38782917048443e-06, - "loss": 1.1603, - "step": 984000 - }, - { - "epoch": 2.34, - "learning_rate": 4.379896156851546e-06, - "loss": 1.1484, - "step": 984500 - }, - { - "epoch": 2.34, - "learning_rate": 4.371963143218662e-06, - "loss": 1.1672, - "step": 985000 - }, - { - "epoch": 2.35, - "learning_rate": 4.364030129585778e-06, - "loss": 1.172, - "step": 985500 - }, - { - "epoch": 2.35, - "learning_rate": 4.356097115952895e-06, - "loss": 1.1569, - "step": 986000 - }, - { - "epoch": 2.35, - "learning_rate": 4.34816410232001e-06, - "loss": 1.1726, - "step": 986500 - }, - { - "epoch": 2.35, - "learning_rate": 4.340231088687126e-06, - "loss": 1.1626, - "step": 987000 - }, - { - "epoch": 2.35, - "learning_rate": 4.332298075054242e-06, - "loss": 1.1794, - "step": 987500 - }, - { - "epoch": 2.35, - "learning_rate": 4.324365061421358e-06, - "loss": 1.1578, - "step": 988000 - }, - { - "epoch": 2.35, - "learning_rate": 4.316432047788475e-06, - "loss": 1.1586, - "step": 988500 - }, - { - "epoch": 2.35, - "learning_rate": 4.308499034155591e-06, - "loss": 1.1594, - "step": 989000 - }, - { - "epoch": 2.35, - "learning_rate": 4.3005660205227065e-06, - "loss": 1.1714, - "step": 989500 - }, - { - "epoch": 2.36, - "learning_rate": 4.292633006889822e-06, - "loss": 1.1657, - "step": 990000 - }, - { - "epoch": 2.36, - "learning_rate": 4.284699993256939e-06, - "loss": 1.1736, - "step": 990500 - }, - { - "epoch": 2.36, - "learning_rate": 4.276766979624055e-06, - "loss": 1.1616, - "step": 991000 - }, - { - "epoch": 2.36, - "learning_rate": 4.268833965991171e-06, - "loss": 1.1556, - "step": 991500 - }, - { - "epoch": 2.36, - "learning_rate": 4.260900952358287e-06, - "loss": 1.1796, - "step": 992000 - }, - { - "epoch": 2.36, - "learning_rate": 4.252967938725403e-06, - "loss": 1.1476, - "step": 992500 - }, - { - "epoch": 2.36, - "learning_rate": 4.245034925092519e-06, - "loss": 1.1616, - "step": 993000 - }, - { - "epoch": 2.36, - "learning_rate": 4.237101911459635e-06, - "loss": 1.1571, - "step": 993500 - }, - { - "epoch": 2.37, - "learning_rate": 4.229168897826752e-06, - "loss": 1.1862, - "step": 994000 - }, - { - "epoch": 2.37, - "learning_rate": 4.221235884193867e-06, - "loss": 1.1714, - "step": 994500 - }, - { - "epoch": 2.37, - "learning_rate": 4.2133028705609836e-06, - "loss": 1.1653, - "step": 995000 - }, - { - "epoch": 2.37, - "learning_rate": 4.2053698569280994e-06, - "loss": 1.159, - "step": 995500 - }, - { - "epoch": 2.37, - "learning_rate": 4.197436843295215e-06, - "loss": 1.1627, - "step": 996000 - }, - { - "epoch": 2.37, - "learning_rate": 4.189503829662331e-06, - "loss": 1.1754, - "step": 996500 - }, - { - "epoch": 2.37, - "learning_rate": 4.181570816029448e-06, - "loss": 1.1775, - "step": 997000 - }, - { - "epoch": 2.37, - "learning_rate": 4.173637802396564e-06, - "loss": 1.1588, - "step": 997500 - }, - { - "epoch": 2.38, - "learning_rate": 4.16570478876368e-06, - "loss": 1.1677, - "step": 998000 - }, - { - "epoch": 2.38, - "learning_rate": 4.157771775130796e-06, - "loss": 1.1699, - "step": 998500 - }, - { - "epoch": 2.38, - "learning_rate": 4.149838761497912e-06, - "loss": 1.1826, - "step": 999000 - }, - { - "epoch": 2.38, - "learning_rate": 4.141905747865028e-06, - "loss": 1.1611, - "step": 999500 - }, - { - "epoch": 2.38, - "learning_rate": 4.133972734232144e-06, - "loss": 1.1621, - "step": 1000000 - }, - { - "epoch": 2.38, - "learning_rate": 4.126039720599261e-06, - "loss": 1.1675, - "step": 1000500 - }, - { - "epoch": 2.38, - "learning_rate": 4.118106706966376e-06, - "loss": 1.1795, - "step": 1001000 - }, - { - "epoch": 2.38, - "learning_rate": 4.110173693333492e-06, - "loss": 1.1591, - "step": 1001500 - }, - { - "epoch": 2.38, - "learning_rate": 4.102240679700608e-06, - "loss": 1.1839, - "step": 1002000 - }, - { - "epoch": 2.39, - "learning_rate": 4.094307666067724e-06, - "loss": 1.1603, - "step": 1002500 - }, - { - "epoch": 2.39, - "learning_rate": 4.086374652434841e-06, - "loss": 1.1582, - "step": 1003000 - }, - { - "epoch": 2.39, - "learning_rate": 4.078441638801957e-06, - "loss": 1.1804, - "step": 1003500 - }, - { - "epoch": 2.39, - "learning_rate": 4.0705086251690725e-06, - "loss": 1.1517, - "step": 1004000 - }, - { - "epoch": 2.39, - "learning_rate": 4.062575611536188e-06, - "loss": 1.1675, - "step": 1004500 - }, - { - "epoch": 2.39, - "learning_rate": 4.054642597903305e-06, - "loss": 1.1616, - "step": 1005000 - }, - { - "epoch": 2.39, - "learning_rate": 4.046709584270421e-06, - "loss": 1.1809, - "step": 1005500 - }, - { - "epoch": 2.39, - "learning_rate": 4.038776570637537e-06, - "loss": 1.1575, - "step": 1006000 - }, - { - "epoch": 2.4, - "learning_rate": 4.030843557004653e-06, - "loss": 1.1477, - "step": 1006500 - }, - { - "epoch": 2.4, - "learning_rate": 4.022910543371769e-06, - "loss": 1.1603, - "step": 1007000 - }, - { - "epoch": 2.4, - "learning_rate": 4.014977529738885e-06, - "loss": 1.1656, - "step": 1007500 - }, - { - "epoch": 2.4, - "learning_rate": 4.007044516106001e-06, - "loss": 1.1509, - "step": 1008000 - }, - { - "epoch": 2.4, - "learning_rate": 3.999111502473118e-06, - "loss": 1.1531, - "step": 1008500 - }, - { - "epoch": 2.4, - "learning_rate": 3.991178488840234e-06, - "loss": 1.18, - "step": 1009000 - }, - { - "epoch": 2.4, - "learning_rate": 3.9832454752073496e-06, - "loss": 1.1605, - "step": 1009500 - }, - { - "epoch": 2.4, - "learning_rate": 3.9753124615744654e-06, - "loss": 1.1566, - "step": 1010000 - }, - { - "epoch": 2.4, - "learning_rate": 3.967379447941581e-06, - "loss": 1.1533, - "step": 1010500 - }, - { - "epoch": 2.41, - "learning_rate": 3.959446434308697e-06, - "loss": 1.172, - "step": 1011000 - }, - { - "epoch": 2.41, - "learning_rate": 3.951513420675814e-06, - "loss": 1.1685, - "step": 1011500 - }, - { - "epoch": 2.41, - "learning_rate": 3.94358040704293e-06, - "loss": 1.1672, - "step": 1012000 - }, - { - "epoch": 2.41, - "learning_rate": 3.935647393410046e-06, - "loss": 1.1782, - "step": 1012500 - }, - { - "epoch": 2.41, - "learning_rate": 3.927714379777162e-06, - "loss": 1.1696, - "step": 1013000 - }, - { - "epoch": 2.41, - "learning_rate": 3.919781366144278e-06, - "loss": 1.1686, - "step": 1013500 - }, - { - "epoch": 2.41, - "learning_rate": 3.911848352511394e-06, - "loss": 1.1603, - "step": 1014000 - }, - { - "epoch": 2.41, - "learning_rate": 3.90391533887851e-06, - "loss": 1.1643, - "step": 1014500 - }, - { - "epoch": 2.42, - "learning_rate": 3.895982325245627e-06, - "loss": 1.1517, - "step": 1015000 - }, - { - "epoch": 2.42, - "learning_rate": 3.8880493116127425e-06, - "loss": 1.1589, - "step": 1015500 - }, - { - "epoch": 2.42, - "learning_rate": 3.880116297979858e-06, - "loss": 1.1743, - "step": 1016000 - }, - { - "epoch": 2.42, - "learning_rate": 3.872183284346974e-06, - "loss": 1.162, - "step": 1016500 - }, - { - "epoch": 2.42, - "learning_rate": 3.864250270714091e-06, - "loss": 1.1791, - "step": 1017000 - }, - { - "epoch": 2.42, - "learning_rate": 3.856317257081207e-06, - "loss": 1.1741, - "step": 1017500 - }, - { - "epoch": 2.42, - "learning_rate": 3.848384243448323e-06, - "loss": 1.1521, - "step": 1018000 - }, - { - "epoch": 2.42, - "learning_rate": 3.8404512298154385e-06, - "loss": 1.1583, - "step": 1018500 - }, - { - "epoch": 2.43, - "learning_rate": 3.832518216182554e-06, - "loss": 1.1573, - "step": 1019000 - }, - { - "epoch": 2.43, - "learning_rate": 3.824585202549671e-06, - "loss": 1.1499, - "step": 1019500 - }, - { - "epoch": 2.43, - "learning_rate": 3.816652188916787e-06, - "loss": 1.1533, - "step": 1020000 - }, - { - "epoch": 2.43, - "learning_rate": 3.8087191752839033e-06, - "loss": 1.1518, - "step": 1020500 - }, - { - "epoch": 2.43, - "learning_rate": 3.8007861616510187e-06, - "loss": 1.1769, - "step": 1021000 - }, - { - "epoch": 2.43, - "learning_rate": 3.792853148018135e-06, - "loss": 1.1597, - "step": 1021500 - }, - { - "epoch": 2.43, - "learning_rate": 3.7849201343852517e-06, - "loss": 1.1713, - "step": 1022000 - }, - { - "epoch": 2.43, - "learning_rate": 3.776987120752367e-06, - "loss": 1.1516, - "step": 1022500 - }, - { - "epoch": 2.43, - "learning_rate": 3.7690541071194834e-06, - "loss": 1.1585, - "step": 1023000 - }, - { - "epoch": 2.44, - "learning_rate": 3.7611210934865993e-06, - "loss": 1.1438, - "step": 1023500 - }, - { - "epoch": 2.44, - "learning_rate": 3.7531880798537156e-06, - "loss": 1.1602, - "step": 1024000 - }, - { - "epoch": 2.44, - "learning_rate": 3.7452550662208315e-06, - "loss": 1.1579, - "step": 1024500 - }, - { - "epoch": 2.44, - "learning_rate": 3.7373220525879477e-06, - "loss": 1.1639, - "step": 1025000 - }, - { - "epoch": 2.44, - "learning_rate": 3.7293890389550636e-06, - "loss": 1.1612, - "step": 1025500 - }, - { - "epoch": 2.44, - "learning_rate": 3.72145602532218e-06, - "loss": 1.172, - "step": 1026000 - }, - { - "epoch": 2.44, - "learning_rate": 3.7135230116892958e-06, - "loss": 1.1449, - "step": 1026500 - }, - { - "epoch": 2.44, - "learning_rate": 3.705589998056412e-06, - "loss": 1.1701, - "step": 1027000 - }, - { - "epoch": 2.45, - "learning_rate": 3.6976569844235283e-06, - "loss": 1.1665, - "step": 1027500 - }, - { - "epoch": 2.45, - "learning_rate": 3.689723970790644e-06, - "loss": 1.1765, - "step": 1028000 - }, - { - "epoch": 2.45, - "learning_rate": 3.6817909571577605e-06, - "loss": 1.1455, - "step": 1028500 - }, - { - "epoch": 2.45, - "learning_rate": 3.673857943524876e-06, - "loss": 1.1789, - "step": 1029000 - }, - { - "epoch": 2.45, - "learning_rate": 3.6659249298919926e-06, - "loss": 1.1603, - "step": 1029500 - }, - { - "epoch": 2.45, - "learning_rate": 3.657991916259108e-06, - "loss": 1.1704, - "step": 1030000 - }, - { - "epoch": 2.45, - "learning_rate": 3.6500589026262244e-06, - "loss": 1.1654, - "step": 1030500 - }, - { - "epoch": 2.45, - "learning_rate": 3.6421258889933402e-06, - "loss": 1.1568, - "step": 1031000 - }, - { - "epoch": 2.45, - "learning_rate": 3.6341928753604565e-06, - "loss": 1.1505, - "step": 1031500 - }, - { - "epoch": 2.46, - "learning_rate": 3.626259861727573e-06, - "loss": 1.1562, - "step": 1032000 - }, - { - "epoch": 2.46, - "learning_rate": 3.6183268480946887e-06, - "loss": 1.163, - "step": 1032500 - }, - { - "epoch": 2.46, - "learning_rate": 3.610393834461805e-06, - "loss": 1.156, - "step": 1033000 - }, - { - "epoch": 2.46, - "learning_rate": 3.602460820828921e-06, - "loss": 1.1745, - "step": 1033500 - }, - { - "epoch": 2.46, - "learning_rate": 3.594527807196037e-06, - "loss": 1.1505, - "step": 1034000 - }, - { - "epoch": 2.46, - "learning_rate": 3.586594793563153e-06, - "loss": 1.1707, - "step": 1034500 - }, - { - "epoch": 2.46, - "learning_rate": 3.5786617799302693e-06, - "loss": 1.1594, - "step": 1035000 - }, - { - "epoch": 2.46, - "learning_rate": 3.570728766297385e-06, - "loss": 1.1612, - "step": 1035500 - }, - { - "epoch": 2.47, - "learning_rate": 3.5627957526645014e-06, - "loss": 1.1601, - "step": 1036000 - }, - { - "epoch": 2.47, - "learning_rate": 3.554862739031617e-06, - "loss": 1.1685, - "step": 1036500 - }, - { - "epoch": 2.47, - "learning_rate": 3.5469297253987336e-06, - "loss": 1.1675, - "step": 1037000 - }, - { - "epoch": 2.47, - "learning_rate": 3.53899671176585e-06, - "loss": 1.1472, - "step": 1037500 - }, - { - "epoch": 2.47, - "learning_rate": 3.5310636981329653e-06, - "loss": 1.1618, - "step": 1038000 - }, - { - "epoch": 2.47, - "learning_rate": 3.5231306845000816e-06, - "loss": 1.1558, - "step": 1038500 - }, - { - "epoch": 2.47, - "learning_rate": 3.5151976708671975e-06, - "loss": 1.1641, - "step": 1039000 - }, - { - "epoch": 2.47, - "learning_rate": 3.5072646572343138e-06, - "loss": 1.1571, - "step": 1039500 - }, - { - "epoch": 2.48, - "learning_rate": 3.4993316436014296e-06, - "loss": 1.1632, - "step": 1040000 - }, - { - "epoch": 2.48, - "learning_rate": 3.491398629968546e-06, - "loss": 1.1586, - "step": 1040500 - }, - { - "epoch": 2.48, - "learning_rate": 3.4834656163356618e-06, - "loss": 1.166, - "step": 1041000 - }, - { - "epoch": 2.48, - "learning_rate": 3.475532602702778e-06, - "loss": 1.1631, - "step": 1041500 - }, - { - "epoch": 2.48, - "learning_rate": 3.4675995890698943e-06, - "loss": 1.1777, - "step": 1042000 - }, - { - "epoch": 2.48, - "learning_rate": 3.45966657543701e-06, - "loss": 1.168, - "step": 1042500 - }, - { - "epoch": 2.48, - "learning_rate": 3.4517335618041265e-06, - "loss": 1.164, - "step": 1043000 - }, - { - "epoch": 2.48, - "learning_rate": 3.4438005481712424e-06, - "loss": 1.1374, - "step": 1043500 - }, - { - "epoch": 2.48, - "learning_rate": 3.4358675345383587e-06, - "loss": 1.1615, - "step": 1044000 - }, - { - "epoch": 2.49, - "learning_rate": 3.427934520905474e-06, - "loss": 1.1612, - "step": 1044500 - }, - { - "epoch": 2.49, - "learning_rate": 3.420001507272591e-06, - "loss": 1.1565, - "step": 1045000 - }, - { - "epoch": 2.49, - "learning_rate": 3.4120684936397062e-06, - "loss": 1.1597, - "step": 1045500 - }, - { - "epoch": 2.49, - "learning_rate": 3.4041354800068225e-06, - "loss": 1.1464, - "step": 1046000 - }, - { - "epoch": 2.49, - "learning_rate": 3.3962024663739384e-06, - "loss": 1.138, - "step": 1046500 - }, - { - "epoch": 2.49, - "learning_rate": 3.3882694527410547e-06, - "loss": 1.1457, - "step": 1047000 - }, - { - "epoch": 2.49, - "learning_rate": 3.380336439108171e-06, - "loss": 1.1774, - "step": 1047500 - }, - { - "epoch": 2.49, - "learning_rate": 3.372403425475287e-06, - "loss": 1.1501, - "step": 1048000 - }, - { - "epoch": 2.5, - "learning_rate": 3.364470411842403e-06, - "loss": 1.1656, - "step": 1048500 - }, - { - "epoch": 2.5, - "learning_rate": 3.356537398209519e-06, - "loss": 1.1614, - "step": 1049000 - }, - { - "epoch": 2.5, - "learning_rate": 3.3486043845766353e-06, - "loss": 1.1633, - "step": 1049500 - }, - { - "epoch": 2.5, - "learning_rate": 3.340671370943751e-06, - "loss": 1.1512, - "step": 1050000 - }, - { - "epoch": 2.5, - "learning_rate": 3.3327383573108674e-06, - "loss": 1.1414, - "step": 1050500 - }, - { - "epoch": 2.5, - "learning_rate": 3.3248053436779833e-06, - "loss": 1.1622, - "step": 1051000 - }, - { - "epoch": 2.5, - "learning_rate": 3.3168723300450996e-06, - "loss": 1.1565, - "step": 1051500 - }, - { - "epoch": 2.5, - "learning_rate": 3.308939316412216e-06, - "loss": 1.1566, - "step": 1052000 - }, - { - "epoch": 2.5, - "learning_rate": 3.3010063027793317e-06, - "loss": 1.1606, - "step": 1052500 - }, - { - "epoch": 2.51, - "learning_rate": 3.293073289146448e-06, - "loss": 1.1589, - "step": 1053000 - }, - { - "epoch": 2.51, - "learning_rate": 3.2851402755135635e-06, - "loss": 1.1599, - "step": 1053500 - }, - { - "epoch": 2.51, - "learning_rate": 3.2772072618806798e-06, - "loss": 1.1522, - "step": 1054000 - }, - { - "epoch": 2.51, - "learning_rate": 3.2692742482477956e-06, - "loss": 1.1559, - "step": 1054500 - }, - { - "epoch": 2.51, - "learning_rate": 3.261341234614912e-06, - "loss": 1.1468, - "step": 1055000 - }, - { - "epoch": 2.51, - "learning_rate": 3.2534082209820278e-06, - "loss": 1.1578, - "step": 1055500 - }, - { - "epoch": 2.51, - "learning_rate": 3.245475207349144e-06, - "loss": 1.1604, - "step": 1056000 - }, - { - "epoch": 2.51, - "learning_rate": 3.2375421937162604e-06, - "loss": 1.1723, - "step": 1056500 - }, - { - "epoch": 2.52, - "learning_rate": 3.2296091800833762e-06, - "loss": 1.155, - "step": 1057000 - }, - { - "epoch": 2.52, - "learning_rate": 3.2216761664504925e-06, - "loss": 1.1568, - "step": 1057500 - }, - { - "epoch": 2.52, - "learning_rate": 3.2137431528176084e-06, - "loss": 1.1568, - "step": 1058000 - }, - { - "epoch": 2.52, - "learning_rate": 3.2058101391847247e-06, - "loss": 1.1539, - "step": 1058500 - }, - { - "epoch": 2.52, - "learning_rate": 3.1978771255518405e-06, - "loss": 1.1582, - "step": 1059000 - }, - { - "epoch": 2.52, - "learning_rate": 3.189944111918957e-06, - "loss": 1.1531, - "step": 1059500 - }, - { - "epoch": 2.52, - "learning_rate": 3.1820110982860723e-06, - "loss": 1.1547, - "step": 1060000 - }, - { - "epoch": 2.52, - "learning_rate": 3.174078084653189e-06, - "loss": 1.1332, - "step": 1060500 - }, - { - "epoch": 2.53, - "learning_rate": 3.1661450710203044e-06, - "loss": 1.17, - "step": 1061000 - }, - { - "epoch": 2.53, - "learning_rate": 3.1582120573874207e-06, - "loss": 1.1534, - "step": 1061500 - }, - { - "epoch": 2.53, - "learning_rate": 3.150279043754537e-06, - "loss": 1.1561, - "step": 1062000 - }, - { - "epoch": 2.53, - "learning_rate": 3.142346030121653e-06, - "loss": 1.1375, - "step": 1062500 - }, - { - "epoch": 2.53, - "learning_rate": 3.134413016488769e-06, - "loss": 1.1432, - "step": 1063000 - }, - { - "epoch": 2.53, - "learning_rate": 3.126480002855885e-06, - "loss": 1.1522, - "step": 1063500 - }, - { - "epoch": 2.53, - "learning_rate": 3.1185469892230013e-06, - "loss": 1.1614, - "step": 1064000 - }, - { - "epoch": 2.53, - "learning_rate": 3.110613975590117e-06, - "loss": 1.1685, - "step": 1064500 - }, - { - "epoch": 2.53, - "learning_rate": 3.1026809619572334e-06, - "loss": 1.1501, - "step": 1065000 - }, - { - "epoch": 2.54, - "learning_rate": 3.0947479483243493e-06, - "loss": 1.1492, - "step": 1065500 - }, - { - "epoch": 2.54, - "learning_rate": 3.0868149346914656e-06, - "loss": 1.1668, - "step": 1066000 - }, - { - "epoch": 2.54, - "learning_rate": 3.078881921058582e-06, - "loss": 1.1526, - "step": 1066500 - }, - { - "epoch": 2.54, - "learning_rate": 3.0709489074256978e-06, - "loss": 1.1527, - "step": 1067000 - }, - { - "epoch": 2.54, - "learning_rate": 3.063015893792814e-06, - "loss": 1.1548, - "step": 1067500 - }, - { - "epoch": 2.54, - "learning_rate": 3.05508288015993e-06, - "loss": 1.1463, - "step": 1068000 - }, - { - "epoch": 2.54, - "learning_rate": 3.047149866527046e-06, - "loss": 1.1696, - "step": 1068500 - }, - { - "epoch": 2.54, - "learning_rate": 3.0392168528941616e-06, - "loss": 1.1562, - "step": 1069000 - }, - { - "epoch": 2.55, - "learning_rate": 3.031283839261278e-06, - "loss": 1.1556, - "step": 1069500 - }, - { - "epoch": 2.55, - "learning_rate": 3.023350825628394e-06, - "loss": 1.1552, - "step": 1070000 - }, - { - "epoch": 2.55, - "learning_rate": 3.01541781199551e-06, - "loss": 1.1538, - "step": 1070500 - }, - { - "epoch": 2.55, - "learning_rate": 3.007484798362626e-06, - "loss": 1.1532, - "step": 1071000 - }, - { - "epoch": 2.55, - "learning_rate": 2.9995517847297422e-06, - "loss": 1.1498, - "step": 1071500 - }, - { - "epoch": 2.55, - "learning_rate": 2.9916187710968585e-06, - "loss": 1.146, - "step": 1072000 - }, - { - "epoch": 2.55, - "learning_rate": 2.9836857574639744e-06, - "loss": 1.154, - "step": 1072500 - }, - { - "epoch": 2.55, - "learning_rate": 2.9757527438310907e-06, - "loss": 1.1584, - "step": 1073000 - }, - { - "epoch": 2.55, - "learning_rate": 2.9678197301982065e-06, - "loss": 1.1418, - "step": 1073500 - }, - { - "epoch": 2.56, - "learning_rate": 2.959886716565323e-06, - "loss": 1.1458, - "step": 1074000 - }, - { - "epoch": 2.56, - "learning_rate": 2.9519537029324387e-06, - "loss": 1.1503, - "step": 1074500 - }, - { - "epoch": 2.56, - "learning_rate": 2.944020689299555e-06, - "loss": 1.1475, - "step": 1075000 - }, - { - "epoch": 2.56, - "learning_rate": 2.9360876756666704e-06, - "loss": 1.1629, - "step": 1075500 - }, - { - "epoch": 2.56, - "learning_rate": 2.928154662033787e-06, - "loss": 1.1565, - "step": 1076000 - }, - { - "epoch": 2.56, - "learning_rate": 2.9202216484009034e-06, - "loss": 1.1727, - "step": 1076500 - }, - { - "epoch": 2.56, - "learning_rate": 2.912288634768019e-06, - "loss": 1.1451, - "step": 1077000 - }, - { - "epoch": 2.56, - "learning_rate": 2.904355621135135e-06, - "loss": 1.1485, - "step": 1077500 - }, - { - "epoch": 2.57, - "learning_rate": 2.896422607502251e-06, - "loss": 1.1484, - "step": 1078000 - }, - { - "epoch": 2.57, - "learning_rate": 2.8884895938693673e-06, - "loss": 1.1559, - "step": 1078500 - }, - { - "epoch": 2.57, - "learning_rate": 2.880556580236483e-06, - "loss": 1.1353, - "step": 1079000 - }, - { - "epoch": 2.57, - "learning_rate": 2.8726235666035995e-06, - "loss": 1.1427, - "step": 1079500 - }, - { - "epoch": 2.57, - "learning_rate": 2.8646905529707153e-06, - "loss": 1.1653, - "step": 1080000 - }, - { - "epoch": 2.57, - "learning_rate": 2.8567575393378316e-06, - "loss": 1.1541, - "step": 1080500 - }, - { - "epoch": 2.57, - "learning_rate": 2.8488245257049475e-06, - "loss": 1.1528, - "step": 1081000 - }, - { - "epoch": 2.57, - "learning_rate": 2.8408915120720638e-06, - "loss": 1.1531, - "step": 1081500 - }, - { - "epoch": 2.58, - "learning_rate": 2.83295849843918e-06, - "loss": 1.1671, - "step": 1082000 - }, - { - "epoch": 2.58, - "learning_rate": 2.825025484806296e-06, - "loss": 1.1541, - "step": 1082500 - }, - { - "epoch": 2.58, - "learning_rate": 2.817092471173412e-06, - "loss": 1.1451, - "step": 1083000 - }, - { - "epoch": 2.58, - "learning_rate": 2.809159457540528e-06, - "loss": 1.1609, - "step": 1083500 - }, - { - "epoch": 2.58, - "learning_rate": 2.8012264439076444e-06, - "loss": 1.1614, - "step": 1084000 - }, - { - "epoch": 2.58, - "learning_rate": 2.79329343027476e-06, - "loss": 1.1722, - "step": 1084500 - }, - { - "epoch": 2.58, - "learning_rate": 2.785360416641876e-06, - "loss": 1.1521, - "step": 1085000 - }, - { - "epoch": 2.58, - "learning_rate": 2.777427403008992e-06, - "loss": 1.1569, - "step": 1085500 - }, - { - "epoch": 2.58, - "learning_rate": 2.7694943893761082e-06, - "loss": 1.1467, - "step": 1086000 - }, - { - "epoch": 2.59, - "learning_rate": 2.7615613757432245e-06, - "loss": 1.1583, - "step": 1086500 - }, - { - "epoch": 2.59, - "learning_rate": 2.7536283621103404e-06, - "loss": 1.1665, - "step": 1087000 - }, - { - "epoch": 2.59, - "learning_rate": 2.7456953484774567e-06, - "loss": 1.1646, - "step": 1087500 - }, - { - "epoch": 2.59, - "learning_rate": 2.7377623348445725e-06, - "loss": 1.1541, - "step": 1088000 - }, - { - "epoch": 2.59, - "learning_rate": 2.729829321211689e-06, - "loss": 1.1588, - "step": 1088500 - }, - { - "epoch": 2.59, - "learning_rate": 2.7218963075788047e-06, - "loss": 1.1405, - "step": 1089000 - }, - { - "epoch": 2.59, - "learning_rate": 2.713963293945921e-06, - "loss": 1.1502, - "step": 1089500 - }, - { - "epoch": 2.59, - "learning_rate": 2.706030280313037e-06, - "loss": 1.1474, - "step": 1090000 - }, - { - "epoch": 2.6, - "learning_rate": 2.698097266680153e-06, - "loss": 1.1505, - "step": 1090500 - }, - { - "epoch": 2.6, - "learning_rate": 2.6901642530472694e-06, - "loss": 1.1579, - "step": 1091000 - }, - { - "epoch": 2.6, - "learning_rate": 2.6822312394143853e-06, - "loss": 1.1499, - "step": 1091500 - }, - { - "epoch": 2.6, - "learning_rate": 2.6742982257815016e-06, - "loss": 1.1668, - "step": 1092000 - }, - { - "epoch": 2.6, - "learning_rate": 2.666365212148617e-06, - "loss": 1.15, - "step": 1092500 - }, - { - "epoch": 2.6, - "learning_rate": 2.6584321985157337e-06, - "loss": 1.152, - "step": 1093000 - }, - { - "epoch": 2.6, - "learning_rate": 2.650499184882849e-06, - "loss": 1.1495, - "step": 1093500 - }, - { - "epoch": 2.6, - "learning_rate": 2.6425661712499655e-06, - "loss": 1.1559, - "step": 1094000 - }, - { - "epoch": 2.6, - "learning_rate": 2.6346331576170813e-06, - "loss": 1.1531, - "step": 1094500 - }, - { - "epoch": 2.61, - "learning_rate": 2.6267001439841976e-06, - "loss": 1.1509, - "step": 1095000 - }, - { - "epoch": 2.61, - "learning_rate": 2.6187671303513135e-06, - "loss": 1.1694, - "step": 1095500 - }, - { - "epoch": 2.61, - "learning_rate": 2.6108341167184298e-06, - "loss": 1.1527, - "step": 1096000 - }, - { - "epoch": 2.61, - "learning_rate": 2.602901103085546e-06, - "loss": 1.1524, - "step": 1096500 - }, - { - "epoch": 2.61, - "learning_rate": 2.594968089452662e-06, - "loss": 1.1582, - "step": 1097000 - }, - { - "epoch": 2.61, - "learning_rate": 2.5870350758197782e-06, - "loss": 1.1525, - "step": 1097500 - }, - { - "epoch": 2.61, - "learning_rate": 2.579102062186894e-06, - "loss": 1.1551, - "step": 1098000 - }, - { - "epoch": 2.61, - "learning_rate": 2.5711690485540104e-06, - "loss": 1.1551, - "step": 1098500 - }, - { - "epoch": 2.62, - "learning_rate": 2.5632360349211262e-06, - "loss": 1.1389, - "step": 1099000 - }, - { - "epoch": 2.62, - "learning_rate": 2.5553030212882425e-06, - "loss": 1.1448, - "step": 1099500 - }, - { - "epoch": 2.62, - "learning_rate": 2.547370007655358e-06, - "loss": 1.1498, - "step": 1100000 - }, - { - "epoch": 2.62, - "learning_rate": 2.5394369940224743e-06, - "loss": 1.1473, - "step": 1100500 - }, - { - "epoch": 2.62, - "learning_rate": 2.531503980389591e-06, - "loss": 1.1536, - "step": 1101000 - }, - { - "epoch": 2.62, - "learning_rate": 2.5235709667567064e-06, - "loss": 1.1498, - "step": 1101500 - }, - { - "epoch": 2.62, - "learning_rate": 2.5156379531238227e-06, - "loss": 1.1545, - "step": 1102000 - }, - { - "epoch": 2.62, - "learning_rate": 2.5077049394909386e-06, - "loss": 1.1536, - "step": 1102500 - }, - { - "epoch": 2.63, - "learning_rate": 2.499771925858055e-06, - "loss": 1.1677, - "step": 1103000 - }, - { - "epoch": 2.63, - "learning_rate": 2.491838912225171e-06, - "loss": 1.1516, - "step": 1103500 - }, - { - "epoch": 2.63, - "learning_rate": 2.483905898592287e-06, - "loss": 1.1645, - "step": 1104000 - }, - { - "epoch": 2.63, - "learning_rate": 2.475972884959403e-06, - "loss": 1.134, - "step": 1104500 - }, - { - "epoch": 2.63, - "learning_rate": 2.468039871326519e-06, - "loss": 1.1678, - "step": 1105000 - }, - { - "epoch": 2.63, - "learning_rate": 2.460106857693635e-06, - "loss": 1.1544, - "step": 1105500 - }, - { - "epoch": 2.63, - "learning_rate": 2.4521738440607513e-06, - "loss": 1.1343, - "step": 1106000 - }, - { - "epoch": 2.63, - "learning_rate": 2.444240830427867e-06, - "loss": 1.1478, - "step": 1106500 - }, - { - "epoch": 2.63, - "learning_rate": 2.4363078167949835e-06, - "loss": 1.152, - "step": 1107000 - }, - { - "epoch": 2.64, - "learning_rate": 2.4283748031620993e-06, - "loss": 1.1585, - "step": 1107500 - }, - { - "epoch": 2.64, - "learning_rate": 2.420441789529215e-06, - "loss": 1.1586, - "step": 1108000 - }, - { - "epoch": 2.64, - "learning_rate": 2.412508775896332e-06, - "loss": 1.1608, - "step": 1108500 - }, - { - "epoch": 2.64, - "learning_rate": 2.4045757622634478e-06, - "loss": 1.1614, - "step": 1109000 - }, - { - "epoch": 2.64, - "learning_rate": 2.3966427486305636e-06, - "loss": 1.1655, - "step": 1109500 - }, - { - "epoch": 2.64, - "learning_rate": 2.38870973499768e-06, - "loss": 1.162, - "step": 1110000 - }, - { - "epoch": 2.64, - "learning_rate": 2.3807767213647958e-06, - "loss": 1.157, - "step": 1110500 - }, - { - "epoch": 2.64, - "learning_rate": 2.372843707731912e-06, - "loss": 1.1472, - "step": 1111000 - }, - { - "epoch": 2.65, - "learning_rate": 2.364910694099028e-06, - "loss": 1.1376, - "step": 1111500 - }, - { - "epoch": 2.65, - "learning_rate": 2.356977680466144e-06, - "loss": 1.1512, - "step": 1112000 - }, - { - "epoch": 2.65, - "learning_rate": 2.34904466683326e-06, - "loss": 1.1498, - "step": 1112500 - }, - { - "epoch": 2.65, - "learning_rate": 2.341111653200376e-06, - "loss": 1.1588, - "step": 1113000 - }, - { - "epoch": 2.65, - "learning_rate": 2.3331786395674922e-06, - "loss": 1.1418, - "step": 1113500 - }, - { - "epoch": 2.65, - "learning_rate": 2.3252456259346085e-06, - "loss": 1.1655, - "step": 1114000 - }, - { - "epoch": 2.65, - "learning_rate": 2.3173126123017244e-06, - "loss": 1.151, - "step": 1114500 - }, - { - "epoch": 2.65, - "learning_rate": 2.3093795986688407e-06, - "loss": 1.141, - "step": 1115000 - }, - { - "epoch": 2.65, - "learning_rate": 2.3014465850359566e-06, - "loss": 1.1537, - "step": 1115500 - }, - { - "epoch": 2.66, - "learning_rate": 2.2935135714030724e-06, - "loss": 1.142, - "step": 1116000 - }, - { - "epoch": 2.66, - "learning_rate": 2.2855805577701887e-06, - "loss": 1.1444, - "step": 1116500 - }, - { - "epoch": 2.66, - "learning_rate": 2.2776475441373046e-06, - "loss": 1.1461, - "step": 1117000 - }, - { - "epoch": 2.66, - "learning_rate": 2.269714530504421e-06, - "loss": 1.1479, - "step": 1117500 - }, - { - "epoch": 2.66, - "learning_rate": 2.2617815168715367e-06, - "loss": 1.1634, - "step": 1118000 - }, - { - "epoch": 2.66, - "learning_rate": 2.253848503238653e-06, - "loss": 1.1528, - "step": 1118500 - }, - { - "epoch": 2.66, - "learning_rate": 2.2459154896057693e-06, - "loss": 1.1513, - "step": 1119000 - }, - { - "epoch": 2.66, - "learning_rate": 2.237982475972885e-06, - "loss": 1.1547, - "step": 1119500 - }, - { - "epoch": 2.67, - "learning_rate": 2.2300494623400015e-06, - "loss": 1.159, - "step": 1120000 - }, - { - "epoch": 2.67, - "learning_rate": 2.2221164487071173e-06, - "loss": 1.1388, - "step": 1120500 - }, - { - "epoch": 2.67, - "learning_rate": 2.214183435074233e-06, - "loss": 1.1616, - "step": 1121000 - }, - { - "epoch": 2.67, - "learning_rate": 2.2062504214413495e-06, - "loss": 1.1555, - "step": 1121500 - }, - { - "epoch": 2.67, - "learning_rate": 2.1983174078084653e-06, - "loss": 1.1521, - "step": 1122000 - }, - { - "epoch": 2.67, - "learning_rate": 2.1903843941755816e-06, - "loss": 1.1381, - "step": 1122500 - }, - { - "epoch": 2.67, - "learning_rate": 2.1824513805426975e-06, - "loss": 1.1495, - "step": 1123000 - }, - { - "epoch": 2.67, - "learning_rate": 2.1745183669098138e-06, - "loss": 1.1394, - "step": 1123500 - }, - { - "epoch": 2.68, - "learning_rate": 2.16658535327693e-06, - "loss": 1.1362, - "step": 1124000 - }, - { - "epoch": 2.68, - "learning_rate": 2.158652339644046e-06, - "loss": 1.1356, - "step": 1124500 - }, - { - "epoch": 2.68, - "learning_rate": 2.150719326011162e-06, - "loss": 1.1565, - "step": 1125000 - }, - { - "epoch": 2.68, - "learning_rate": 2.142786312378278e-06, - "loss": 1.1576, - "step": 1125500 - }, - { - "epoch": 2.68, - "learning_rate": 2.134853298745394e-06, - "loss": 1.1514, - "step": 1126000 - }, - { - "epoch": 2.68, - "learning_rate": 2.1269202851125102e-06, - "loss": 1.1508, - "step": 1126500 - }, - { - "epoch": 2.68, - "learning_rate": 2.118987271479626e-06, - "loss": 1.142, - "step": 1127000 - }, - { - "epoch": 2.68, - "learning_rate": 2.111054257846742e-06, - "loss": 1.1526, - "step": 1127500 - }, - { - "epoch": 2.68, - "learning_rate": 2.1031212442138583e-06, - "loss": 1.1457, - "step": 1128000 - }, - { - "epoch": 2.69, - "learning_rate": 2.0951882305809745e-06, - "loss": 1.1467, - "step": 1128500 - }, - { - "epoch": 2.69, - "learning_rate": 2.0872552169480904e-06, - "loss": 1.1466, - "step": 1129000 - }, - { - "epoch": 2.69, - "learning_rate": 2.0793222033152067e-06, - "loss": 1.1427, - "step": 1129500 - }, - { - "epoch": 2.69, - "learning_rate": 2.0713891896823226e-06, - "loss": 1.1412, - "step": 1130000 - }, - { - "epoch": 2.69, - "learning_rate": 2.063456176049439e-06, - "loss": 1.1154, - "step": 1130500 - }, - { - "epoch": 2.69, - "learning_rate": 2.0555231624165547e-06, - "loss": 1.1585, - "step": 1131000 - }, - { - "epoch": 2.69, - "learning_rate": 2.047590148783671e-06, - "loss": 1.1434, - "step": 1131500 - }, - { - "epoch": 2.69, - "learning_rate": 2.039657135150787e-06, - "loss": 1.1517, - "step": 1132000 - }, - { - "epoch": 2.7, - "learning_rate": 2.0317241215179027e-06, - "loss": 1.1624, - "step": 1132500 - }, - { - "epoch": 2.7, - "learning_rate": 2.023791107885019e-06, - "loss": 1.1429, - "step": 1133000 - }, - { - "epoch": 2.7, - "learning_rate": 2.0158580942521353e-06, - "loss": 1.1583, - "step": 1133500 - }, - { - "epoch": 2.7, - "learning_rate": 2.007925080619251e-06, - "loss": 1.1542, - "step": 1134000 - }, - { - "epoch": 2.7, - "learning_rate": 1.9999920669863675e-06, - "loss": 1.165, - "step": 1134500 - }, - { - "epoch": 2.7, - "learning_rate": 1.9920590533534833e-06, - "loss": 1.1383, - "step": 1135000 - }, - { - "epoch": 2.7, - "learning_rate": 1.9841260397205996e-06, - "loss": 1.1618, - "step": 1135500 - }, - { - "epoch": 2.7, - "learning_rate": 1.9761930260877155e-06, - "loss": 1.1569, - "step": 1136000 - }, - { - "epoch": 2.7, - "learning_rate": 1.9682600124548313e-06, - "loss": 1.1444, - "step": 1136500 - }, - { - "epoch": 2.71, - "learning_rate": 1.9603269988219476e-06, - "loss": 1.1771, - "step": 1137000 - }, - { - "epoch": 2.71, - "learning_rate": 1.9523939851890635e-06, - "loss": 1.1612, - "step": 1137500 - }, - { - "epoch": 2.71, - "learning_rate": 1.94446097155618e-06, - "loss": 1.1523, - "step": 1138000 - }, - { - "epoch": 2.71, - "learning_rate": 1.936527957923296e-06, - "loss": 1.1509, - "step": 1138500 - }, - { - "epoch": 2.71, - "learning_rate": 1.928594944290412e-06, - "loss": 1.1395, - "step": 1139000 - }, - { - "epoch": 2.71, - "learning_rate": 1.9206619306575282e-06, - "loss": 1.1454, - "step": 1139500 - }, - { - "epoch": 2.71, - "learning_rate": 1.912728917024644e-06, - "loss": 1.1569, - "step": 1140000 - }, - { - "epoch": 2.71, - "learning_rate": 1.9047959033917602e-06, - "loss": 1.1469, - "step": 1140500 - }, - { - "epoch": 2.72, - "learning_rate": 1.8968628897588763e-06, - "loss": 1.137, - "step": 1141000 - }, - { - "epoch": 2.72, - "learning_rate": 1.8889298761259921e-06, - "loss": 1.1388, - "step": 1141500 - }, - { - "epoch": 2.72, - "learning_rate": 1.8809968624931082e-06, - "loss": 1.1416, - "step": 1142000 - }, - { - "epoch": 2.72, - "learning_rate": 1.8730638488602243e-06, - "loss": 1.1556, - "step": 1142500 - }, - { - "epoch": 2.72, - "learning_rate": 1.8651308352273406e-06, - "loss": 1.1506, - "step": 1143000 - }, - { - "epoch": 2.72, - "learning_rate": 1.8571978215944566e-06, - "loss": 1.154, - "step": 1143500 - }, - { - "epoch": 2.72, - "learning_rate": 1.8492648079615727e-06, - "loss": 1.1644, - "step": 1144000 - }, - { - "epoch": 2.72, - "learning_rate": 1.8413317943286888e-06, - "loss": 1.1385, - "step": 1144500 - }, - { - "epoch": 2.72, - "learning_rate": 1.8333987806958049e-06, - "loss": 1.1396, - "step": 1145000 - }, - { - "epoch": 2.73, - "learning_rate": 1.825465767062921e-06, - "loss": 1.1317, - "step": 1145500 - }, - { - "epoch": 2.73, - "learning_rate": 1.8175327534300368e-06, - "loss": 1.1523, - "step": 1146000 - }, - { - "epoch": 2.73, - "learning_rate": 1.8095997397971529e-06, - "loss": 1.1546, - "step": 1146500 - }, - { - "epoch": 2.73, - "learning_rate": 1.801666726164269e-06, - "loss": 1.1568, - "step": 1147000 - }, - { - "epoch": 2.73, - "learning_rate": 1.793733712531385e-06, - "loss": 1.144, - "step": 1147500 - }, - { - "epoch": 2.73, - "learning_rate": 1.7858006988985013e-06, - "loss": 1.1573, - "step": 1148000 - }, - { - "epoch": 2.73, - "learning_rate": 1.7778676852656174e-06, - "loss": 1.1468, - "step": 1148500 - }, - { - "epoch": 2.73, - "learning_rate": 1.7699346716327335e-06, - "loss": 1.1495, - "step": 1149000 - }, - { - "epoch": 2.74, - "learning_rate": 1.7620016579998496e-06, - "loss": 1.14, - "step": 1149500 - }, - { - "epoch": 2.74, - "learning_rate": 1.7540686443669654e-06, - "loss": 1.1577, - "step": 1150000 - }, - { - "epoch": 2.74, - "learning_rate": 1.7461356307340815e-06, - "loss": 1.1323, - "step": 1150500 - }, - { - "epoch": 2.74, - "learning_rate": 1.7382026171011976e-06, - "loss": 1.1407, - "step": 1151000 - }, - { - "epoch": 2.74, - "learning_rate": 1.7302696034683136e-06, - "loss": 1.1487, - "step": 1151500 - }, - { - "epoch": 2.74, - "learning_rate": 1.7223365898354297e-06, - "loss": 1.1521, - "step": 1152000 - }, - { - "epoch": 2.74, - "learning_rate": 1.7144035762025458e-06, - "loss": 1.1614, - "step": 1152500 - }, - { - "epoch": 2.74, - "learning_rate": 1.706470562569662e-06, - "loss": 1.1469, - "step": 1153000 - }, - { - "epoch": 2.75, - "learning_rate": 1.6985375489367782e-06, - "loss": 1.155, - "step": 1153500 - }, - { - "epoch": 2.75, - "learning_rate": 1.690604535303894e-06, - "loss": 1.1583, - "step": 1154000 - }, - { - "epoch": 2.75, - "learning_rate": 1.6826715216710101e-06, - "loss": 1.1408, - "step": 1154500 - }, - { - "epoch": 2.75, - "learning_rate": 1.6747385080381262e-06, - "loss": 1.152, - "step": 1155000 - }, - { - "epoch": 2.75, - "learning_rate": 1.6668054944052423e-06, - "loss": 1.1549, - "step": 1155500 - }, - { - "epoch": 2.75, - "learning_rate": 1.6588724807723583e-06, - "loss": 1.1553, - "step": 1156000 - }, - { - "epoch": 2.75, - "learning_rate": 1.6509394671394744e-06, - "loss": 1.1451, - "step": 1156500 - }, - { - "epoch": 2.75, - "learning_rate": 1.6430064535065903e-06, - "loss": 1.1408, - "step": 1157000 - }, - { - "epoch": 2.75, - "learning_rate": 1.6350734398737064e-06, - "loss": 1.151, - "step": 1157500 - }, - { - "epoch": 2.76, - "learning_rate": 1.6271404262408226e-06, - "loss": 1.1568, - "step": 1158000 - }, - { - "epoch": 2.76, - "learning_rate": 1.6192074126079387e-06, - "loss": 1.1528, - "step": 1158500 - }, - { - "epoch": 2.76, - "learning_rate": 1.6112743989750548e-06, - "loss": 1.152, - "step": 1159000 - }, - { - "epoch": 2.76, - "learning_rate": 1.6033413853421709e-06, - "loss": 1.149, - "step": 1159500 - }, - { - "epoch": 2.76, - "learning_rate": 1.595408371709287e-06, - "loss": 1.1235, - "step": 1160000 - }, - { - "epoch": 2.76, - "learning_rate": 1.587475358076403e-06, - "loss": 1.1339, - "step": 1160500 - }, - { - "epoch": 2.76, - "learning_rate": 1.579542344443519e-06, - "loss": 1.1548, - "step": 1161000 - }, - { - "epoch": 2.76, - "learning_rate": 1.571609330810635e-06, - "loss": 1.1462, - "step": 1161500 - }, - { - "epoch": 2.77, - "learning_rate": 1.563676317177751e-06, - "loss": 1.1392, - "step": 1162000 - }, - { - "epoch": 2.77, - "learning_rate": 1.5557433035448671e-06, - "loss": 1.1461, - "step": 1162500 - }, - { - "epoch": 2.77, - "learning_rate": 1.5478102899119834e-06, - "loss": 1.1394, - "step": 1163000 - }, - { - "epoch": 2.77, - "learning_rate": 1.5398772762790995e-06, - "loss": 1.1375, - "step": 1163500 - }, - { - "epoch": 2.77, - "learning_rate": 1.5319442626462156e-06, - "loss": 1.1464, - "step": 1164000 - }, - { - "epoch": 2.77, - "learning_rate": 1.5240112490133316e-06, - "loss": 1.1496, - "step": 1164500 - }, - { - "epoch": 2.77, - "learning_rate": 1.5160782353804477e-06, - "loss": 1.156, - "step": 1165000 - }, - { - "epoch": 2.77, - "learning_rate": 1.5081452217475636e-06, - "loss": 1.1427, - "step": 1165500 - }, - { - "epoch": 2.77, - "learning_rate": 1.5002122081146797e-06, - "loss": 1.1399, - "step": 1166000 - }, - { - "epoch": 2.78, - "learning_rate": 1.4922791944817957e-06, - "loss": 1.1335, - "step": 1166500 - }, - { - "epoch": 2.78, - "learning_rate": 1.4843461808489118e-06, - "loss": 1.133, - "step": 1167000 - }, - { - "epoch": 2.78, - "learning_rate": 1.4764131672160279e-06, - "loss": 1.1617, - "step": 1167500 - }, - { - "epoch": 2.78, - "learning_rate": 1.4684801535831442e-06, - "loss": 1.1388, - "step": 1168000 - }, - { - "epoch": 2.78, - "learning_rate": 1.4605471399502603e-06, - "loss": 1.1378, - "step": 1168500 - }, - { - "epoch": 2.78, - "learning_rate": 1.4526141263173763e-06, - "loss": 1.1484, - "step": 1169000 - }, - { - "epoch": 2.78, - "learning_rate": 1.4446811126844922e-06, - "loss": 1.1449, - "step": 1169500 - }, - { - "epoch": 2.78, - "learning_rate": 1.4367480990516083e-06, - "loss": 1.1451, - "step": 1170000 - }, - { - "epoch": 2.79, - "learning_rate": 1.4288150854187243e-06, - "loss": 1.1632, - "step": 1170500 - }, - { - "epoch": 2.79, - "learning_rate": 1.4208820717858404e-06, - "loss": 1.1472, - "step": 1171000 - }, - { - "epoch": 2.79, - "learning_rate": 1.4129490581529565e-06, - "loss": 1.1508, - "step": 1171500 - }, - { - "epoch": 2.79, - "learning_rate": 1.4050160445200726e-06, - "loss": 1.1391, - "step": 1172000 - }, - { - "epoch": 2.79, - "learning_rate": 1.3970830308871889e-06, - "loss": 1.1599, - "step": 1172500 - }, - { - "epoch": 2.79, - "learning_rate": 1.389150017254305e-06, - "loss": 1.1427, - "step": 1173000 - }, - { - "epoch": 2.79, - "learning_rate": 1.381217003621421e-06, - "loss": 1.15, - "step": 1173500 - }, - { - "epoch": 2.79, - "learning_rate": 1.3732839899885369e-06, - "loss": 1.1542, - "step": 1174000 - }, - { - "epoch": 2.8, - "learning_rate": 1.365350976355653e-06, - "loss": 1.1528, - "step": 1174500 - }, - { - "epoch": 2.8, - "learning_rate": 1.357417962722769e-06, - "loss": 1.1424, - "step": 1175000 - }, - { - "epoch": 2.8, - "learning_rate": 1.3494849490898851e-06, - "loss": 1.1195, - "step": 1175500 - }, - { - "epoch": 2.8, - "learning_rate": 1.3415519354570012e-06, - "loss": 1.1542, - "step": 1176000 - }, - { - "epoch": 2.8, - "learning_rate": 1.3336189218241173e-06, - "loss": 1.1437, - "step": 1176500 - }, - { - "epoch": 2.8, - "learning_rate": 1.3256859081912331e-06, - "loss": 1.1434, - "step": 1177000 - }, - { - "epoch": 2.8, - "learning_rate": 1.3177528945583496e-06, - "loss": 1.1357, - "step": 1177500 - }, - { - "epoch": 2.8, - "learning_rate": 1.3098198809254655e-06, - "loss": 1.15, - "step": 1178000 - }, - { - "epoch": 2.8, - "learning_rate": 1.3018868672925816e-06, - "loss": 1.1593, - "step": 1178500 - }, - { - "epoch": 2.81, - "learning_rate": 1.2939538536596977e-06, - "loss": 1.1398, - "step": 1179000 - }, - { - "epoch": 2.81, - "learning_rate": 1.2860208400268137e-06, - "loss": 1.142, - "step": 1179500 - }, - { - "epoch": 2.81, - "learning_rate": 1.2780878263939298e-06, - "loss": 1.1422, - "step": 1180000 - }, - { - "epoch": 2.81, - "learning_rate": 1.2701548127610459e-06, - "loss": 1.1652, - "step": 1180500 - }, - { - "epoch": 2.81, - "learning_rate": 1.2622217991281617e-06, - "loss": 1.1397, - "step": 1181000 - }, - { - "epoch": 2.81, - "learning_rate": 1.2542887854952778e-06, - "loss": 1.1315, - "step": 1181500 - }, - { - "epoch": 2.81, - "learning_rate": 1.2463557718623941e-06, - "loss": 1.1462, - "step": 1182000 - }, - { - "epoch": 2.81, - "learning_rate": 1.2384227582295102e-06, - "loss": 1.1303, - "step": 1182500 - }, - { - "epoch": 2.82, - "learning_rate": 1.230489744596626e-06, - "loss": 1.1381, - "step": 1183000 - }, - { - "epoch": 2.82, - "learning_rate": 1.2225567309637421e-06, - "loss": 1.1645, - "step": 1183500 - }, - { - "epoch": 2.82, - "learning_rate": 1.2146237173308584e-06, - "loss": 1.1457, - "step": 1184000 - }, - { - "epoch": 2.82, - "learning_rate": 1.2066907036979745e-06, - "loss": 1.1397, - "step": 1184500 - }, - { - "epoch": 2.82, - "learning_rate": 1.1987576900650904e-06, - "loss": 1.1473, - "step": 1185000 - }, - { - "epoch": 2.82, - "learning_rate": 1.1908246764322064e-06, - "loss": 1.1547, - "step": 1185500 - }, - { - "epoch": 2.82, - "learning_rate": 1.1828916627993225e-06, - "loss": 1.1382, - "step": 1186000 - }, - { - "epoch": 2.82, - "learning_rate": 1.1749586491664388e-06, - "loss": 1.144, - "step": 1186500 - }, - { - "epoch": 2.82, - "learning_rate": 1.1670256355335549e-06, - "loss": 1.1461, - "step": 1187000 - }, - { - "epoch": 2.83, - "learning_rate": 1.1590926219006707e-06, - "loss": 1.1598, - "step": 1187500 - }, - { - "epoch": 2.83, - "learning_rate": 1.1511596082677868e-06, - "loss": 1.1352, - "step": 1188000 - }, - { - "epoch": 2.83, - "learning_rate": 1.1432265946349031e-06, - "loss": 1.1369, - "step": 1188500 - }, - { - "epoch": 2.83, - "learning_rate": 1.1352935810020192e-06, - "loss": 1.1548, - "step": 1189000 - }, - { - "epoch": 2.83, - "learning_rate": 1.127360567369135e-06, - "loss": 1.1429, - "step": 1189500 - }, - { - "epoch": 2.83, - "learning_rate": 1.1194275537362511e-06, - "loss": 1.1349, - "step": 1190000 - }, - { - "epoch": 2.83, - "learning_rate": 1.1114945401033672e-06, - "loss": 1.15, - "step": 1190500 - }, - { - "epoch": 2.83, - "learning_rate": 1.1035615264704835e-06, - "loss": 1.1365, - "step": 1191000 - }, - { - "epoch": 2.84, - "learning_rate": 1.0956285128375994e-06, - "loss": 1.1535, - "step": 1191500 - }, - { - "epoch": 2.84, - "learning_rate": 1.0876954992047154e-06, - "loss": 1.144, - "step": 1192000 - }, - { - "epoch": 2.84, - "learning_rate": 1.0797624855718315e-06, - "loss": 1.153, - "step": 1192500 - }, - { - "epoch": 2.84, - "learning_rate": 1.0718294719389476e-06, - "loss": 1.155, - "step": 1193000 - }, - { - "epoch": 2.84, - "learning_rate": 1.0638964583060637e-06, - "loss": 1.149, - "step": 1193500 - }, - { - "epoch": 2.84, - "learning_rate": 1.0559634446731797e-06, - "loss": 1.1501, - "step": 1194000 - }, - { - "epoch": 2.84, - "learning_rate": 1.0480304310402958e-06, - "loss": 1.1324, - "step": 1194500 - }, - { - "epoch": 2.84, - "learning_rate": 1.0400974174074119e-06, - "loss": 1.1415, - "step": 1195000 - }, - { - "epoch": 2.85, - "learning_rate": 1.032164403774528e-06, - "loss": 1.1606, - "step": 1195500 - }, - { - "epoch": 2.85, - "learning_rate": 1.024231390141644e-06, - "loss": 1.1448, - "step": 1196000 - }, - { - "epoch": 2.85, - "learning_rate": 1.0162983765087601e-06, - "loss": 1.1527, - "step": 1196500 - }, - { - "epoch": 2.85, - "learning_rate": 1.0083653628758762e-06, - "loss": 1.1392, - "step": 1197000 - }, - { - "epoch": 2.85, - "learning_rate": 1.0004323492429923e-06, - "loss": 1.1395, - "step": 1197500 - }, - { - "epoch": 2.85, - "learning_rate": 9.924993356101084e-07, - "loss": 1.1415, - "step": 1198000 - }, - { - "epoch": 2.85, - "learning_rate": 9.845663219772244e-07, - "loss": 1.1452, - "step": 1198500 - }, - { - "epoch": 2.85, - "learning_rate": 9.766333083443405e-07, - "loss": 1.1166, - "step": 1199000 - }, - { - "epoch": 2.85, - "learning_rate": 9.687002947114566e-07, - "loss": 1.1494, - "step": 1199500 - }, - { - "epoch": 2.86, - "learning_rate": 9.607672810785727e-07, - "loss": 1.1373, - "step": 1200000 - }, - { - "epoch": 2.86, - "learning_rate": 9.528342674456886e-07, - "loss": 1.1255, - "step": 1200500 - }, - { - "epoch": 2.86, - "learning_rate": 9.449012538128048e-07, - "loss": 1.1561, - "step": 1201000 - }, - { - "epoch": 2.86, - "learning_rate": 9.369682401799209e-07, - "loss": 1.1426, - "step": 1201500 - }, - { - "epoch": 2.86, - "learning_rate": 9.290352265470369e-07, - "loss": 1.1391, - "step": 1202000 - }, - { - "epoch": 2.86, - "learning_rate": 9.211022129141529e-07, - "loss": 1.1552, - "step": 1202500 - }, - { - "epoch": 2.86, - "learning_rate": 9.13169199281269e-07, - "loss": 1.1487, - "step": 1203000 - }, - { - "epoch": 2.86, - "learning_rate": 9.052361856483852e-07, - "loss": 1.1381, - "step": 1203500 - }, - { - "epoch": 2.87, - "learning_rate": 8.973031720155012e-07, - "loss": 1.1335, - "step": 1204000 - }, - { - "epoch": 2.87, - "learning_rate": 8.893701583826172e-07, - "loss": 1.1576, - "step": 1204500 - }, - { - "epoch": 2.87, - "learning_rate": 8.814371447497333e-07, - "loss": 1.1522, - "step": 1205000 - }, - { - "epoch": 2.87, - "learning_rate": 8.735041311168493e-07, - "loss": 1.1431, - "step": 1205500 - }, - { - "epoch": 2.87, - "learning_rate": 8.655711174839655e-07, - "loss": 1.1496, - "step": 1206000 - }, - { - "epoch": 2.87, - "learning_rate": 8.576381038510815e-07, - "loss": 1.1481, - "step": 1206500 - }, - { - "epoch": 2.87, - "learning_rate": 8.497050902181976e-07, - "loss": 1.1362, - "step": 1207000 - }, - { - "epoch": 2.87, - "learning_rate": 8.417720765853137e-07, - "loss": 1.1626, - "step": 1207500 - }, - { - "epoch": 2.87, - "learning_rate": 8.338390629524297e-07, - "loss": 1.1314, - "step": 1208000 - }, - { - "epoch": 2.88, - "learning_rate": 8.259060493195459e-07, - "loss": 1.1469, - "step": 1208500 - }, - { - "epoch": 2.88, - "learning_rate": 8.179730356866619e-07, - "loss": 1.1418, - "step": 1209000 - }, - { - "epoch": 2.88, - "learning_rate": 8.10040022053778e-07, - "loss": 1.1524, - "step": 1209500 - }, - { - "epoch": 2.88, - "learning_rate": 8.02107008420894e-07, - "loss": 1.1509, - "step": 1210000 - }, - { - "epoch": 2.88, - "learning_rate": 7.941739947880101e-07, - "loss": 1.1392, - "step": 1210500 - }, - { - "epoch": 2.88, - "learning_rate": 7.862409811551262e-07, - "loss": 1.1414, - "step": 1211000 - }, - { - "epoch": 2.88, - "learning_rate": 7.783079675222423e-07, - "loss": 1.1357, - "step": 1211500 - }, - { - "epoch": 2.88, - "learning_rate": 7.703749538893583e-07, - "loss": 1.1481, - "step": 1212000 - }, - { - "epoch": 2.89, - "learning_rate": 7.624419402564744e-07, - "loss": 1.1617, - "step": 1212500 - }, - { - "epoch": 2.89, - "learning_rate": 7.545089266235904e-07, - "loss": 1.1444, - "step": 1213000 - }, - { - "epoch": 2.89, - "learning_rate": 7.465759129907066e-07, - "loss": 1.155, - "step": 1213500 - }, - { - "epoch": 2.89, - "learning_rate": 7.386428993578226e-07, - "loss": 1.129, - "step": 1214000 - }, - { - "epoch": 2.89, - "learning_rate": 7.307098857249387e-07, - "loss": 1.154, - "step": 1214500 - }, - { - "epoch": 2.89, - "learning_rate": 7.227768720920547e-07, - "loss": 1.1496, - "step": 1215000 - }, - { - "epoch": 2.89, - "learning_rate": 7.148438584591707e-07, - "loss": 1.1322, - "step": 1215500 - }, - { - "epoch": 2.89, - "learning_rate": 7.069108448262869e-07, - "loss": 1.1548, - "step": 1216000 - }, - { - "epoch": 2.9, - "learning_rate": 6.98977831193403e-07, - "loss": 1.1403, - "step": 1216500 - }, - { - "epoch": 2.9, - "learning_rate": 6.91044817560519e-07, - "loss": 1.1508, - "step": 1217000 - }, - { - "epoch": 2.9, - "learning_rate": 6.83111803927635e-07, - "loss": 1.1455, - "step": 1217500 - }, - { - "epoch": 2.9, - "learning_rate": 6.751787902947511e-07, - "loss": 1.1381, - "step": 1218000 - }, - { - "epoch": 2.9, - "learning_rate": 6.672457766618673e-07, - "loss": 1.1509, - "step": 1218500 - }, - { - "epoch": 2.9, - "learning_rate": 6.593127630289834e-07, - "loss": 1.1373, - "step": 1219000 - }, - { - "epoch": 2.9, - "learning_rate": 6.513797493960993e-07, - "loss": 1.1482, - "step": 1219500 - }, - { - "epoch": 2.9, - "learning_rate": 6.434467357632154e-07, - "loss": 1.1388, - "step": 1220000 - }, - { - "epoch": 2.9, - "learning_rate": 6.355137221303315e-07, - "loss": 1.1422, - "step": 1220500 - }, - { - "epoch": 2.91, - "learning_rate": 6.275807084974477e-07, - "loss": 1.1342, - "step": 1221000 - }, - { - "epoch": 2.91, - "learning_rate": 6.196476948645637e-07, - "loss": 1.1466, - "step": 1221500 - }, - { - "epoch": 2.91, - "learning_rate": 6.117146812316797e-07, - "loss": 1.1359, - "step": 1222000 - }, - { - "epoch": 2.91, - "learning_rate": 6.037816675987959e-07, - "loss": 1.1504, - "step": 1222500 - }, - { - "epoch": 2.91, - "learning_rate": 5.958486539659119e-07, - "loss": 1.1369, - "step": 1223000 - }, - { - "epoch": 2.91, - "learning_rate": 5.879156403330279e-07, - "loss": 1.1404, - "step": 1223500 - }, - { - "epoch": 2.91, - "learning_rate": 5.79982626700144e-07, - "loss": 1.1438, - "step": 1224000 - }, - { - "epoch": 2.91, - "learning_rate": 5.720496130672601e-07, - "loss": 1.1355, - "step": 1224500 - }, - { - "epoch": 2.92, - "learning_rate": 5.641165994343762e-07, - "loss": 1.1538, - "step": 1225000 - }, - { - "epoch": 2.92, - "learning_rate": 5.561835858014922e-07, - "loss": 1.1396, - "step": 1225500 - }, - { - "epoch": 2.92, - "learning_rate": 5.482505721686083e-07, - "loss": 1.1504, - "step": 1226000 - }, - { - "epoch": 2.92, - "learning_rate": 5.403175585357244e-07, - "loss": 1.1428, - "step": 1226500 - }, - { - "epoch": 2.92, - "learning_rate": 5.323845449028405e-07, - "loss": 1.1513, - "step": 1227000 - }, - { - "epoch": 2.92, - "learning_rate": 5.244515312699566e-07, - "loss": 1.1489, - "step": 1227500 - }, - { - "epoch": 2.92, - "learning_rate": 5.165185176370726e-07, - "loss": 1.1469, - "step": 1228000 - }, - { - "epoch": 2.92, - "learning_rate": 5.085855040041886e-07, - "loss": 1.1604, - "step": 1228500 - }, - { - "epoch": 2.92, - "learning_rate": 5.006524903713048e-07, - "loss": 1.1446, - "step": 1229000 - }, - { - "epoch": 2.93, - "learning_rate": 4.927194767384208e-07, - "loss": 1.1386, - "step": 1229500 - }, - { - "epoch": 2.93, - "learning_rate": 4.847864631055369e-07, - "loss": 1.1435, - "step": 1230000 - }, - { - "epoch": 2.93, - "learning_rate": 4.768534494726529e-07, - "loss": 1.1456, - "step": 1230500 - }, - { - "epoch": 2.93, - "learning_rate": 4.68920435839769e-07, - "loss": 1.1422, - "step": 1231000 - }, - { - "epoch": 2.93, - "learning_rate": 4.609874222068851e-07, - "loss": 1.1365, - "step": 1231500 - }, - { - "epoch": 2.93, - "learning_rate": 4.5305440857400114e-07, - "loss": 1.1592, - "step": 1232000 - }, - { - "epoch": 2.93, - "learning_rate": 4.4512139494111727e-07, - "loss": 1.171, - "step": 1232500 - }, - { - "epoch": 2.93, - "learning_rate": 4.371883813082333e-07, - "loss": 1.1478, - "step": 1233000 - }, - { - "epoch": 2.94, - "learning_rate": 4.2925536767534937e-07, - "loss": 1.1486, - "step": 1233500 - }, - { - "epoch": 2.94, - "learning_rate": 4.2132235404246545e-07, - "loss": 1.1485, - "step": 1234000 - }, - { - "epoch": 2.94, - "learning_rate": 4.133893404095815e-07, - "loss": 1.131, - "step": 1234500 - }, - { - "epoch": 2.94, - "learning_rate": 4.054563267766976e-07, - "loss": 1.1415, - "step": 1235000 - }, - { - "epoch": 2.94, - "learning_rate": 3.975233131438137e-07, - "loss": 1.1492, - "step": 1235500 - }, - { - "epoch": 2.94, - "learning_rate": 3.8959029951092975e-07, - "loss": 1.1399, - "step": 1236000 - }, - { - "epoch": 2.94, - "learning_rate": 3.8165728587804583e-07, - "loss": 1.1325, - "step": 1236500 - }, - { - "epoch": 2.94, - "learning_rate": 3.7372427224516185e-07, - "loss": 1.1632, - "step": 1237000 - }, - { - "epoch": 2.95, - "learning_rate": 3.65791258612278e-07, - "loss": 1.1534, - "step": 1237500 - }, - { - "epoch": 2.95, - "learning_rate": 3.57858244979394e-07, - "loss": 1.1527, - "step": 1238000 - }, - { - "epoch": 2.95, - "learning_rate": 3.4992523134651014e-07, - "loss": 1.1318, - "step": 1238500 - }, - { - "epoch": 2.95, - "learning_rate": 3.4199221771362616e-07, - "loss": 1.1535, - "step": 1239000 - }, - { - "epoch": 2.95, - "learning_rate": 3.3405920408074224e-07, - "loss": 1.1368, - "step": 1239500 - }, - { - "epoch": 2.95, - "learning_rate": 3.261261904478583e-07, - "loss": 1.1448, - "step": 1240000 - }, - { - "epoch": 2.95, - "learning_rate": 3.181931768149744e-07, - "loss": 1.1597, - "step": 1240500 - }, - { - "epoch": 2.95, - "learning_rate": 3.1026016318209047e-07, - "loss": 1.149, - "step": 1241000 - }, - { - "epoch": 2.95, - "learning_rate": 3.0232714954920654e-07, - "loss": 1.1383, - "step": 1241500 - }, - { - "epoch": 2.96, - "learning_rate": 2.943941359163226e-07, - "loss": 1.1288, - "step": 1242000 - }, - { - "epoch": 2.96, - "learning_rate": 2.864611222834387e-07, - "loss": 1.1397, - "step": 1242500 - }, - { - "epoch": 2.96, - "learning_rate": 2.785281086505547e-07, - "loss": 1.1452, - "step": 1243000 - }, - { - "epoch": 2.96, - "learning_rate": 2.705950950176708e-07, - "loss": 1.1439, - "step": 1243500 - }, - { - "epoch": 2.96, - "learning_rate": 2.626620813847869e-07, - "loss": 1.144, - "step": 1244000 - }, - { - "epoch": 2.96, - "learning_rate": 2.5472906775190295e-07, - "loss": 1.139, - "step": 1244500 - }, - { - "epoch": 2.96, - "learning_rate": 2.4679605411901903e-07, - "loss": 1.138, - "step": 1245000 - }, - { - "epoch": 2.96, - "learning_rate": 2.388630404861351e-07, - "loss": 1.1524, - "step": 1245500 - }, - { - "epoch": 2.97, - "learning_rate": 2.3093002685325115e-07, - "loss": 1.145, - "step": 1246000 - }, - { - "epoch": 2.97, - "learning_rate": 2.2299701322036723e-07, - "loss": 1.1471, - "step": 1246500 - }, - { - "epoch": 2.97, - "learning_rate": 2.150639995874833e-07, - "loss": 1.1336, - "step": 1247000 - }, - { - "epoch": 2.97, - "learning_rate": 2.0713098595459938e-07, - "loss": 1.1451, - "step": 1247500 - }, - { - "epoch": 2.97, - "learning_rate": 1.9919797232171543e-07, - "loss": 1.1405, - "step": 1248000 - }, - { - "epoch": 2.97, - "learning_rate": 1.912649586888315e-07, - "loss": 1.1174, - "step": 1248500 - }, - { - "epoch": 2.97, - "learning_rate": 1.833319450559476e-07, - "loss": 1.1498, - "step": 1249000 - }, - { - "epoch": 2.97, - "learning_rate": 1.7539893142306366e-07, - "loss": 1.1536, - "step": 1249500 - }, - { - "epoch": 2.97, - "learning_rate": 1.6746591779017974e-07, - "loss": 1.1362, - "step": 1250000 - }, - { - "epoch": 2.98, - "learning_rate": 1.5953290415729582e-07, - "loss": 1.1453, - "step": 1250500 - }, - { - "epoch": 2.98, - "learning_rate": 1.515998905244119e-07, - "loss": 1.133, - "step": 1251000 - }, - { - "epoch": 2.98, - "learning_rate": 1.4366687689152794e-07, - "loss": 1.1544, - "step": 1251500 - }, - { - "epoch": 2.98, - "learning_rate": 1.3573386325864402e-07, - "loss": 1.1403, - "step": 1252000 - }, - { - "epoch": 2.98, - "learning_rate": 1.2780084962576007e-07, - "loss": 1.1533, - "step": 1252500 - }, - { - "epoch": 2.98, - "learning_rate": 1.1986783599287617e-07, - "loss": 1.1512, - "step": 1253000 - }, - { - "epoch": 2.98, - "learning_rate": 1.1193482235999224e-07, - "loss": 1.1531, - "step": 1253500 - }, - { - "epoch": 2.98, - "learning_rate": 1.040018087271083e-07, - "loss": 1.1366, - "step": 1254000 - }, - { - "epoch": 2.99, - "learning_rate": 9.606879509422438e-08, - "loss": 1.1419, - "step": 1254500 - }, - { - "epoch": 2.99, - "learning_rate": 8.813578146134045e-08, - "loss": 1.1226, - "step": 1255000 - }, - { - "epoch": 2.99, - "learning_rate": 8.020276782845652e-08, - "loss": 1.1279, - "step": 1255500 - }, - { - "epoch": 2.99, - "learning_rate": 7.22697541955726e-08, - "loss": 1.1539, - "step": 1256000 - }, - { - "epoch": 2.99, - "learning_rate": 6.433674056268866e-08, - "loss": 1.1498, - "step": 1256500 - }, - { - "epoch": 2.99, - "learning_rate": 5.6403726929804734e-08, - "loss": 1.133, - "step": 1257000 - }, - { - "epoch": 2.99, - "learning_rate": 4.8470713296920804e-08, - "loss": 1.1432, - "step": 1257500 - }, - { - "epoch": 2.99, - "learning_rate": 4.053769966403688e-08, - "loss": 1.1493, - "step": 1258000 - }, - { - "epoch": 3.0, - "learning_rate": 3.260468603115295e-08, - "loss": 1.1356, - "step": 1258500 - }, - { - "epoch": 3.0, - "learning_rate": 2.4671672398269018e-08, - "loss": 1.1405, - "step": 1259000 - }, - { - "epoch": 3.0, - "learning_rate": 1.6738658765385088e-08, - "loss": 1.148, - "step": 1259500 - }, - { - "epoch": 3.0, - "learning_rate": 8.805645132501161e-09, - "loss": 1.1409, - "step": 1260000 - }, - { - "epoch": 3.0, - "learning_rate": 8.726314996172322e-10, - "loss": 1.1348, - "step": 1260500 - }, - { - "epoch": 3.0, - "eval_loss": 1.1232123374938965, - "eval_runtime": 3625.9093, - "eval_samples_per_second": 366.127, - "eval_steps_per_second": 22.883, - "step": 1260555 + "epoch": 5.0, + "eval_loss": 1.0943782329559326, + "eval_runtime": 105.3282, + "eval_samples_per_second": 127.345, + "eval_steps_per_second": 1.994, + "step": 625000 } ], - "max_steps": 1260555, - "num_train_epochs": 3, - "total_flos": 2.738376510217363e+18, + "max_steps": 2500000, + "num_train_epochs": 20, + "total_flos": 7.883799881847431e+18, "trial_name": null, "trial_params": null }