{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9988175009854157, "eval_steps": 500, "global_step": 3804, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007883326763894363, "grad_norm": 7.40625, "learning_rate": 5.249343832020998e-08, "loss": 1.5552, "step": 1 }, { "epoch": 0.0015766653527788726, "grad_norm": 7.625, "learning_rate": 1.0498687664041996e-07, "loss": 1.571, "step": 2 }, { "epoch": 0.002364998029168309, "grad_norm": 7.1875, "learning_rate": 1.5748031496062994e-07, "loss": 1.5962, "step": 3 }, { "epoch": 0.0031533307055577452, "grad_norm": 7.375, "learning_rate": 2.0997375328083992e-07, "loss": 1.5272, "step": 4 }, { "epoch": 0.003941663381947182, "grad_norm": 7.84375, "learning_rate": 2.624671916010499e-07, "loss": 1.5807, "step": 5 }, { "epoch": 0.004729996058336618, "grad_norm": 6.9375, "learning_rate": 3.149606299212599e-07, "loss": 1.5745, "step": 6 }, { "epoch": 0.005518328734726055, "grad_norm": 7.21875, "learning_rate": 3.6745406824146983e-07, "loss": 1.581, "step": 7 }, { "epoch": 0.0063066614111154905, "grad_norm": 7.125, "learning_rate": 4.1994750656167983e-07, "loss": 1.5976, "step": 8 }, { "epoch": 0.007094994087504927, "grad_norm": 7.46875, "learning_rate": 4.724409448818898e-07, "loss": 1.5635, "step": 9 }, { "epoch": 0.007883326763894364, "grad_norm": 7.875, "learning_rate": 5.249343832020998e-07, "loss": 1.6039, "step": 10 }, { "epoch": 0.008671659440283799, "grad_norm": 7.4375, "learning_rate": 5.774278215223097e-07, "loss": 1.5753, "step": 11 }, { "epoch": 0.009459992116673236, "grad_norm": 7.78125, "learning_rate": 6.299212598425198e-07, "loss": 1.5941, "step": 12 }, { "epoch": 0.010248324793062672, "grad_norm": 7.21875, "learning_rate": 6.824146981627297e-07, "loss": 1.5215, "step": 13 }, { "epoch": 0.01103665746945211, "grad_norm": 7.34375, "learning_rate": 7.349081364829397e-07, "loss": 1.5621, "step": 14 }, { "epoch": 0.011824990145841546, "grad_norm": 7.09375, "learning_rate": 7.874015748031496e-07, "loss": 1.5786, "step": 15 }, { "epoch": 0.012613322822230981, "grad_norm": 7.28125, "learning_rate": 8.398950131233597e-07, "loss": 1.5547, "step": 16 }, { "epoch": 0.013401655498620418, "grad_norm": 7.375, "learning_rate": 8.923884514435696e-07, "loss": 1.5608, "step": 17 }, { "epoch": 0.014189988175009854, "grad_norm": 7.125, "learning_rate": 9.448818897637796e-07, "loss": 1.5304, "step": 18 }, { "epoch": 0.014978320851399291, "grad_norm": 7.6875, "learning_rate": 9.973753280839895e-07, "loss": 1.5914, "step": 19 }, { "epoch": 0.015766653527788728, "grad_norm": 7.5, "learning_rate": 1.0498687664041996e-06, "loss": 1.5736, "step": 20 }, { "epoch": 0.016554986204178165, "grad_norm": 7.34375, "learning_rate": 1.1023622047244096e-06, "loss": 1.5563, "step": 21 }, { "epoch": 0.017343318880567598, "grad_norm": 7.3125, "learning_rate": 1.1548556430446194e-06, "loss": 1.5337, "step": 22 }, { "epoch": 0.018131651556957035, "grad_norm": 7.125, "learning_rate": 1.2073490813648295e-06, "loss": 1.531, "step": 23 }, { "epoch": 0.01891998423334647, "grad_norm": 6.96875, "learning_rate": 1.2598425196850396e-06, "loss": 1.5528, "step": 24 }, { "epoch": 0.019708316909735908, "grad_norm": 6.75, "learning_rate": 1.3123359580052494e-06, "loss": 1.5142, "step": 25 }, { "epoch": 0.020496649586125345, "grad_norm": 6.6875, "learning_rate": 1.3648293963254594e-06, "loss": 1.5307, "step": 26 }, { "epoch": 0.02128498226251478, "grad_norm": 6.5625, "learning_rate": 1.4173228346456693e-06, "loss": 1.5154, "step": 27 }, { "epoch": 0.02207331493890422, "grad_norm": 6.65625, "learning_rate": 1.4698162729658793e-06, "loss": 1.4851, "step": 28 }, { "epoch": 0.022861647615293655, "grad_norm": 6.625, "learning_rate": 1.5223097112860894e-06, "loss": 1.5212, "step": 29 }, { "epoch": 0.023649980291683092, "grad_norm": 7.0625, "learning_rate": 1.5748031496062992e-06, "loss": 1.5181, "step": 30 }, { "epoch": 0.024438312968072525, "grad_norm": 6.21875, "learning_rate": 1.6272965879265093e-06, "loss": 1.5331, "step": 31 }, { "epoch": 0.025226645644461962, "grad_norm": 6.34375, "learning_rate": 1.6797900262467193e-06, "loss": 1.5017, "step": 32 }, { "epoch": 0.0260149783208514, "grad_norm": 6.40625, "learning_rate": 1.7322834645669292e-06, "loss": 1.495, "step": 33 }, { "epoch": 0.026803310997240835, "grad_norm": 6.0, "learning_rate": 1.7847769028871392e-06, "loss": 1.4716, "step": 34 }, { "epoch": 0.027591643673630272, "grad_norm": 6.15625, "learning_rate": 1.8372703412073493e-06, "loss": 1.4553, "step": 35 }, { "epoch": 0.02837997635001971, "grad_norm": 6.25, "learning_rate": 1.8897637795275591e-06, "loss": 1.4745, "step": 36 }, { "epoch": 0.029168309026409146, "grad_norm": 5.75, "learning_rate": 1.942257217847769e-06, "loss": 1.4979, "step": 37 }, { "epoch": 0.029956641702798582, "grad_norm": 6.0, "learning_rate": 1.994750656167979e-06, "loss": 1.5335, "step": 38 }, { "epoch": 0.030744974379188016, "grad_norm": 5.6875, "learning_rate": 2.0472440944881893e-06, "loss": 1.4554, "step": 39 }, { "epoch": 0.031533307055577456, "grad_norm": 5.78125, "learning_rate": 2.099737532808399e-06, "loss": 1.4293, "step": 40 }, { "epoch": 0.03232163973196689, "grad_norm": 5.4375, "learning_rate": 2.152230971128609e-06, "loss": 1.4583, "step": 41 }, { "epoch": 0.03310997240835633, "grad_norm": 5.65625, "learning_rate": 2.2047244094488192e-06, "loss": 1.4475, "step": 42 }, { "epoch": 0.03389830508474576, "grad_norm": 5.28125, "learning_rate": 2.257217847769029e-06, "loss": 1.4319, "step": 43 }, { "epoch": 0.034686637761135196, "grad_norm": 5.3125, "learning_rate": 2.309711286089239e-06, "loss": 1.4623, "step": 44 }, { "epoch": 0.035474970437524636, "grad_norm": 4.875, "learning_rate": 2.362204724409449e-06, "loss": 1.4554, "step": 45 }, { "epoch": 0.03626330311391407, "grad_norm": 5.125, "learning_rate": 2.414698162729659e-06, "loss": 1.4275, "step": 46 }, { "epoch": 0.03705163579030351, "grad_norm": 5.28125, "learning_rate": 2.467191601049869e-06, "loss": 1.3967, "step": 47 }, { "epoch": 0.03783996846669294, "grad_norm": 5.03125, "learning_rate": 2.519685039370079e-06, "loss": 1.368, "step": 48 }, { "epoch": 0.03862830114308238, "grad_norm": 5.15625, "learning_rate": 2.5721784776902894e-06, "loss": 1.4013, "step": 49 }, { "epoch": 0.039416633819471816, "grad_norm": 5.3125, "learning_rate": 2.6246719160104988e-06, "loss": 1.3803, "step": 50 }, { "epoch": 0.04020496649586126, "grad_norm": 5.125, "learning_rate": 2.677165354330709e-06, "loss": 1.3857, "step": 51 }, { "epoch": 0.04099329917225069, "grad_norm": 4.96875, "learning_rate": 2.729658792650919e-06, "loss": 1.3482, "step": 52 }, { "epoch": 0.04178163184864012, "grad_norm": 4.875, "learning_rate": 2.782152230971129e-06, "loss": 1.3454, "step": 53 }, { "epoch": 0.04256996452502956, "grad_norm": 5.0625, "learning_rate": 2.8346456692913386e-06, "loss": 1.3634, "step": 54 }, { "epoch": 0.043358297201419, "grad_norm": 4.875, "learning_rate": 2.887139107611549e-06, "loss": 1.3518, "step": 55 }, { "epoch": 0.04414662987780844, "grad_norm": 4.59375, "learning_rate": 2.9396325459317587e-06, "loss": 1.3198, "step": 56 }, { "epoch": 0.04493496255419787, "grad_norm": 4.65625, "learning_rate": 2.992125984251969e-06, "loss": 1.287, "step": 57 }, { "epoch": 0.04572329523058731, "grad_norm": 4.15625, "learning_rate": 3.0446194225721788e-06, "loss": 1.2587, "step": 58 }, { "epoch": 0.046511627906976744, "grad_norm": 4.25, "learning_rate": 3.097112860892389e-06, "loss": 1.2667, "step": 59 }, { "epoch": 0.047299960583366184, "grad_norm": 4.09375, "learning_rate": 3.1496062992125985e-06, "loss": 1.2318, "step": 60 }, { "epoch": 0.04808829325975562, "grad_norm": 4.15625, "learning_rate": 3.2020997375328087e-06, "loss": 1.2682, "step": 61 }, { "epoch": 0.04887662593614505, "grad_norm": 3.6875, "learning_rate": 3.2545931758530186e-06, "loss": 1.2062, "step": 62 }, { "epoch": 0.04966495861253449, "grad_norm": 3.4375, "learning_rate": 3.307086614173229e-06, "loss": 1.1985, "step": 63 }, { "epoch": 0.050453291288923924, "grad_norm": 3.34375, "learning_rate": 3.3595800524934387e-06, "loss": 1.2015, "step": 64 }, { "epoch": 0.051241623965313364, "grad_norm": 3.21875, "learning_rate": 3.412073490813649e-06, "loss": 1.1729, "step": 65 }, { "epoch": 0.0520299566417028, "grad_norm": 2.90625, "learning_rate": 3.4645669291338583e-06, "loss": 1.1661, "step": 66 }, { "epoch": 0.05281828931809224, "grad_norm": 3.109375, "learning_rate": 3.5170603674540686e-06, "loss": 1.1845, "step": 67 }, { "epoch": 0.05360662199448167, "grad_norm": 2.53125, "learning_rate": 3.5695538057742785e-06, "loss": 1.1604, "step": 68 }, { "epoch": 0.05439495467087111, "grad_norm": 2.265625, "learning_rate": 3.6220472440944887e-06, "loss": 1.1426, "step": 69 }, { "epoch": 0.055183287347260544, "grad_norm": 2.203125, "learning_rate": 3.6745406824146986e-06, "loss": 1.1399, "step": 70 }, { "epoch": 0.05597162002364998, "grad_norm": 1.953125, "learning_rate": 3.727034120734909e-06, "loss": 1.1339, "step": 71 }, { "epoch": 0.05675995270003942, "grad_norm": 1.8359375, "learning_rate": 3.7795275590551182e-06, "loss": 1.1533, "step": 72 }, { "epoch": 0.05754828537642885, "grad_norm": 1.9921875, "learning_rate": 3.8320209973753285e-06, "loss": 1.1007, "step": 73 }, { "epoch": 0.05833661805281829, "grad_norm": 1.4453125, "learning_rate": 3.884514435695538e-06, "loss": 1.0949, "step": 74 }, { "epoch": 0.059124950729207724, "grad_norm": 1.5390625, "learning_rate": 3.937007874015748e-06, "loss": 1.1107, "step": 75 }, { "epoch": 0.059913283405597165, "grad_norm": 1.453125, "learning_rate": 3.989501312335958e-06, "loss": 1.139, "step": 76 }, { "epoch": 0.0607016160819866, "grad_norm": 1.3671875, "learning_rate": 4.041994750656169e-06, "loss": 1.099, "step": 77 }, { "epoch": 0.06148994875837603, "grad_norm": 1.2890625, "learning_rate": 4.0944881889763785e-06, "loss": 1.1238, "step": 78 }, { "epoch": 0.06227828143476547, "grad_norm": 1.359375, "learning_rate": 4.146981627296588e-06, "loss": 1.1086, "step": 79 }, { "epoch": 0.06306661411115491, "grad_norm": 1.15625, "learning_rate": 4.199475065616798e-06, "loss": 1.0995, "step": 80 }, { "epoch": 0.06385494678754434, "grad_norm": 1.046875, "learning_rate": 4.251968503937008e-06, "loss": 1.0924, "step": 81 }, { "epoch": 0.06464327946393378, "grad_norm": 1.0078125, "learning_rate": 4.304461942257218e-06, "loss": 1.0925, "step": 82 }, { "epoch": 0.06543161214032321, "grad_norm": 0.94921875, "learning_rate": 4.356955380577429e-06, "loss": 1.0445, "step": 83 }, { "epoch": 0.06621994481671266, "grad_norm": 0.94921875, "learning_rate": 4.4094488188976384e-06, "loss": 1.0413, "step": 84 }, { "epoch": 0.06700827749310209, "grad_norm": 0.96875, "learning_rate": 4.461942257217848e-06, "loss": 1.034, "step": 85 }, { "epoch": 0.06779661016949153, "grad_norm": 0.98828125, "learning_rate": 4.514435695538058e-06, "loss": 1.0771, "step": 86 }, { "epoch": 0.06858494284588096, "grad_norm": 0.79296875, "learning_rate": 4.566929133858268e-06, "loss": 1.0459, "step": 87 }, { "epoch": 0.06937327552227039, "grad_norm": 1.2578125, "learning_rate": 4.619422572178478e-06, "loss": 1.0432, "step": 88 }, { "epoch": 0.07016160819865984, "grad_norm": 0.66015625, "learning_rate": 4.6719160104986885e-06, "loss": 1.0872, "step": 89 }, { "epoch": 0.07094994087504927, "grad_norm": 0.73046875, "learning_rate": 4.724409448818898e-06, "loss": 1.0918, "step": 90 }, { "epoch": 0.0717382735514387, "grad_norm": 0.65625, "learning_rate": 4.776902887139108e-06, "loss": 1.0813, "step": 91 }, { "epoch": 0.07252660622782814, "grad_norm": 0.53515625, "learning_rate": 4.829396325459318e-06, "loss": 1.0563, "step": 92 }, { "epoch": 0.07331493890421759, "grad_norm": 0.63671875, "learning_rate": 4.881889763779528e-06, "loss": 1.0821, "step": 93 }, { "epoch": 0.07410327158060702, "grad_norm": 0.5625, "learning_rate": 4.934383202099738e-06, "loss": 1.0597, "step": 94 }, { "epoch": 0.07489160425699645, "grad_norm": 0.56640625, "learning_rate": 4.986876640419948e-06, "loss": 1.0416, "step": 95 }, { "epoch": 0.07567993693338589, "grad_norm": 0.55078125, "learning_rate": 5.039370078740158e-06, "loss": 1.063, "step": 96 }, { "epoch": 0.07646826960977532, "grad_norm": 0.48828125, "learning_rate": 5.091863517060368e-06, "loss": 1.0482, "step": 97 }, { "epoch": 0.07725660228616477, "grad_norm": 0.54296875, "learning_rate": 5.144356955380579e-06, "loss": 1.0493, "step": 98 }, { "epoch": 0.0780449349625542, "grad_norm": 0.5078125, "learning_rate": 5.196850393700788e-06, "loss": 1.0284, "step": 99 }, { "epoch": 0.07883326763894363, "grad_norm": 0.61328125, "learning_rate": 5.2493438320209976e-06, "loss": 1.0025, "step": 100 }, { "epoch": 0.07962160031533307, "grad_norm": 0.51953125, "learning_rate": 5.301837270341208e-06, "loss": 1.0533, "step": 101 }, { "epoch": 0.08040993299172251, "grad_norm": 0.455078125, "learning_rate": 5.354330708661418e-06, "loss": 1.0221, "step": 102 }, { "epoch": 0.08119826566811195, "grad_norm": 0.478515625, "learning_rate": 5.406824146981627e-06, "loss": 1.0562, "step": 103 }, { "epoch": 0.08198659834450138, "grad_norm": 0.482421875, "learning_rate": 5.459317585301838e-06, "loss": 1.0994, "step": 104 }, { "epoch": 0.08277493102089081, "grad_norm": 0.48046875, "learning_rate": 5.511811023622048e-06, "loss": 1.0455, "step": 105 }, { "epoch": 0.08356326369728025, "grad_norm": 0.490234375, "learning_rate": 5.564304461942258e-06, "loss": 1.0903, "step": 106 }, { "epoch": 0.0843515963736697, "grad_norm": 0.4296875, "learning_rate": 5.616797900262467e-06, "loss": 1.0723, "step": 107 }, { "epoch": 0.08513992905005913, "grad_norm": 0.5625, "learning_rate": 5.669291338582677e-06, "loss": 1.0343, "step": 108 }, { "epoch": 0.08592826172644856, "grad_norm": 0.474609375, "learning_rate": 5.721784776902888e-06, "loss": 1.0331, "step": 109 }, { "epoch": 0.086716594402838, "grad_norm": 0.453125, "learning_rate": 5.774278215223098e-06, "loss": 0.9828, "step": 110 }, { "epoch": 0.08750492707922744, "grad_norm": 0.5, "learning_rate": 5.8267716535433075e-06, "loss": 1.0289, "step": 111 }, { "epoch": 0.08829325975561687, "grad_norm": 0.42578125, "learning_rate": 5.879265091863517e-06, "loss": 1.0485, "step": 112 }, { "epoch": 0.0890815924320063, "grad_norm": 0.51171875, "learning_rate": 5.931758530183728e-06, "loss": 1.0692, "step": 113 }, { "epoch": 0.08986992510839574, "grad_norm": 0.5, "learning_rate": 5.984251968503938e-06, "loss": 1.0364, "step": 114 }, { "epoch": 0.09065825778478517, "grad_norm": 0.431640625, "learning_rate": 6.036745406824147e-06, "loss": 1.0514, "step": 115 }, { "epoch": 0.09144659046117462, "grad_norm": 0.462890625, "learning_rate": 6.0892388451443576e-06, "loss": 0.9912, "step": 116 }, { "epoch": 0.09223492313756405, "grad_norm": 0.47265625, "learning_rate": 6.141732283464567e-06, "loss": 1.0265, "step": 117 }, { "epoch": 0.09302325581395349, "grad_norm": 0.4375, "learning_rate": 6.194225721784778e-06, "loss": 1.0739, "step": 118 }, { "epoch": 0.09381158849034292, "grad_norm": 0.474609375, "learning_rate": 6.246719160104987e-06, "loss": 1.0317, "step": 119 }, { "epoch": 0.09459992116673237, "grad_norm": 0.46484375, "learning_rate": 6.299212598425197e-06, "loss": 1.0289, "step": 120 }, { "epoch": 0.0953882538431218, "grad_norm": 0.4140625, "learning_rate": 6.351706036745408e-06, "loss": 1.0246, "step": 121 }, { "epoch": 0.09617658651951123, "grad_norm": 0.49609375, "learning_rate": 6.4041994750656174e-06, "loss": 1.0441, "step": 122 }, { "epoch": 0.09696491919590067, "grad_norm": 0.46875, "learning_rate": 6.456692913385827e-06, "loss": 1.0159, "step": 123 }, { "epoch": 0.0977532518722901, "grad_norm": 0.435546875, "learning_rate": 6.509186351706037e-06, "loss": 1.0384, "step": 124 }, { "epoch": 0.09854158454867955, "grad_norm": 0.5, "learning_rate": 6.561679790026248e-06, "loss": 1.0444, "step": 125 }, { "epoch": 0.09932991722506898, "grad_norm": 0.494140625, "learning_rate": 6.614173228346458e-06, "loss": 1.0304, "step": 126 }, { "epoch": 0.10011824990145841, "grad_norm": 0.447265625, "learning_rate": 6.666666666666667e-06, "loss": 1.0294, "step": 127 }, { "epoch": 0.10090658257784785, "grad_norm": 0.462890625, "learning_rate": 6.719160104986877e-06, "loss": 1.0412, "step": 128 }, { "epoch": 0.1016949152542373, "grad_norm": 0.515625, "learning_rate": 6.771653543307087e-06, "loss": 1.0679, "step": 129 }, { "epoch": 0.10248324793062673, "grad_norm": 0.4765625, "learning_rate": 6.824146981627298e-06, "loss": 1.0195, "step": 130 }, { "epoch": 0.10327158060701616, "grad_norm": 0.53125, "learning_rate": 6.876640419947507e-06, "loss": 1.0505, "step": 131 }, { "epoch": 0.1040599132834056, "grad_norm": 0.44921875, "learning_rate": 6.929133858267717e-06, "loss": 1.0319, "step": 132 }, { "epoch": 0.10484824595979503, "grad_norm": 0.412109375, "learning_rate": 6.981627296587927e-06, "loss": 1.0406, "step": 133 }, { "epoch": 0.10563657863618447, "grad_norm": 0.4140625, "learning_rate": 7.034120734908137e-06, "loss": 1.0565, "step": 134 }, { "epoch": 0.10642491131257391, "grad_norm": 0.4453125, "learning_rate": 7.086614173228347e-06, "loss": 1.0515, "step": 135 }, { "epoch": 0.10721324398896334, "grad_norm": 0.404296875, "learning_rate": 7.139107611548557e-06, "loss": 1.0038, "step": 136 }, { "epoch": 0.10800157666535277, "grad_norm": 0.400390625, "learning_rate": 7.191601049868768e-06, "loss": 1.0236, "step": 137 }, { "epoch": 0.10878990934174222, "grad_norm": 0.42578125, "learning_rate": 7.2440944881889774e-06, "loss": 1.0258, "step": 138 }, { "epoch": 0.10957824201813166, "grad_norm": 0.404296875, "learning_rate": 7.2965879265091864e-06, "loss": 1.0438, "step": 139 }, { "epoch": 0.11036657469452109, "grad_norm": 0.408203125, "learning_rate": 7.349081364829397e-06, "loss": 1.0076, "step": 140 }, { "epoch": 0.11115490737091052, "grad_norm": 0.46484375, "learning_rate": 7.401574803149607e-06, "loss": 1.062, "step": 141 }, { "epoch": 0.11194324004729996, "grad_norm": 0.40625, "learning_rate": 7.454068241469818e-06, "loss": 1.0203, "step": 142 }, { "epoch": 0.1127315727236894, "grad_norm": 0.421875, "learning_rate": 7.506561679790027e-06, "loss": 1.0061, "step": 143 }, { "epoch": 0.11351990540007884, "grad_norm": 0.44921875, "learning_rate": 7.5590551181102365e-06, "loss": 1.0477, "step": 144 }, { "epoch": 0.11430823807646827, "grad_norm": 0.40234375, "learning_rate": 7.611548556430447e-06, "loss": 1.0303, "step": 145 }, { "epoch": 0.1150965707528577, "grad_norm": 0.455078125, "learning_rate": 7.664041994750657e-06, "loss": 0.9997, "step": 146 }, { "epoch": 0.11588490342924715, "grad_norm": 0.419921875, "learning_rate": 7.716535433070867e-06, "loss": 1.0597, "step": 147 }, { "epoch": 0.11667323610563658, "grad_norm": 0.421875, "learning_rate": 7.769028871391077e-06, "loss": 1.0717, "step": 148 }, { "epoch": 0.11746156878202602, "grad_norm": 0.4921875, "learning_rate": 7.821522309711287e-06, "loss": 1.0063, "step": 149 }, { "epoch": 0.11824990145841545, "grad_norm": 0.419921875, "learning_rate": 7.874015748031496e-06, "loss": 1.0444, "step": 150 }, { "epoch": 0.11903823413480488, "grad_norm": 0.42578125, "learning_rate": 7.926509186351706e-06, "loss": 1.0361, "step": 151 }, { "epoch": 0.11982656681119433, "grad_norm": 0.4453125, "learning_rate": 7.979002624671916e-06, "loss": 1.034, "step": 152 }, { "epoch": 0.12061489948758376, "grad_norm": 0.408203125, "learning_rate": 8.031496062992128e-06, "loss": 1.036, "step": 153 }, { "epoch": 0.1214032321639732, "grad_norm": 0.404296875, "learning_rate": 8.083989501312337e-06, "loss": 1.0458, "step": 154 }, { "epoch": 0.12219156484036263, "grad_norm": 0.404296875, "learning_rate": 8.136482939632546e-06, "loss": 0.9731, "step": 155 }, { "epoch": 0.12297989751675206, "grad_norm": 0.40234375, "learning_rate": 8.188976377952757e-06, "loss": 1.0208, "step": 156 }, { "epoch": 0.12376823019314151, "grad_norm": 0.427734375, "learning_rate": 8.241469816272967e-06, "loss": 1.0294, "step": 157 }, { "epoch": 0.12455656286953094, "grad_norm": 0.455078125, "learning_rate": 8.293963254593177e-06, "loss": 1.0406, "step": 158 }, { "epoch": 0.12534489554592038, "grad_norm": 0.439453125, "learning_rate": 8.346456692913387e-06, "loss": 1.0032, "step": 159 }, { "epoch": 0.12613322822230982, "grad_norm": 0.466796875, "learning_rate": 8.398950131233596e-06, "loss": 1.0163, "step": 160 }, { "epoch": 0.12692156089869924, "grad_norm": 0.380859375, "learning_rate": 8.451443569553806e-06, "loss": 1.0421, "step": 161 }, { "epoch": 0.1277098935750887, "grad_norm": 0.3984375, "learning_rate": 8.503937007874016e-06, "loss": 0.9828, "step": 162 }, { "epoch": 0.12849822625147814, "grad_norm": 0.46875, "learning_rate": 8.556430446194226e-06, "loss": 1.0062, "step": 163 }, { "epoch": 0.12928655892786756, "grad_norm": 0.498046875, "learning_rate": 8.608923884514436e-06, "loss": 0.9982, "step": 164 }, { "epoch": 0.130074891604257, "grad_norm": 0.41015625, "learning_rate": 8.661417322834647e-06, "loss": 0.9824, "step": 165 }, { "epoch": 0.13086322428064642, "grad_norm": 0.396484375, "learning_rate": 8.713910761154857e-06, "loss": 1.0277, "step": 166 }, { "epoch": 0.13165155695703587, "grad_norm": 0.4453125, "learning_rate": 8.766404199475065e-06, "loss": 1.0119, "step": 167 }, { "epoch": 0.13243988963342532, "grad_norm": 0.384765625, "learning_rate": 8.818897637795277e-06, "loss": 1.0193, "step": 168 }, { "epoch": 0.13322822230981474, "grad_norm": 0.435546875, "learning_rate": 8.871391076115487e-06, "loss": 0.9782, "step": 169 }, { "epoch": 0.13401655498620418, "grad_norm": 0.4140625, "learning_rate": 8.923884514435697e-06, "loss": 1.0144, "step": 170 }, { "epoch": 0.1348048876625936, "grad_norm": 0.474609375, "learning_rate": 8.976377952755906e-06, "loss": 1.0344, "step": 171 }, { "epoch": 0.13559322033898305, "grad_norm": 0.431640625, "learning_rate": 9.028871391076116e-06, "loss": 1.0592, "step": 172 }, { "epoch": 0.1363815530153725, "grad_norm": 0.4453125, "learning_rate": 9.081364829396326e-06, "loss": 1.0221, "step": 173 }, { "epoch": 0.13716988569176192, "grad_norm": 0.42578125, "learning_rate": 9.133858267716536e-06, "loss": 1.017, "step": 174 }, { "epoch": 0.13795821836815136, "grad_norm": 0.41015625, "learning_rate": 9.186351706036746e-06, "loss": 1.0527, "step": 175 }, { "epoch": 0.13874655104454078, "grad_norm": 0.419921875, "learning_rate": 9.238845144356956e-06, "loss": 1.0462, "step": 176 }, { "epoch": 0.13953488372093023, "grad_norm": 0.421875, "learning_rate": 9.291338582677165e-06, "loss": 1.0399, "step": 177 }, { "epoch": 0.14032321639731968, "grad_norm": 0.4375, "learning_rate": 9.343832020997377e-06, "loss": 0.9774, "step": 178 }, { "epoch": 0.1411115490737091, "grad_norm": 0.380859375, "learning_rate": 9.396325459317585e-06, "loss": 0.9792, "step": 179 }, { "epoch": 0.14189988175009854, "grad_norm": 0.421875, "learning_rate": 9.448818897637797e-06, "loss": 1.0209, "step": 180 }, { "epoch": 0.142688214426488, "grad_norm": 0.40234375, "learning_rate": 9.501312335958006e-06, "loss": 1.0291, "step": 181 }, { "epoch": 0.1434765471028774, "grad_norm": 0.419921875, "learning_rate": 9.553805774278216e-06, "loss": 1.0026, "step": 182 }, { "epoch": 0.14426487977926686, "grad_norm": 0.50390625, "learning_rate": 9.606299212598426e-06, "loss": 0.9988, "step": 183 }, { "epoch": 0.14505321245565628, "grad_norm": 0.400390625, "learning_rate": 9.658792650918636e-06, "loss": 1.0169, "step": 184 }, { "epoch": 0.14584154513204572, "grad_norm": 0.416015625, "learning_rate": 9.711286089238846e-06, "loss": 1.0076, "step": 185 }, { "epoch": 0.14662987780843517, "grad_norm": 0.42578125, "learning_rate": 9.763779527559056e-06, "loss": 1.0571, "step": 186 }, { "epoch": 0.1474182104848246, "grad_norm": 0.4921875, "learning_rate": 9.816272965879266e-06, "loss": 0.9648, "step": 187 }, { "epoch": 0.14820654316121404, "grad_norm": 0.4375, "learning_rate": 9.868766404199475e-06, "loss": 1.0119, "step": 188 }, { "epoch": 0.14899487583760346, "grad_norm": 0.443359375, "learning_rate": 9.921259842519685e-06, "loss": 0.9939, "step": 189 }, { "epoch": 0.1497832085139929, "grad_norm": 0.466796875, "learning_rate": 9.973753280839897e-06, "loss": 1.0267, "step": 190 }, { "epoch": 0.15057154119038235, "grad_norm": 0.40234375, "learning_rate": 1.0026246719160107e-05, "loss": 0.9898, "step": 191 }, { "epoch": 0.15135987386677177, "grad_norm": 0.443359375, "learning_rate": 1.0078740157480316e-05, "loss": 1.0504, "step": 192 }, { "epoch": 0.15214820654316122, "grad_norm": 0.400390625, "learning_rate": 1.0131233595800525e-05, "loss": 1.0126, "step": 193 }, { "epoch": 0.15293653921955064, "grad_norm": 0.392578125, "learning_rate": 1.0183727034120736e-05, "loss": 1.0576, "step": 194 }, { "epoch": 0.15372487189594009, "grad_norm": 0.462890625, "learning_rate": 1.0236220472440946e-05, "loss": 0.9992, "step": 195 }, { "epoch": 0.15451320457232953, "grad_norm": 0.4296875, "learning_rate": 1.0288713910761157e-05, "loss": 1.0325, "step": 196 }, { "epoch": 0.15530153724871895, "grad_norm": 0.43359375, "learning_rate": 1.0341207349081366e-05, "loss": 0.9883, "step": 197 }, { "epoch": 0.1560898699251084, "grad_norm": 0.41015625, "learning_rate": 1.0393700787401575e-05, "loss": 1.0054, "step": 198 }, { "epoch": 0.15687820260149785, "grad_norm": 0.404296875, "learning_rate": 1.0446194225721787e-05, "loss": 0.9867, "step": 199 }, { "epoch": 0.15766653527788727, "grad_norm": 0.470703125, "learning_rate": 1.0498687664041995e-05, "loss": 1.0184, "step": 200 }, { "epoch": 0.1584548679542767, "grad_norm": 0.41796875, "learning_rate": 1.0551181102362205e-05, "loss": 1.0285, "step": 201 }, { "epoch": 0.15924320063066613, "grad_norm": 0.3984375, "learning_rate": 1.0603674540682417e-05, "loss": 1.0003, "step": 202 }, { "epoch": 0.16003153330705558, "grad_norm": 0.400390625, "learning_rate": 1.0656167979002625e-05, "loss": 1.0113, "step": 203 }, { "epoch": 0.16081986598344503, "grad_norm": 0.40234375, "learning_rate": 1.0708661417322836e-05, "loss": 1.0439, "step": 204 }, { "epoch": 0.16160819865983445, "grad_norm": 0.376953125, "learning_rate": 1.0761154855643046e-05, "loss": 0.9971, "step": 205 }, { "epoch": 0.1623965313362239, "grad_norm": 0.404296875, "learning_rate": 1.0813648293963254e-05, "loss": 0.9851, "step": 206 }, { "epoch": 0.1631848640126133, "grad_norm": 0.423828125, "learning_rate": 1.0866141732283466e-05, "loss": 1.0168, "step": 207 }, { "epoch": 0.16397319668900276, "grad_norm": 0.453125, "learning_rate": 1.0918635170603676e-05, "loss": 1.0231, "step": 208 }, { "epoch": 0.1647615293653922, "grad_norm": 0.40625, "learning_rate": 1.0971128608923884e-05, "loss": 1.0015, "step": 209 }, { "epoch": 0.16554986204178163, "grad_norm": 0.427734375, "learning_rate": 1.1023622047244095e-05, "loss": 0.9867, "step": 210 }, { "epoch": 0.16633819471817107, "grad_norm": 0.447265625, "learning_rate": 1.1076115485564305e-05, "loss": 1.0285, "step": 211 }, { "epoch": 0.1671265273945605, "grad_norm": 0.384765625, "learning_rate": 1.1128608923884517e-05, "loss": 1.0083, "step": 212 }, { "epoch": 0.16791486007094994, "grad_norm": 0.46875, "learning_rate": 1.1181102362204725e-05, "loss": 0.9951, "step": 213 }, { "epoch": 0.1687031927473394, "grad_norm": 0.404296875, "learning_rate": 1.1233595800524935e-05, "loss": 1.0157, "step": 214 }, { "epoch": 0.1694915254237288, "grad_norm": 0.3984375, "learning_rate": 1.1286089238845146e-05, "loss": 1.0336, "step": 215 }, { "epoch": 0.17027985810011825, "grad_norm": 0.400390625, "learning_rate": 1.1338582677165354e-05, "loss": 1.0273, "step": 216 }, { "epoch": 0.17106819077650767, "grad_norm": 0.421875, "learning_rate": 1.1391076115485564e-05, "loss": 1.0069, "step": 217 }, { "epoch": 0.17185652345289712, "grad_norm": 0.419921875, "learning_rate": 1.1443569553805776e-05, "loss": 1.0406, "step": 218 }, { "epoch": 0.17264485612928657, "grad_norm": 0.400390625, "learning_rate": 1.1496062992125985e-05, "loss": 1.0294, "step": 219 }, { "epoch": 0.173433188805676, "grad_norm": 0.3984375, "learning_rate": 1.1548556430446195e-05, "loss": 1.0782, "step": 220 }, { "epoch": 0.17422152148206543, "grad_norm": 0.3828125, "learning_rate": 1.1601049868766405e-05, "loss": 1.0033, "step": 221 }, { "epoch": 0.17500985415845488, "grad_norm": 0.44921875, "learning_rate": 1.1653543307086615e-05, "loss": 0.9869, "step": 222 }, { "epoch": 0.1757981868348443, "grad_norm": 0.42578125, "learning_rate": 1.1706036745406827e-05, "loss": 0.9926, "step": 223 }, { "epoch": 0.17658651951123375, "grad_norm": 0.41015625, "learning_rate": 1.1758530183727035e-05, "loss": 1.0091, "step": 224 }, { "epoch": 0.17737485218762317, "grad_norm": 0.451171875, "learning_rate": 1.1811023622047245e-05, "loss": 1.0029, "step": 225 }, { "epoch": 0.1781631848640126, "grad_norm": 0.400390625, "learning_rate": 1.1863517060367456e-05, "loss": 1.021, "step": 226 }, { "epoch": 0.17895151754040206, "grad_norm": 0.408203125, "learning_rate": 1.1916010498687664e-05, "loss": 0.9986, "step": 227 }, { "epoch": 0.17973985021679148, "grad_norm": 0.443359375, "learning_rate": 1.1968503937007876e-05, "loss": 1.0043, "step": 228 }, { "epoch": 0.18052818289318093, "grad_norm": 0.4296875, "learning_rate": 1.2020997375328086e-05, "loss": 1.0149, "step": 229 }, { "epoch": 0.18131651556957035, "grad_norm": 0.41796875, "learning_rate": 1.2073490813648294e-05, "loss": 1.02, "step": 230 }, { "epoch": 0.1821048482459598, "grad_norm": 0.4296875, "learning_rate": 1.2125984251968505e-05, "loss": 1.0239, "step": 231 }, { "epoch": 0.18289318092234924, "grad_norm": 0.41796875, "learning_rate": 1.2178477690288715e-05, "loss": 1.0165, "step": 232 }, { "epoch": 0.18368151359873866, "grad_norm": 0.408203125, "learning_rate": 1.2230971128608923e-05, "loss": 1.0226, "step": 233 }, { "epoch": 0.1844698462751281, "grad_norm": 0.404296875, "learning_rate": 1.2283464566929135e-05, "loss": 1.0122, "step": 234 }, { "epoch": 0.18525817895151753, "grad_norm": 0.400390625, "learning_rate": 1.2335958005249345e-05, "loss": 1.0171, "step": 235 }, { "epoch": 0.18604651162790697, "grad_norm": 0.515625, "learning_rate": 1.2388451443569556e-05, "loss": 1.0222, "step": 236 }, { "epoch": 0.18683484430429642, "grad_norm": 0.376953125, "learning_rate": 1.2440944881889764e-05, "loss": 1.0187, "step": 237 }, { "epoch": 0.18762317698068584, "grad_norm": 0.435546875, "learning_rate": 1.2493438320209974e-05, "loss": 0.9899, "step": 238 }, { "epoch": 0.1884115096570753, "grad_norm": 0.38671875, "learning_rate": 1.2545931758530186e-05, "loss": 0.9659, "step": 239 }, { "epoch": 0.18919984233346474, "grad_norm": 0.4296875, "learning_rate": 1.2598425196850394e-05, "loss": 1.0132, "step": 240 }, { "epoch": 0.18998817500985415, "grad_norm": 0.400390625, "learning_rate": 1.2650918635170604e-05, "loss": 1.0605, "step": 241 }, { "epoch": 0.1907765076862436, "grad_norm": 0.546875, "learning_rate": 1.2703412073490815e-05, "loss": 0.9796, "step": 242 }, { "epoch": 0.19156484036263302, "grad_norm": 0.41015625, "learning_rate": 1.2755905511811025e-05, "loss": 1.0097, "step": 243 }, { "epoch": 0.19235317303902247, "grad_norm": 0.404296875, "learning_rate": 1.2808398950131235e-05, "loss": 0.9917, "step": 244 }, { "epoch": 0.19314150571541192, "grad_norm": 0.39453125, "learning_rate": 1.2860892388451445e-05, "loss": 0.9958, "step": 245 }, { "epoch": 0.19392983839180133, "grad_norm": 0.439453125, "learning_rate": 1.2913385826771655e-05, "loss": 1.012, "step": 246 }, { "epoch": 0.19471817106819078, "grad_norm": 0.39453125, "learning_rate": 1.2965879265091864e-05, "loss": 1.0232, "step": 247 }, { "epoch": 0.1955065037445802, "grad_norm": 0.435546875, "learning_rate": 1.3018372703412074e-05, "loss": 0.9983, "step": 248 }, { "epoch": 0.19629483642096965, "grad_norm": 0.4609375, "learning_rate": 1.3070866141732284e-05, "loss": 1.0027, "step": 249 }, { "epoch": 0.1970831690973591, "grad_norm": 0.4609375, "learning_rate": 1.3123359580052496e-05, "loss": 1.0297, "step": 250 }, { "epoch": 0.19787150177374851, "grad_norm": 0.443359375, "learning_rate": 1.3175853018372704e-05, "loss": 1.0626, "step": 251 }, { "epoch": 0.19865983445013796, "grad_norm": 0.453125, "learning_rate": 1.3228346456692915e-05, "loss": 1.0253, "step": 252 }, { "epoch": 0.19944816712652738, "grad_norm": 0.59375, "learning_rate": 1.3280839895013125e-05, "loss": 1.007, "step": 253 }, { "epoch": 0.20023649980291683, "grad_norm": 0.3828125, "learning_rate": 1.3333333333333333e-05, "loss": 0.9976, "step": 254 }, { "epoch": 0.20102483247930628, "grad_norm": 0.51171875, "learning_rate": 1.3385826771653545e-05, "loss": 1.0159, "step": 255 }, { "epoch": 0.2018131651556957, "grad_norm": 0.427734375, "learning_rate": 1.3438320209973755e-05, "loss": 0.9926, "step": 256 }, { "epoch": 0.20260149783208514, "grad_norm": 0.47265625, "learning_rate": 1.3490813648293963e-05, "loss": 1.0161, "step": 257 }, { "epoch": 0.2033898305084746, "grad_norm": 0.421875, "learning_rate": 1.3543307086614174e-05, "loss": 1.0056, "step": 258 }, { "epoch": 0.204178163184864, "grad_norm": 0.490234375, "learning_rate": 1.3595800524934384e-05, "loss": 1.001, "step": 259 }, { "epoch": 0.20496649586125346, "grad_norm": 0.458984375, "learning_rate": 1.3648293963254596e-05, "loss": 1.0386, "step": 260 }, { "epoch": 0.20575482853764288, "grad_norm": 0.40234375, "learning_rate": 1.3700787401574804e-05, "loss": 0.9631, "step": 261 }, { "epoch": 0.20654316121403232, "grad_norm": 0.412109375, "learning_rate": 1.3753280839895014e-05, "loss": 0.9699, "step": 262 }, { "epoch": 0.20733149389042177, "grad_norm": 0.375, "learning_rate": 1.3805774278215225e-05, "loss": 0.9761, "step": 263 }, { "epoch": 0.2081198265668112, "grad_norm": 0.40625, "learning_rate": 1.3858267716535433e-05, "loss": 0.9955, "step": 264 }, { "epoch": 0.20890815924320064, "grad_norm": 0.392578125, "learning_rate": 1.3910761154855643e-05, "loss": 1.0333, "step": 265 }, { "epoch": 0.20969649191959006, "grad_norm": 0.404296875, "learning_rate": 1.3963254593175855e-05, "loss": 1.0126, "step": 266 }, { "epoch": 0.2104848245959795, "grad_norm": 0.375, "learning_rate": 1.4015748031496063e-05, "loss": 1.0209, "step": 267 }, { "epoch": 0.21127315727236895, "grad_norm": 0.380859375, "learning_rate": 1.4068241469816274e-05, "loss": 1.0327, "step": 268 }, { "epoch": 0.21206148994875837, "grad_norm": 0.392578125, "learning_rate": 1.4120734908136484e-05, "loss": 0.9772, "step": 269 }, { "epoch": 0.21284982262514782, "grad_norm": 0.396484375, "learning_rate": 1.4173228346456694e-05, "loss": 0.9935, "step": 270 }, { "epoch": 0.21363815530153724, "grad_norm": 0.396484375, "learning_rate": 1.4225721784776904e-05, "loss": 1.0421, "step": 271 }, { "epoch": 0.21442648797792668, "grad_norm": 0.396484375, "learning_rate": 1.4278215223097114e-05, "loss": 1.0297, "step": 272 }, { "epoch": 0.21521482065431613, "grad_norm": 0.416015625, "learning_rate": 1.4330708661417324e-05, "loss": 0.9722, "step": 273 }, { "epoch": 0.21600315333070555, "grad_norm": 0.388671875, "learning_rate": 1.4383202099737535e-05, "loss": 1.01, "step": 274 }, { "epoch": 0.216791486007095, "grad_norm": 0.3828125, "learning_rate": 1.4435695538057743e-05, "loss": 0.9912, "step": 275 }, { "epoch": 0.21757981868348444, "grad_norm": 0.40625, "learning_rate": 1.4488188976377955e-05, "loss": 0.9993, "step": 276 }, { "epoch": 0.21836815135987386, "grad_norm": 0.51953125, "learning_rate": 1.4540682414698165e-05, "loss": 0.9692, "step": 277 }, { "epoch": 0.2191564840362633, "grad_norm": 0.4296875, "learning_rate": 1.4593175853018373e-05, "loss": 1.0384, "step": 278 }, { "epoch": 0.21994481671265273, "grad_norm": 0.384765625, "learning_rate": 1.4645669291338584e-05, "loss": 1.0104, "step": 279 }, { "epoch": 0.22073314938904218, "grad_norm": 0.421875, "learning_rate": 1.4698162729658794e-05, "loss": 1.0115, "step": 280 }, { "epoch": 0.22152148206543162, "grad_norm": 0.37109375, "learning_rate": 1.4750656167979002e-05, "loss": 1.0325, "step": 281 }, { "epoch": 0.22230981474182104, "grad_norm": 0.419921875, "learning_rate": 1.4803149606299214e-05, "loss": 0.9788, "step": 282 }, { "epoch": 0.2230981474182105, "grad_norm": 0.40234375, "learning_rate": 1.4855643044619424e-05, "loss": 0.9662, "step": 283 }, { "epoch": 0.2238864800945999, "grad_norm": 0.4453125, "learning_rate": 1.4908136482939635e-05, "loss": 1.0126, "step": 284 }, { "epoch": 0.22467481277098936, "grad_norm": 0.43359375, "learning_rate": 1.4960629921259843e-05, "loss": 1.0336, "step": 285 }, { "epoch": 0.2254631454473788, "grad_norm": 0.41796875, "learning_rate": 1.5013123359580053e-05, "loss": 1.0055, "step": 286 }, { "epoch": 0.22625147812376822, "grad_norm": 0.44921875, "learning_rate": 1.5065616797900265e-05, "loss": 0.9964, "step": 287 }, { "epoch": 0.22703981080015767, "grad_norm": 0.43359375, "learning_rate": 1.5118110236220473e-05, "loss": 0.9858, "step": 288 }, { "epoch": 0.2278281434765471, "grad_norm": 0.46875, "learning_rate": 1.5170603674540683e-05, "loss": 0.989, "step": 289 }, { "epoch": 0.22861647615293654, "grad_norm": 0.4453125, "learning_rate": 1.5223097112860894e-05, "loss": 0.9741, "step": 290 }, { "epoch": 0.22940480882932598, "grad_norm": 0.3984375, "learning_rate": 1.5275590551181102e-05, "loss": 0.9986, "step": 291 }, { "epoch": 0.2301931415057154, "grad_norm": 0.419921875, "learning_rate": 1.5328083989501314e-05, "loss": 0.9919, "step": 292 }, { "epoch": 0.23098147418210485, "grad_norm": 0.427734375, "learning_rate": 1.5380577427821522e-05, "loss": 0.981, "step": 293 }, { "epoch": 0.2317698068584943, "grad_norm": 0.44921875, "learning_rate": 1.5433070866141734e-05, "loss": 1.0194, "step": 294 }, { "epoch": 0.23255813953488372, "grad_norm": 0.4140625, "learning_rate": 1.5485564304461945e-05, "loss": 0.9942, "step": 295 }, { "epoch": 0.23334647221127316, "grad_norm": 0.4375, "learning_rate": 1.5538057742782153e-05, "loss": 1.0136, "step": 296 }, { "epoch": 0.23413480488766258, "grad_norm": 0.404296875, "learning_rate": 1.559055118110236e-05, "loss": 0.9982, "step": 297 }, { "epoch": 0.23492313756405203, "grad_norm": 0.43359375, "learning_rate": 1.5643044619422573e-05, "loss": 1.011, "step": 298 }, { "epoch": 0.23571147024044148, "grad_norm": 0.404296875, "learning_rate": 1.5695538057742785e-05, "loss": 0.9744, "step": 299 }, { "epoch": 0.2364998029168309, "grad_norm": 0.419921875, "learning_rate": 1.5748031496062993e-05, "loss": 0.9997, "step": 300 }, { "epoch": 0.23728813559322035, "grad_norm": 0.41015625, "learning_rate": 1.5800524934383204e-05, "loss": 0.9924, "step": 301 }, { "epoch": 0.23807646826960976, "grad_norm": 0.427734375, "learning_rate": 1.5853018372703412e-05, "loss": 0.9761, "step": 302 }, { "epoch": 0.2388648009459992, "grad_norm": 0.3828125, "learning_rate": 1.5905511811023624e-05, "loss": 1.0081, "step": 303 }, { "epoch": 0.23965313362238866, "grad_norm": 0.3984375, "learning_rate": 1.5958005249343832e-05, "loss": 1.0839, "step": 304 }, { "epoch": 0.24044146629877808, "grad_norm": 0.39453125, "learning_rate": 1.6010498687664044e-05, "loss": 1.0126, "step": 305 }, { "epoch": 0.24122979897516753, "grad_norm": 0.384765625, "learning_rate": 1.6062992125984255e-05, "loss": 0.9882, "step": 306 }, { "epoch": 0.24201813165155694, "grad_norm": 0.400390625, "learning_rate": 1.6115485564304463e-05, "loss": 1.0306, "step": 307 }, { "epoch": 0.2428064643279464, "grad_norm": 0.408203125, "learning_rate": 1.6167979002624675e-05, "loss": 1.0075, "step": 308 }, { "epoch": 0.24359479700433584, "grad_norm": 0.388671875, "learning_rate": 1.6220472440944883e-05, "loss": 1.0078, "step": 309 }, { "epoch": 0.24438312968072526, "grad_norm": 0.392578125, "learning_rate": 1.627296587926509e-05, "loss": 0.9944, "step": 310 }, { "epoch": 0.2451714623571147, "grad_norm": 0.41796875, "learning_rate": 1.6325459317585303e-05, "loss": 0.9811, "step": 311 }, { "epoch": 0.24595979503350412, "grad_norm": 0.390625, "learning_rate": 1.6377952755905514e-05, "loss": 0.9903, "step": 312 }, { "epoch": 0.24674812770989357, "grad_norm": 0.416015625, "learning_rate": 1.6430446194225722e-05, "loss": 0.9757, "step": 313 }, { "epoch": 0.24753646038628302, "grad_norm": 0.427734375, "learning_rate": 1.6482939632545934e-05, "loss": 0.9899, "step": 314 }, { "epoch": 0.24832479306267244, "grad_norm": 0.5078125, "learning_rate": 1.6535433070866142e-05, "loss": 1.0039, "step": 315 }, { "epoch": 0.24911312573906189, "grad_norm": 0.3828125, "learning_rate": 1.6587926509186354e-05, "loss": 0.9697, "step": 316 }, { "epoch": 0.24990145841545133, "grad_norm": 0.44140625, "learning_rate": 1.6640419947506562e-05, "loss": 1.0356, "step": 317 }, { "epoch": 0.25068979109184075, "grad_norm": 0.37890625, "learning_rate": 1.6692913385826773e-05, "loss": 0.9999, "step": 318 }, { "epoch": 0.2514781237682302, "grad_norm": 0.41796875, "learning_rate": 1.6745406824146985e-05, "loss": 0.983, "step": 319 }, { "epoch": 0.25226645644461965, "grad_norm": 0.4375, "learning_rate": 1.6797900262467193e-05, "loss": 1.0024, "step": 320 }, { "epoch": 0.25305478912100904, "grad_norm": 0.421875, "learning_rate": 1.68503937007874e-05, "loss": 1.0105, "step": 321 }, { "epoch": 0.2538431217973985, "grad_norm": 0.41796875, "learning_rate": 1.6902887139107613e-05, "loss": 0.9893, "step": 322 }, { "epoch": 0.25463145447378793, "grad_norm": 0.61328125, "learning_rate": 1.695538057742782e-05, "loss": 1.0008, "step": 323 }, { "epoch": 0.2554197871501774, "grad_norm": 0.376953125, "learning_rate": 1.7007874015748032e-05, "loss": 0.9703, "step": 324 }, { "epoch": 0.2562081198265668, "grad_norm": 0.41015625, "learning_rate": 1.7060367454068244e-05, "loss": 0.9707, "step": 325 }, { "epoch": 0.2569964525029563, "grad_norm": 0.4140625, "learning_rate": 1.7112860892388452e-05, "loss": 0.9699, "step": 326 }, { "epoch": 0.25778478517934567, "grad_norm": 0.388671875, "learning_rate": 1.7165354330708663e-05, "loss": 0.9962, "step": 327 }, { "epoch": 0.2585731178557351, "grad_norm": 0.4140625, "learning_rate": 1.721784776902887e-05, "loss": 0.9907, "step": 328 }, { "epoch": 0.25936145053212456, "grad_norm": 0.416015625, "learning_rate": 1.7270341207349083e-05, "loss": 0.997, "step": 329 }, { "epoch": 0.260149783208514, "grad_norm": 0.3828125, "learning_rate": 1.7322834645669295e-05, "loss": 1.016, "step": 330 }, { "epoch": 0.26093811588490345, "grad_norm": 0.369140625, "learning_rate": 1.7375328083989503e-05, "loss": 1.0139, "step": 331 }, { "epoch": 0.26172644856129285, "grad_norm": 0.388671875, "learning_rate": 1.7427821522309714e-05, "loss": 0.9988, "step": 332 }, { "epoch": 0.2625147812376823, "grad_norm": 0.384765625, "learning_rate": 1.7480314960629923e-05, "loss": 0.9918, "step": 333 }, { "epoch": 0.26330311391407174, "grad_norm": 0.4375, "learning_rate": 1.753280839895013e-05, "loss": 1.026, "step": 334 }, { "epoch": 0.2640914465904612, "grad_norm": 0.3828125, "learning_rate": 1.7585301837270342e-05, "loss": 1.0105, "step": 335 }, { "epoch": 0.26487977926685063, "grad_norm": 0.396484375, "learning_rate": 1.7637795275590554e-05, "loss": 0.9827, "step": 336 }, { "epoch": 0.26566811194324, "grad_norm": 0.40234375, "learning_rate": 1.7690288713910762e-05, "loss": 0.9552, "step": 337 }, { "epoch": 0.2664564446196295, "grad_norm": 0.380859375, "learning_rate": 1.7742782152230973e-05, "loss": 1.01, "step": 338 }, { "epoch": 0.2672447772960189, "grad_norm": 0.38671875, "learning_rate": 1.779527559055118e-05, "loss": 0.9779, "step": 339 }, { "epoch": 0.26803310997240837, "grad_norm": 0.369140625, "learning_rate": 1.7847769028871393e-05, "loss": 0.9594, "step": 340 }, { "epoch": 0.2688214426487978, "grad_norm": 0.44140625, "learning_rate": 1.79002624671916e-05, "loss": 0.9976, "step": 341 }, { "epoch": 0.2696097753251872, "grad_norm": 0.419921875, "learning_rate": 1.7952755905511813e-05, "loss": 0.9706, "step": 342 }, { "epoch": 0.27039810800157665, "grad_norm": 0.37109375, "learning_rate": 1.8005249343832024e-05, "loss": 1.005, "step": 343 }, { "epoch": 0.2711864406779661, "grad_norm": 0.40234375, "learning_rate": 1.8057742782152232e-05, "loss": 1.0057, "step": 344 }, { "epoch": 0.27197477335435555, "grad_norm": 0.375, "learning_rate": 1.811023622047244e-05, "loss": 0.9891, "step": 345 }, { "epoch": 0.272763106030745, "grad_norm": 0.412109375, "learning_rate": 1.8162729658792652e-05, "loss": 0.9833, "step": 346 }, { "epoch": 0.2735514387071344, "grad_norm": 0.373046875, "learning_rate": 1.821522309711286e-05, "loss": 0.9645, "step": 347 }, { "epoch": 0.27433977138352383, "grad_norm": 0.41015625, "learning_rate": 1.8267716535433072e-05, "loss": 0.9849, "step": 348 }, { "epoch": 0.2751281040599133, "grad_norm": 0.388671875, "learning_rate": 1.8320209973753283e-05, "loss": 1.0241, "step": 349 }, { "epoch": 0.27591643673630273, "grad_norm": 0.37109375, "learning_rate": 1.837270341207349e-05, "loss": 0.9945, "step": 350 }, { "epoch": 0.2767047694126922, "grad_norm": 0.396484375, "learning_rate": 1.8425196850393703e-05, "loss": 0.9844, "step": 351 }, { "epoch": 0.27749310208908157, "grad_norm": 0.3828125, "learning_rate": 1.847769028871391e-05, "loss": 1.0335, "step": 352 }, { "epoch": 0.278281434765471, "grad_norm": 0.384765625, "learning_rate": 1.8530183727034123e-05, "loss": 1.015, "step": 353 }, { "epoch": 0.27906976744186046, "grad_norm": 0.376953125, "learning_rate": 1.858267716535433e-05, "loss": 0.973, "step": 354 }, { "epoch": 0.2798581001182499, "grad_norm": 0.400390625, "learning_rate": 1.8635170603674542e-05, "loss": 1.0305, "step": 355 }, { "epoch": 0.28064643279463936, "grad_norm": 0.392578125, "learning_rate": 1.8687664041994754e-05, "loss": 1.036, "step": 356 }, { "epoch": 0.28143476547102875, "grad_norm": 0.453125, "learning_rate": 1.8740157480314962e-05, "loss": 1.0178, "step": 357 }, { "epoch": 0.2822230981474182, "grad_norm": 0.376953125, "learning_rate": 1.879265091863517e-05, "loss": 0.985, "step": 358 }, { "epoch": 0.28301143082380764, "grad_norm": 0.435546875, "learning_rate": 1.8845144356955382e-05, "loss": 0.942, "step": 359 }, { "epoch": 0.2837997635001971, "grad_norm": 0.390625, "learning_rate": 1.8897637795275593e-05, "loss": 1.0107, "step": 360 }, { "epoch": 0.28458809617658654, "grad_norm": 0.42578125, "learning_rate": 1.89501312335958e-05, "loss": 0.991, "step": 361 }, { "epoch": 0.285376428852976, "grad_norm": 0.380859375, "learning_rate": 1.9002624671916013e-05, "loss": 0.9533, "step": 362 }, { "epoch": 0.2861647615293654, "grad_norm": 0.41015625, "learning_rate": 1.905511811023622e-05, "loss": 1.0144, "step": 363 }, { "epoch": 0.2869530942057548, "grad_norm": 0.38671875, "learning_rate": 1.9107611548556433e-05, "loss": 1.013, "step": 364 }, { "epoch": 0.28774142688214427, "grad_norm": 0.447265625, "learning_rate": 1.916010498687664e-05, "loss": 0.9768, "step": 365 }, { "epoch": 0.2885297595585337, "grad_norm": 0.37890625, "learning_rate": 1.9212598425196852e-05, "loss": 0.9657, "step": 366 }, { "epoch": 0.28931809223492316, "grad_norm": 0.423828125, "learning_rate": 1.9265091863517064e-05, "loss": 1.0099, "step": 367 }, { "epoch": 0.29010642491131255, "grad_norm": 0.375, "learning_rate": 1.9317585301837272e-05, "loss": 0.9505, "step": 368 }, { "epoch": 0.290894757587702, "grad_norm": 0.41015625, "learning_rate": 1.937007874015748e-05, "loss": 0.9735, "step": 369 }, { "epoch": 0.29168309026409145, "grad_norm": 0.431640625, "learning_rate": 1.9422572178477692e-05, "loss": 1.0181, "step": 370 }, { "epoch": 0.2924714229404809, "grad_norm": 0.408203125, "learning_rate": 1.94750656167979e-05, "loss": 0.9697, "step": 371 }, { "epoch": 0.29325975561687034, "grad_norm": 0.38671875, "learning_rate": 1.952755905511811e-05, "loss": 0.9744, "step": 372 }, { "epoch": 0.29404808829325974, "grad_norm": 0.380859375, "learning_rate": 1.9580052493438323e-05, "loss": 0.972, "step": 373 }, { "epoch": 0.2948364209696492, "grad_norm": 0.435546875, "learning_rate": 1.963254593175853e-05, "loss": 0.9689, "step": 374 }, { "epoch": 0.29562475364603863, "grad_norm": 0.412109375, "learning_rate": 1.9685039370078743e-05, "loss": 0.9573, "step": 375 }, { "epoch": 0.2964130863224281, "grad_norm": 0.412109375, "learning_rate": 1.973753280839895e-05, "loss": 0.9949, "step": 376 }, { "epoch": 0.2972014189988175, "grad_norm": 0.390625, "learning_rate": 1.9790026246719162e-05, "loss": 0.973, "step": 377 }, { "epoch": 0.2979897516752069, "grad_norm": 0.37890625, "learning_rate": 1.984251968503937e-05, "loss": 0.953, "step": 378 }, { "epoch": 0.29877808435159636, "grad_norm": 0.478515625, "learning_rate": 1.9895013123359582e-05, "loss": 0.9717, "step": 379 }, { "epoch": 0.2995664170279858, "grad_norm": 0.40625, "learning_rate": 1.9947506561679793e-05, "loss": 1.0051, "step": 380 }, { "epoch": 0.30035474970437526, "grad_norm": 0.388671875, "learning_rate": 2e-05, "loss": 0.9827, "step": 381 }, { "epoch": 0.3011430823807647, "grad_norm": 0.388671875, "learning_rate": 1.9999995788314622e-05, "loss": 0.9754, "step": 382 }, { "epoch": 0.3019314150571541, "grad_norm": 0.380859375, "learning_rate": 1.9999983153262038e-05, "loss": 0.9996, "step": 383 }, { "epoch": 0.30271974773354354, "grad_norm": 0.412109375, "learning_rate": 1.999996209485289e-05, "loss": 0.999, "step": 384 }, { "epoch": 0.303508080409933, "grad_norm": 0.40625, "learning_rate": 1.999993261310491e-05, "loss": 0.9564, "step": 385 }, { "epoch": 0.30429641308632244, "grad_norm": 0.384765625, "learning_rate": 1.9999894708042943e-05, "loss": 0.9813, "step": 386 }, { "epoch": 0.3050847457627119, "grad_norm": 0.4140625, "learning_rate": 1.9999848379698906e-05, "loss": 0.9717, "step": 387 }, { "epoch": 0.3058730784391013, "grad_norm": 0.392578125, "learning_rate": 1.9999793628111833e-05, "loss": 1.0014, "step": 388 }, { "epoch": 0.3066614111154907, "grad_norm": 0.404296875, "learning_rate": 1.999973045332784e-05, "loss": 0.9726, "step": 389 }, { "epoch": 0.30744974379188017, "grad_norm": 0.376953125, "learning_rate": 1.9999658855400135e-05, "loss": 0.9922, "step": 390 }, { "epoch": 0.3082380764682696, "grad_norm": 0.369140625, "learning_rate": 1.9999578834389036e-05, "loss": 0.9586, "step": 391 }, { "epoch": 0.30902640914465906, "grad_norm": 0.390625, "learning_rate": 1.9999490390361947e-05, "loss": 0.9863, "step": 392 }, { "epoch": 0.30981474182104846, "grad_norm": 0.38671875, "learning_rate": 1.9999393523393365e-05, "loss": 0.9847, "step": 393 }, { "epoch": 0.3106030744974379, "grad_norm": 0.388671875, "learning_rate": 1.999928823356488e-05, "loss": 0.9776, "step": 394 }, { "epoch": 0.31139140717382735, "grad_norm": 0.388671875, "learning_rate": 1.9999174520965194e-05, "loss": 0.9786, "step": 395 }, { "epoch": 0.3121797398502168, "grad_norm": 0.369140625, "learning_rate": 1.999905238569008e-05, "loss": 0.9938, "step": 396 }, { "epoch": 0.31296807252660624, "grad_norm": 0.408203125, "learning_rate": 1.9998921827842423e-05, "loss": 0.9831, "step": 397 }, { "epoch": 0.3137564052029957, "grad_norm": 0.373046875, "learning_rate": 1.9998782847532195e-05, "loss": 1.0147, "step": 398 }, { "epoch": 0.3145447378793851, "grad_norm": 0.3828125, "learning_rate": 1.999863544487646e-05, "loss": 0.9815, "step": 399 }, { "epoch": 0.31533307055577453, "grad_norm": 0.37109375, "learning_rate": 1.999847961999939e-05, "loss": 0.9931, "step": 400 }, { "epoch": 0.316121403232164, "grad_norm": 0.361328125, "learning_rate": 1.9998315373032238e-05, "loss": 1.0079, "step": 401 }, { "epoch": 0.3169097359085534, "grad_norm": 0.39453125, "learning_rate": 1.999814270411335e-05, "loss": 1.0005, "step": 402 }, { "epoch": 0.31769806858494287, "grad_norm": 0.373046875, "learning_rate": 1.9997961613388176e-05, "loss": 0.9769, "step": 403 }, { "epoch": 0.31848640126133226, "grad_norm": 0.359375, "learning_rate": 1.9997772101009255e-05, "loss": 0.9722, "step": 404 }, { "epoch": 0.3192747339377217, "grad_norm": 0.388671875, "learning_rate": 1.9997574167136225e-05, "loss": 0.9898, "step": 405 }, { "epoch": 0.32006306661411116, "grad_norm": 0.40234375, "learning_rate": 1.9997367811935807e-05, "loss": 0.9997, "step": 406 }, { "epoch": 0.3208513992905006, "grad_norm": 0.396484375, "learning_rate": 1.999715303558182e-05, "loss": 0.9456, "step": 407 }, { "epoch": 0.32163973196689005, "grad_norm": 0.40625, "learning_rate": 1.999692983825518e-05, "loss": 1.012, "step": 408 }, { "epoch": 0.32242806464327944, "grad_norm": 0.37890625, "learning_rate": 1.99966982201439e-05, "loss": 0.9616, "step": 409 }, { "epoch": 0.3232163973196689, "grad_norm": 0.384765625, "learning_rate": 1.9996458181443072e-05, "loss": 1.0329, "step": 410 }, { "epoch": 0.32400472999605834, "grad_norm": 0.3671875, "learning_rate": 1.9996209722354896e-05, "loss": 0.9998, "step": 411 }, { "epoch": 0.3247930626724478, "grad_norm": 0.39453125, "learning_rate": 1.999595284308866e-05, "loss": 0.96, "step": 412 }, { "epoch": 0.32558139534883723, "grad_norm": 0.392578125, "learning_rate": 1.999568754386073e-05, "loss": 1.0168, "step": 413 }, { "epoch": 0.3263697280252266, "grad_norm": 0.361328125, "learning_rate": 1.9995413824894593e-05, "loss": 1.0132, "step": 414 }, { "epoch": 0.32715806070161607, "grad_norm": 0.3828125, "learning_rate": 1.9995131686420802e-05, "loss": 0.9727, "step": 415 }, { "epoch": 0.3279463933780055, "grad_norm": 0.3828125, "learning_rate": 1.999484112867702e-05, "loss": 0.946, "step": 416 }, { "epoch": 0.32873472605439497, "grad_norm": 0.365234375, "learning_rate": 1.9994542151907988e-05, "loss": 1.0183, "step": 417 }, { "epoch": 0.3295230587307844, "grad_norm": 0.40234375, "learning_rate": 1.9994234756365547e-05, "loss": 1.0107, "step": 418 }, { "epoch": 0.3303113914071738, "grad_norm": 0.380859375, "learning_rate": 1.999391894230863e-05, "loss": 0.9934, "step": 419 }, { "epoch": 0.33109972408356325, "grad_norm": 0.36328125, "learning_rate": 1.9993594710003262e-05, "loss": 1.0047, "step": 420 }, { "epoch": 0.3318880567599527, "grad_norm": 0.37890625, "learning_rate": 1.9993262059722548e-05, "loss": 1.0121, "step": 421 }, { "epoch": 0.33267638943634215, "grad_norm": 0.380859375, "learning_rate": 1.9992920991746694e-05, "loss": 0.9771, "step": 422 }, { "epoch": 0.3334647221127316, "grad_norm": 0.392578125, "learning_rate": 1.9992571506362997e-05, "loss": 0.9638, "step": 423 }, { "epoch": 0.334253054789121, "grad_norm": 0.365234375, "learning_rate": 1.999221360386584e-05, "loss": 0.9973, "step": 424 }, { "epoch": 0.33504138746551043, "grad_norm": 0.36328125, "learning_rate": 1.9991847284556703e-05, "loss": 0.9643, "step": 425 }, { "epoch": 0.3358297201418999, "grad_norm": 0.38671875, "learning_rate": 1.999147254874414e-05, "loss": 1.0144, "step": 426 }, { "epoch": 0.3366180528182893, "grad_norm": 0.369140625, "learning_rate": 1.9991089396743808e-05, "loss": 0.982, "step": 427 }, { "epoch": 0.3374063854946788, "grad_norm": 0.37109375, "learning_rate": 1.9990697828878453e-05, "loss": 0.9939, "step": 428 }, { "epoch": 0.33819471817106816, "grad_norm": 0.375, "learning_rate": 1.999029784547791e-05, "loss": 0.9675, "step": 429 }, { "epoch": 0.3389830508474576, "grad_norm": 0.408203125, "learning_rate": 1.9989889446879092e-05, "loss": 0.9893, "step": 430 }, { "epoch": 0.33977138352384706, "grad_norm": 0.373046875, "learning_rate": 1.9989472633426016e-05, "loss": 0.9634, "step": 431 }, { "epoch": 0.3405597162002365, "grad_norm": 0.439453125, "learning_rate": 1.9989047405469772e-05, "loss": 0.9818, "step": 432 }, { "epoch": 0.34134804887662595, "grad_norm": 0.373046875, "learning_rate": 1.9988613763368548e-05, "loss": 0.9545, "step": 433 }, { "epoch": 0.34213638155301535, "grad_norm": 0.3828125, "learning_rate": 1.9988171707487624e-05, "loss": 0.9891, "step": 434 }, { "epoch": 0.3429247142294048, "grad_norm": 0.365234375, "learning_rate": 1.9987721238199345e-05, "loss": 0.9812, "step": 435 }, { "epoch": 0.34371304690579424, "grad_norm": 0.3671875, "learning_rate": 1.9987262355883173e-05, "loss": 0.9598, "step": 436 }, { "epoch": 0.3445013795821837, "grad_norm": 0.375, "learning_rate": 1.9986795060925636e-05, "loss": 1.0044, "step": 437 }, { "epoch": 0.34528971225857313, "grad_norm": 0.373046875, "learning_rate": 1.9986319353720353e-05, "loss": 0.9795, "step": 438 }, { "epoch": 0.3460780449349626, "grad_norm": 0.384765625, "learning_rate": 1.9985835234668025e-05, "loss": 1.0037, "step": 439 }, { "epoch": 0.346866377611352, "grad_norm": 0.396484375, "learning_rate": 1.998534270417645e-05, "loss": 1.0155, "step": 440 }, { "epoch": 0.3476547102877414, "grad_norm": 0.365234375, "learning_rate": 1.9984841762660508e-05, "loss": 0.9587, "step": 441 }, { "epoch": 0.34844304296413087, "grad_norm": 0.365234375, "learning_rate": 1.9984332410542153e-05, "loss": 0.9546, "step": 442 }, { "epoch": 0.3492313756405203, "grad_norm": 0.462890625, "learning_rate": 1.9983814648250434e-05, "loss": 0.9695, "step": 443 }, { "epoch": 0.35001970831690976, "grad_norm": 0.38671875, "learning_rate": 1.9983288476221482e-05, "loss": 0.9875, "step": 444 }, { "epoch": 0.35080804099329915, "grad_norm": 0.38671875, "learning_rate": 1.9982753894898507e-05, "loss": 0.9634, "step": 445 }, { "epoch": 0.3515963736696886, "grad_norm": 0.36328125, "learning_rate": 1.9982210904731812e-05, "loss": 0.9962, "step": 446 }, { "epoch": 0.35238470634607805, "grad_norm": 0.400390625, "learning_rate": 1.9981659506178778e-05, "loss": 0.9588, "step": 447 }, { "epoch": 0.3531730390224675, "grad_norm": 0.380859375, "learning_rate": 1.9981099699703866e-05, "loss": 0.966, "step": 448 }, { "epoch": 0.35396137169885694, "grad_norm": 0.365234375, "learning_rate": 1.9980531485778624e-05, "loss": 0.9618, "step": 449 }, { "epoch": 0.35474970437524633, "grad_norm": 0.37890625, "learning_rate": 1.9979954864881672e-05, "loss": 0.9924, "step": 450 }, { "epoch": 0.3555380370516358, "grad_norm": 0.37890625, "learning_rate": 1.997936983749873e-05, "loss": 1.022, "step": 451 }, { "epoch": 0.3563263697280252, "grad_norm": 0.37890625, "learning_rate": 1.997877640412258e-05, "loss": 0.9671, "step": 452 }, { "epoch": 0.3571147024044147, "grad_norm": 0.369140625, "learning_rate": 1.9978174565253096e-05, "loss": 1.0056, "step": 453 }, { "epoch": 0.3579030350808041, "grad_norm": 0.37890625, "learning_rate": 1.9977564321397234e-05, "loss": 0.9771, "step": 454 }, { "epoch": 0.3586913677571935, "grad_norm": 0.361328125, "learning_rate": 1.9976945673069017e-05, "loss": 1.0012, "step": 455 }, { "epoch": 0.35947970043358296, "grad_norm": 0.37109375, "learning_rate": 1.997631862078956e-05, "loss": 0.9849, "step": 456 }, { "epoch": 0.3602680331099724, "grad_norm": 0.451171875, "learning_rate": 1.997568316508705e-05, "loss": 1.0087, "step": 457 }, { "epoch": 0.36105636578636185, "grad_norm": 0.365234375, "learning_rate": 1.997503930649676e-05, "loss": 0.9772, "step": 458 }, { "epoch": 0.3618446984627513, "grad_norm": 0.3671875, "learning_rate": 1.9974387045561022e-05, "loss": 0.9777, "step": 459 }, { "epoch": 0.3626330311391407, "grad_norm": 0.37109375, "learning_rate": 1.997372638282928e-05, "loss": 1.0056, "step": 460 }, { "epoch": 0.36342136381553014, "grad_norm": 0.37109375, "learning_rate": 1.997305731885802e-05, "loss": 0.9777, "step": 461 }, { "epoch": 0.3642096964919196, "grad_norm": 0.3671875, "learning_rate": 1.9972379854210824e-05, "loss": 0.9615, "step": 462 }, { "epoch": 0.36499802916830903, "grad_norm": 0.3515625, "learning_rate": 1.9971693989458347e-05, "loss": 0.9739, "step": 463 }, { "epoch": 0.3657863618446985, "grad_norm": 0.466796875, "learning_rate": 1.9970999725178313e-05, "loss": 0.9744, "step": 464 }, { "epoch": 0.3665746945210879, "grad_norm": 0.404296875, "learning_rate": 1.9970297061955533e-05, "loss": 1.0397, "step": 465 }, { "epoch": 0.3673630271974773, "grad_norm": 0.365234375, "learning_rate": 1.9969586000381884e-05, "loss": 0.9941, "step": 466 }, { "epoch": 0.36815135987386677, "grad_norm": 0.375, "learning_rate": 1.9968866541056317e-05, "loss": 1.0239, "step": 467 }, { "epoch": 0.3689396925502562, "grad_norm": 0.35546875, "learning_rate": 1.9968138684584862e-05, "loss": 0.9818, "step": 468 }, { "epoch": 0.36972802522664566, "grad_norm": 0.359375, "learning_rate": 1.996740243158062e-05, "loss": 0.95, "step": 469 }, { "epoch": 0.37051635790303505, "grad_norm": 0.39453125, "learning_rate": 1.996665778266376e-05, "loss": 0.9997, "step": 470 }, { "epoch": 0.3713046905794245, "grad_norm": 0.3828125, "learning_rate": 1.9965904738461534e-05, "loss": 0.9859, "step": 471 }, { "epoch": 0.37209302325581395, "grad_norm": 0.388671875, "learning_rate": 1.9965143299608253e-05, "loss": 0.9789, "step": 472 }, { "epoch": 0.3728813559322034, "grad_norm": 0.361328125, "learning_rate": 1.996437346674531e-05, "loss": 1.0048, "step": 473 }, { "epoch": 0.37366968860859284, "grad_norm": 0.3828125, "learning_rate": 1.9963595240521158e-05, "loss": 0.9505, "step": 474 }, { "epoch": 0.3744580212849823, "grad_norm": 0.376953125, "learning_rate": 1.9962808621591334e-05, "loss": 1.0234, "step": 475 }, { "epoch": 0.3752463539613717, "grad_norm": 0.373046875, "learning_rate": 1.9962013610618423e-05, "loss": 1.035, "step": 476 }, { "epoch": 0.37603468663776113, "grad_norm": 0.361328125, "learning_rate": 1.996121020827211e-05, "loss": 1.0042, "step": 477 }, { "epoch": 0.3768230193141506, "grad_norm": 0.3828125, "learning_rate": 1.9960398415229114e-05, "loss": 1.0116, "step": 478 }, { "epoch": 0.37761135199054, "grad_norm": 0.365234375, "learning_rate": 1.995957823217325e-05, "loss": 0.9948, "step": 479 }, { "epoch": 0.37839968466692947, "grad_norm": 0.373046875, "learning_rate": 1.9958749659795382e-05, "loss": 1.0004, "step": 480 }, { "epoch": 0.37918801734331886, "grad_norm": 0.37109375, "learning_rate": 1.995791269879345e-05, "loss": 1.0098, "step": 481 }, { "epoch": 0.3799763500197083, "grad_norm": 0.36328125, "learning_rate": 1.9957067349872457e-05, "loss": 0.9672, "step": 482 }, { "epoch": 0.38076468269609776, "grad_norm": 0.369140625, "learning_rate": 1.995621361374447e-05, "loss": 0.9602, "step": 483 }, { "epoch": 0.3815530153724872, "grad_norm": 0.373046875, "learning_rate": 1.9955351491128624e-05, "loss": 1.0262, "step": 484 }, { "epoch": 0.38234134804887665, "grad_norm": 0.369140625, "learning_rate": 1.995448098275112e-05, "loss": 0.9738, "step": 485 }, { "epoch": 0.38312968072526604, "grad_norm": 0.396484375, "learning_rate": 1.9953602089345215e-05, "loss": 0.9812, "step": 486 }, { "epoch": 0.3839180134016555, "grad_norm": 0.373046875, "learning_rate": 1.9952714811651234e-05, "loss": 0.9821, "step": 487 }, { "epoch": 0.38470634607804494, "grad_norm": 0.375, "learning_rate": 1.9951819150416564e-05, "loss": 1.0031, "step": 488 }, { "epoch": 0.3854946787544344, "grad_norm": 0.3828125, "learning_rate": 1.995091510639566e-05, "loss": 0.9814, "step": 489 }, { "epoch": 0.38628301143082383, "grad_norm": 0.404296875, "learning_rate": 1.995000268035002e-05, "loss": 0.9554, "step": 490 }, { "epoch": 0.3870713441072132, "grad_norm": 0.3671875, "learning_rate": 1.9949081873048222e-05, "loss": 0.9974, "step": 491 }, { "epoch": 0.38785967678360267, "grad_norm": 0.396484375, "learning_rate": 1.9948152685265896e-05, "loss": 0.9794, "step": 492 }, { "epoch": 0.3886480094599921, "grad_norm": 0.36328125, "learning_rate": 1.9947215117785727e-05, "loss": 0.9883, "step": 493 }, { "epoch": 0.38943634213638156, "grad_norm": 0.353515625, "learning_rate": 1.9946269171397467e-05, "loss": 1.0018, "step": 494 }, { "epoch": 0.390224674812771, "grad_norm": 0.359375, "learning_rate": 1.9945314846897922e-05, "loss": 0.9787, "step": 495 }, { "epoch": 0.3910130074891604, "grad_norm": 0.373046875, "learning_rate": 1.9944352145090954e-05, "loss": 0.9933, "step": 496 }, { "epoch": 0.39180134016554985, "grad_norm": 0.390625, "learning_rate": 1.994338106678748e-05, "loss": 0.9867, "step": 497 }, { "epoch": 0.3925896728419393, "grad_norm": 0.361328125, "learning_rate": 1.9942401612805478e-05, "loss": 0.9678, "step": 498 }, { "epoch": 0.39337800551832874, "grad_norm": 0.359375, "learning_rate": 1.994141378396998e-05, "loss": 0.9702, "step": 499 }, { "epoch": 0.3941663381947182, "grad_norm": 0.37109375, "learning_rate": 1.9940417581113062e-05, "loss": 0.9788, "step": 500 }, { "epoch": 0.3949546708711076, "grad_norm": 0.369140625, "learning_rate": 1.9939413005073873e-05, "loss": 0.9596, "step": 501 }, { "epoch": 0.39574300354749703, "grad_norm": 0.35546875, "learning_rate": 1.99384000566986e-05, "loss": 1.0074, "step": 502 }, { "epoch": 0.3965313362238865, "grad_norm": 0.3671875, "learning_rate": 1.9937378736840486e-05, "loss": 0.9577, "step": 503 }, { "epoch": 0.3973196689002759, "grad_norm": 0.380859375, "learning_rate": 1.9936349046359833e-05, "loss": 0.9998, "step": 504 }, { "epoch": 0.39810800157666537, "grad_norm": 0.3828125, "learning_rate": 1.993531098612398e-05, "loss": 1.0003, "step": 505 }, { "epoch": 0.39889633425305476, "grad_norm": 0.353515625, "learning_rate": 1.9934264557007323e-05, "loss": 0.9469, "step": 506 }, { "epoch": 0.3996846669294442, "grad_norm": 0.36328125, "learning_rate": 1.9933209759891318e-05, "loss": 0.9549, "step": 507 }, { "epoch": 0.40047299960583366, "grad_norm": 0.384765625, "learning_rate": 1.993214659566445e-05, "loss": 0.9672, "step": 508 }, { "epoch": 0.4012613322822231, "grad_norm": 0.376953125, "learning_rate": 1.993107506522226e-05, "loss": 0.9662, "step": 509 }, { "epoch": 0.40204966495861255, "grad_norm": 0.357421875, "learning_rate": 1.9929995169467346e-05, "loss": 0.9694, "step": 510 }, { "epoch": 0.40283799763500194, "grad_norm": 0.36328125, "learning_rate": 1.9928906909309342e-05, "loss": 0.9751, "step": 511 }, { "epoch": 0.4036263303113914, "grad_norm": 0.3671875, "learning_rate": 1.9927810285664928e-05, "loss": 0.9804, "step": 512 }, { "epoch": 0.40441466298778084, "grad_norm": 0.37109375, "learning_rate": 1.992670529945783e-05, "loss": 0.9855, "step": 513 }, { "epoch": 0.4052029956641703, "grad_norm": 0.37109375, "learning_rate": 1.9925591951618822e-05, "loss": 0.9617, "step": 514 }, { "epoch": 0.40599132834055973, "grad_norm": 0.380859375, "learning_rate": 1.9924470243085716e-05, "loss": 1.0049, "step": 515 }, { "epoch": 0.4067796610169492, "grad_norm": 0.376953125, "learning_rate": 1.992334017480337e-05, "loss": 0.9773, "step": 516 }, { "epoch": 0.40756799369333857, "grad_norm": 0.380859375, "learning_rate": 1.992220174772368e-05, "loss": 0.9508, "step": 517 }, { "epoch": 0.408356326369728, "grad_norm": 0.376953125, "learning_rate": 1.9921054962805586e-05, "loss": 0.958, "step": 518 }, { "epoch": 0.40914465904611746, "grad_norm": 0.357421875, "learning_rate": 1.9919899821015066e-05, "loss": 1.0046, "step": 519 }, { "epoch": 0.4099329917225069, "grad_norm": 0.375, "learning_rate": 1.9918736323325146e-05, "loss": 0.9709, "step": 520 }, { "epoch": 0.41072132439889636, "grad_norm": 0.3671875, "learning_rate": 1.9917564470715876e-05, "loss": 0.9536, "step": 521 }, { "epoch": 0.41150965707528575, "grad_norm": 0.3828125, "learning_rate": 1.9916384264174354e-05, "loss": 0.9554, "step": 522 }, { "epoch": 0.4122979897516752, "grad_norm": 0.3828125, "learning_rate": 1.9915195704694714e-05, "loss": 0.9854, "step": 523 }, { "epoch": 0.41308632242806465, "grad_norm": 0.353515625, "learning_rate": 1.9913998793278116e-05, "loss": 0.9465, "step": 524 }, { "epoch": 0.4138746551044541, "grad_norm": 0.380859375, "learning_rate": 1.9912793530932765e-05, "loss": 0.9967, "step": 525 }, { "epoch": 0.41466298778084354, "grad_norm": 0.34375, "learning_rate": 1.9911579918673903e-05, "loss": 0.9529, "step": 526 }, { "epoch": 0.41545132045723293, "grad_norm": 0.361328125, "learning_rate": 1.99103579575238e-05, "loss": 0.9668, "step": 527 }, { "epoch": 0.4162396531336224, "grad_norm": 0.41796875, "learning_rate": 1.9909127648511758e-05, "loss": 1.0035, "step": 528 }, { "epoch": 0.4170279858100118, "grad_norm": 0.365234375, "learning_rate": 1.990788899267411e-05, "loss": 0.9814, "step": 529 }, { "epoch": 0.4178163184864013, "grad_norm": 0.392578125, "learning_rate": 1.9906641991054222e-05, "loss": 1.0052, "step": 530 }, { "epoch": 0.4186046511627907, "grad_norm": 0.39453125, "learning_rate": 1.9905386644702495e-05, "loss": 1.004, "step": 531 }, { "epoch": 0.4193929838391801, "grad_norm": 0.376953125, "learning_rate": 1.9904122954676345e-05, "loss": 0.9637, "step": 532 }, { "epoch": 0.42018131651556956, "grad_norm": 0.375, "learning_rate": 1.9902850922040227e-05, "loss": 0.9879, "step": 533 }, { "epoch": 0.420969649191959, "grad_norm": 0.40234375, "learning_rate": 1.990157054786563e-05, "loss": 0.9891, "step": 534 }, { "epoch": 0.42175798186834845, "grad_norm": 0.37890625, "learning_rate": 1.990028183323105e-05, "loss": 1.018, "step": 535 }, { "epoch": 0.4225463145447379, "grad_norm": 0.419921875, "learning_rate": 1.9898984779222027e-05, "loss": 0.9748, "step": 536 }, { "epoch": 0.4233346472211273, "grad_norm": 0.384765625, "learning_rate": 1.9897679386931115e-05, "loss": 0.9495, "step": 537 }, { "epoch": 0.42412297989751674, "grad_norm": 0.369140625, "learning_rate": 1.989636565745789e-05, "loss": 0.968, "step": 538 }, { "epoch": 0.4249113125739062, "grad_norm": 0.369140625, "learning_rate": 1.989504359190896e-05, "loss": 0.9657, "step": 539 }, { "epoch": 0.42569964525029563, "grad_norm": 0.38671875, "learning_rate": 1.9893713191397944e-05, "loss": 0.977, "step": 540 }, { "epoch": 0.4264879779266851, "grad_norm": 3.296875, "learning_rate": 1.98923744570455e-05, "loss": 0.9723, "step": 541 }, { "epoch": 0.42727631060307447, "grad_norm": 0.37890625, "learning_rate": 1.989102738997928e-05, "loss": 0.9623, "step": 542 }, { "epoch": 0.4280646432794639, "grad_norm": 0.392578125, "learning_rate": 1.9889671991333976e-05, "loss": 1.0245, "step": 543 }, { "epoch": 0.42885297595585337, "grad_norm": 0.3984375, "learning_rate": 1.9888308262251286e-05, "loss": 0.9864, "step": 544 }, { "epoch": 0.4296413086322428, "grad_norm": 0.35546875, "learning_rate": 1.9886936203879935e-05, "loss": 0.9633, "step": 545 }, { "epoch": 0.43042964130863226, "grad_norm": 0.34765625, "learning_rate": 1.9885555817375656e-05, "loss": 0.9791, "step": 546 }, { "epoch": 0.43121797398502165, "grad_norm": 0.37109375, "learning_rate": 1.9884167103901196e-05, "loss": 0.9089, "step": 547 }, { "epoch": 0.4320063066614111, "grad_norm": 0.369140625, "learning_rate": 1.988277006462633e-05, "loss": 0.9843, "step": 548 }, { "epoch": 0.43279463933780055, "grad_norm": 0.361328125, "learning_rate": 1.9881364700727827e-05, "loss": 0.9883, "step": 549 }, { "epoch": 0.43358297201419, "grad_norm": 0.384765625, "learning_rate": 1.9879951013389475e-05, "loss": 0.9755, "step": 550 }, { "epoch": 0.43437130469057944, "grad_norm": 0.3671875, "learning_rate": 1.9878529003802086e-05, "loss": 0.9638, "step": 551 }, { "epoch": 0.4351596373669689, "grad_norm": 0.349609375, "learning_rate": 1.987709867316346e-05, "loss": 0.9487, "step": 552 }, { "epoch": 0.4359479700433583, "grad_norm": 0.36328125, "learning_rate": 1.9875660022678427e-05, "loss": 0.9699, "step": 553 }, { "epoch": 0.4367363027197477, "grad_norm": 0.35546875, "learning_rate": 1.9874213053558807e-05, "loss": 0.9473, "step": 554 }, { "epoch": 0.4375246353961372, "grad_norm": 0.3515625, "learning_rate": 1.9872757767023445e-05, "loss": 0.9935, "step": 555 }, { "epoch": 0.4383129680725266, "grad_norm": 0.359375, "learning_rate": 1.9871294164298175e-05, "loss": 1.0113, "step": 556 }, { "epoch": 0.43910130074891607, "grad_norm": 0.37109375, "learning_rate": 1.9869822246615846e-05, "loss": 0.9742, "step": 557 }, { "epoch": 0.43988963342530546, "grad_norm": 0.345703125, "learning_rate": 1.9868342015216312e-05, "loss": 0.979, "step": 558 }, { "epoch": 0.4406779661016949, "grad_norm": 0.359375, "learning_rate": 1.986685347134642e-05, "loss": 0.9809, "step": 559 }, { "epoch": 0.44146629877808435, "grad_norm": 0.36328125, "learning_rate": 1.9865356616260035e-05, "loss": 0.9978, "step": 560 }, { "epoch": 0.4422546314544738, "grad_norm": 0.369140625, "learning_rate": 1.9863851451218006e-05, "loss": 1.0015, "step": 561 }, { "epoch": 0.44304296413086325, "grad_norm": 0.349609375, "learning_rate": 1.9862337977488194e-05, "loss": 0.942, "step": 562 }, { "epoch": 0.44383129680725264, "grad_norm": 0.36328125, "learning_rate": 1.986081619634545e-05, "loss": 1.0041, "step": 563 }, { "epoch": 0.4446196294836421, "grad_norm": 0.37109375, "learning_rate": 1.9859286109071626e-05, "loss": 0.9731, "step": 564 }, { "epoch": 0.44540796216003153, "grad_norm": 0.359375, "learning_rate": 1.985774771695558e-05, "loss": 0.9521, "step": 565 }, { "epoch": 0.446196294836421, "grad_norm": 0.380859375, "learning_rate": 1.985620102129315e-05, "loss": 1.0109, "step": 566 }, { "epoch": 0.44698462751281043, "grad_norm": 0.37109375, "learning_rate": 1.9854646023387173e-05, "loss": 0.9679, "step": 567 }, { "epoch": 0.4477729601891998, "grad_norm": 0.375, "learning_rate": 1.9853082724547482e-05, "loss": 1.0107, "step": 568 }, { "epoch": 0.44856129286558927, "grad_norm": 0.359375, "learning_rate": 1.9851511126090908e-05, "loss": 1.0223, "step": 569 }, { "epoch": 0.4493496255419787, "grad_norm": 0.357421875, "learning_rate": 1.9849931229341258e-05, "loss": 0.9595, "step": 570 }, { "epoch": 0.45013795821836816, "grad_norm": 0.359375, "learning_rate": 1.9848343035629345e-05, "loss": 0.969, "step": 571 }, { "epoch": 0.4509262908947576, "grad_norm": 0.349609375, "learning_rate": 1.9846746546292958e-05, "loss": 1.0221, "step": 572 }, { "epoch": 0.451714623571147, "grad_norm": 0.36328125, "learning_rate": 1.9845141762676885e-05, "loss": 0.9555, "step": 573 }, { "epoch": 0.45250295624753645, "grad_norm": 0.3515625, "learning_rate": 1.984352868613289e-05, "loss": 0.9642, "step": 574 }, { "epoch": 0.4532912889239259, "grad_norm": 0.353515625, "learning_rate": 1.9841907318019726e-05, "loss": 0.9633, "step": 575 }, { "epoch": 0.45407962160031534, "grad_norm": 0.353515625, "learning_rate": 1.9840277659703138e-05, "loss": 0.9851, "step": 576 }, { "epoch": 0.4548679542767048, "grad_norm": 0.361328125, "learning_rate": 1.9838639712555842e-05, "loss": 0.9732, "step": 577 }, { "epoch": 0.4556562869530942, "grad_norm": 0.4140625, "learning_rate": 1.983699347795754e-05, "loss": 0.9574, "step": 578 }, { "epoch": 0.45644461962948363, "grad_norm": 0.380859375, "learning_rate": 1.983533895729492e-05, "loss": 0.9908, "step": 579 }, { "epoch": 0.4572329523058731, "grad_norm": 0.359375, "learning_rate": 1.9833676151961648e-05, "loss": 0.9507, "step": 580 }, { "epoch": 0.4580212849822625, "grad_norm": 0.37109375, "learning_rate": 1.9832005063358366e-05, "loss": 1.0089, "step": 581 }, { "epoch": 0.45880961765865197, "grad_norm": 0.357421875, "learning_rate": 1.983032569289269e-05, "loss": 0.9691, "step": 582 }, { "epoch": 0.45959795033504136, "grad_norm": 0.33984375, "learning_rate": 1.9828638041979216e-05, "loss": 0.9785, "step": 583 }, { "epoch": 0.4603862830114308, "grad_norm": 0.3515625, "learning_rate": 1.982694211203952e-05, "loss": 1.009, "step": 584 }, { "epoch": 0.46117461568782026, "grad_norm": 0.408203125, "learning_rate": 1.9825237904502143e-05, "loss": 0.9593, "step": 585 }, { "epoch": 0.4619629483642097, "grad_norm": 0.357421875, "learning_rate": 1.9823525420802603e-05, "loss": 0.9895, "step": 586 }, { "epoch": 0.46275128104059915, "grad_norm": 0.345703125, "learning_rate": 1.9821804662383388e-05, "loss": 0.9566, "step": 587 }, { "epoch": 0.4635396137169886, "grad_norm": 0.3515625, "learning_rate": 1.9820075630693955e-05, "loss": 0.9822, "step": 588 }, { "epoch": 0.464327946393378, "grad_norm": 0.36328125, "learning_rate": 1.981833832719074e-05, "loss": 0.9606, "step": 589 }, { "epoch": 0.46511627906976744, "grad_norm": 0.359375, "learning_rate": 1.9816592753337125e-05, "loss": 0.988, "step": 590 }, { "epoch": 0.4659046117461569, "grad_norm": 0.349609375, "learning_rate": 1.981483891060348e-05, "loss": 0.9794, "step": 591 }, { "epoch": 0.46669294442254633, "grad_norm": 0.353515625, "learning_rate": 1.9813076800467134e-05, "loss": 0.9645, "step": 592 }, { "epoch": 0.4674812770989358, "grad_norm": 0.361328125, "learning_rate": 1.9811306424412368e-05, "loss": 1.0011, "step": 593 }, { "epoch": 0.46826960977532517, "grad_norm": 0.36328125, "learning_rate": 1.9809527783930444e-05, "loss": 0.964, "step": 594 }, { "epoch": 0.4690579424517146, "grad_norm": 0.36328125, "learning_rate": 1.9807740880519574e-05, "loss": 1.0075, "step": 595 }, { "epoch": 0.46984627512810406, "grad_norm": 0.36328125, "learning_rate": 1.9805945715684933e-05, "loss": 0.9807, "step": 596 }, { "epoch": 0.4706346078044935, "grad_norm": 0.35546875, "learning_rate": 1.9804142290938654e-05, "loss": 0.9773, "step": 597 }, { "epoch": 0.47142294048088296, "grad_norm": 0.369140625, "learning_rate": 1.9802330607799832e-05, "loss": 0.9783, "step": 598 }, { "epoch": 0.47221127315727235, "grad_norm": 0.359375, "learning_rate": 1.980051066779451e-05, "loss": 0.9547, "step": 599 }, { "epoch": 0.4729996058336618, "grad_norm": 0.37109375, "learning_rate": 1.9798682472455694e-05, "loss": 0.9982, "step": 600 }, { "epoch": 0.47378793851005124, "grad_norm": 0.361328125, "learning_rate": 1.9796846023323336e-05, "loss": 0.9645, "step": 601 }, { "epoch": 0.4745762711864407, "grad_norm": 0.41796875, "learning_rate": 1.9795001321944354e-05, "loss": 0.9686, "step": 602 }, { "epoch": 0.47536460386283014, "grad_norm": 0.357421875, "learning_rate": 1.97931483698726e-05, "loss": 0.9734, "step": 603 }, { "epoch": 0.47615293653921953, "grad_norm": 0.361328125, "learning_rate": 1.979128716866889e-05, "loss": 1.0012, "step": 604 }, { "epoch": 0.476941269215609, "grad_norm": 0.36328125, "learning_rate": 1.978941771990098e-05, "loss": 0.9741, "step": 605 }, { "epoch": 0.4777296018919984, "grad_norm": 0.359375, "learning_rate": 1.9787540025143576e-05, "loss": 0.9564, "step": 606 }, { "epoch": 0.47851793456838787, "grad_norm": 0.361328125, "learning_rate": 1.9785654085978334e-05, "loss": 1.0029, "step": 607 }, { "epoch": 0.4793062672447773, "grad_norm": 0.349609375, "learning_rate": 1.9783759903993843e-05, "loss": 0.9892, "step": 608 }, { "epoch": 0.4800945999211667, "grad_norm": 0.392578125, "learning_rate": 1.9781857480785645e-05, "loss": 0.9624, "step": 609 }, { "epoch": 0.48088293259755616, "grad_norm": 0.353515625, "learning_rate": 1.9779946817956227e-05, "loss": 1.0144, "step": 610 }, { "epoch": 0.4816712652739456, "grad_norm": 0.349609375, "learning_rate": 1.9778027917115006e-05, "loss": 0.9598, "step": 611 }, { "epoch": 0.48245959795033505, "grad_norm": 0.359375, "learning_rate": 1.9776100779878344e-05, "loss": 0.9511, "step": 612 }, { "epoch": 0.4832479306267245, "grad_norm": 0.357421875, "learning_rate": 1.9774165407869538e-05, "loss": 0.9886, "step": 613 }, { "epoch": 0.4840362633031139, "grad_norm": 0.375, "learning_rate": 1.977222180271883e-05, "loss": 1.0042, "step": 614 }, { "epoch": 0.48482459597950334, "grad_norm": 0.359375, "learning_rate": 1.977026996606339e-05, "loss": 0.9568, "step": 615 }, { "epoch": 0.4856129286558928, "grad_norm": 0.353515625, "learning_rate": 1.9768309899547313e-05, "loss": 0.9773, "step": 616 }, { "epoch": 0.48640126133228223, "grad_norm": 0.482421875, "learning_rate": 1.9766341604821646e-05, "loss": 0.9763, "step": 617 }, { "epoch": 0.4871895940086717, "grad_norm": 0.365234375, "learning_rate": 1.976436508354435e-05, "loss": 0.9852, "step": 618 }, { "epoch": 0.48797792668506107, "grad_norm": 0.349609375, "learning_rate": 1.976238033738033e-05, "loss": 0.9889, "step": 619 }, { "epoch": 0.4887662593614505, "grad_norm": 0.359375, "learning_rate": 1.97603873680014e-05, "loss": 0.9795, "step": 620 }, { "epoch": 0.48955459203783996, "grad_norm": 0.36328125, "learning_rate": 1.9758386177086324e-05, "loss": 0.9792, "step": 621 }, { "epoch": 0.4903429247142294, "grad_norm": 0.3515625, "learning_rate": 1.975637676632077e-05, "loss": 0.9573, "step": 622 }, { "epoch": 0.49113125739061886, "grad_norm": 0.357421875, "learning_rate": 1.9754359137397343e-05, "loss": 0.9761, "step": 623 }, { "epoch": 0.49191959006700825, "grad_norm": 0.3671875, "learning_rate": 1.9752333292015565e-05, "loss": 0.9971, "step": 624 }, { "epoch": 0.4927079227433977, "grad_norm": 0.34765625, "learning_rate": 1.9750299231881887e-05, "loss": 0.9387, "step": 625 }, { "epoch": 0.49349625541978714, "grad_norm": 0.35546875, "learning_rate": 1.9748256958709666e-05, "loss": 1.0062, "step": 626 }, { "epoch": 0.4942845880961766, "grad_norm": 0.357421875, "learning_rate": 1.9746206474219182e-05, "loss": 0.9558, "step": 627 }, { "epoch": 0.49507292077256604, "grad_norm": 0.357421875, "learning_rate": 1.9744147780137644e-05, "loss": 0.9446, "step": 628 }, { "epoch": 0.4958612534489555, "grad_norm": 0.35546875, "learning_rate": 1.974208087819916e-05, "loss": 0.9843, "step": 629 }, { "epoch": 0.4966495861253449, "grad_norm": 0.37890625, "learning_rate": 1.9740005770144762e-05, "loss": 0.9646, "step": 630 }, { "epoch": 0.4974379188017343, "grad_norm": 0.357421875, "learning_rate": 1.973792245772238e-05, "loss": 0.9737, "step": 631 }, { "epoch": 0.49822625147812377, "grad_norm": 0.349609375, "learning_rate": 1.973583094268688e-05, "loss": 0.982, "step": 632 }, { "epoch": 0.4990145841545132, "grad_norm": 0.369140625, "learning_rate": 1.9733731226800016e-05, "loss": 0.9926, "step": 633 }, { "epoch": 0.49980291683090267, "grad_norm": 0.384765625, "learning_rate": 1.973162331183045e-05, "loss": 1.0092, "step": 634 }, { "epoch": 0.5005912495072921, "grad_norm": 0.34375, "learning_rate": 1.9729507199553767e-05, "loss": 0.9784, "step": 635 }, { "epoch": 0.5013795821836815, "grad_norm": 0.3671875, "learning_rate": 1.9727382891752446e-05, "loss": 0.9654, "step": 636 }, { "epoch": 0.502167914860071, "grad_norm": 0.36328125, "learning_rate": 1.9725250390215863e-05, "loss": 0.9766, "step": 637 }, { "epoch": 0.5029562475364604, "grad_norm": 0.357421875, "learning_rate": 1.972310969674031e-05, "loss": 0.9745, "step": 638 }, { "epoch": 0.5037445802128498, "grad_norm": 0.37109375, "learning_rate": 1.9720960813128966e-05, "loss": 0.9489, "step": 639 }, { "epoch": 0.5045329128892393, "grad_norm": 0.359375, "learning_rate": 1.9718803741191918e-05, "loss": 0.9481, "step": 640 }, { "epoch": 0.5053212455656287, "grad_norm": 0.357421875, "learning_rate": 1.9716638482746155e-05, "loss": 0.9685, "step": 641 }, { "epoch": 0.5061095782420181, "grad_norm": 0.3828125, "learning_rate": 1.9714465039615545e-05, "loss": 0.995, "step": 642 }, { "epoch": 0.5068979109184075, "grad_norm": 0.3671875, "learning_rate": 1.9712283413630865e-05, "loss": 0.9765, "step": 643 }, { "epoch": 0.507686243594797, "grad_norm": 0.361328125, "learning_rate": 1.9710093606629774e-05, "loss": 0.9793, "step": 644 }, { "epoch": 0.5084745762711864, "grad_norm": 0.35546875, "learning_rate": 1.9707895620456832e-05, "loss": 0.9667, "step": 645 }, { "epoch": 0.5092629089475759, "grad_norm": 0.365234375, "learning_rate": 1.9705689456963484e-05, "loss": 0.9788, "step": 646 }, { "epoch": 0.5100512416239653, "grad_norm": 0.37890625, "learning_rate": 1.970347511800806e-05, "loss": 0.9969, "step": 647 }, { "epoch": 0.5108395743003548, "grad_norm": 0.36328125, "learning_rate": 1.970125260545579e-05, "loss": 0.9846, "step": 648 }, { "epoch": 0.5116279069767442, "grad_norm": 0.365234375, "learning_rate": 1.9699021921178762e-05, "loss": 0.9542, "step": 649 }, { "epoch": 0.5124162396531337, "grad_norm": 0.359375, "learning_rate": 1.969678306705598e-05, "loss": 0.9845, "step": 650 }, { "epoch": 0.5132045723295231, "grad_norm": 0.3515625, "learning_rate": 1.9694536044973303e-05, "loss": 0.9798, "step": 651 }, { "epoch": 0.5139929050059125, "grad_norm": 0.357421875, "learning_rate": 1.9692280856823486e-05, "loss": 0.9489, "step": 652 }, { "epoch": 0.5147812376823019, "grad_norm": 0.357421875, "learning_rate": 1.969001750450616e-05, "loss": 0.9967, "step": 653 }, { "epoch": 0.5155695703586913, "grad_norm": 0.38671875, "learning_rate": 1.9687745989927823e-05, "loss": 0.9596, "step": 654 }, { "epoch": 0.5163579030350808, "grad_norm": 0.35546875, "learning_rate": 1.9685466315001863e-05, "loss": 0.9805, "step": 655 }, { "epoch": 0.5171462357114702, "grad_norm": 0.3515625, "learning_rate": 1.968317848164853e-05, "loss": 0.9592, "step": 656 }, { "epoch": 0.5179345683878597, "grad_norm": 0.361328125, "learning_rate": 1.9680882491794953e-05, "loss": 0.9633, "step": 657 }, { "epoch": 0.5187229010642491, "grad_norm": 0.34765625, "learning_rate": 1.967857834737513e-05, "loss": 0.955, "step": 658 }, { "epoch": 0.5195112337406386, "grad_norm": 0.365234375, "learning_rate": 1.9676266050329926e-05, "loss": 0.9782, "step": 659 }, { "epoch": 0.520299566417028, "grad_norm": 0.376953125, "learning_rate": 1.9673945602607073e-05, "loss": 0.9809, "step": 660 }, { "epoch": 0.5210878990934175, "grad_norm": 0.3515625, "learning_rate": 1.967161700616117e-05, "loss": 0.9245, "step": 661 }, { "epoch": 0.5218762317698069, "grad_norm": 0.37890625, "learning_rate": 1.966928026295369e-05, "loss": 0.9981, "step": 662 }, { "epoch": 0.5226645644461962, "grad_norm": 0.35546875, "learning_rate": 1.9666935374952946e-05, "loss": 0.977, "step": 663 }, { "epoch": 0.5234528971225857, "grad_norm": 0.388671875, "learning_rate": 1.966458234413413e-05, "loss": 0.9567, "step": 664 }, { "epoch": 0.5242412297989751, "grad_norm": 0.361328125, "learning_rate": 1.9662221172479287e-05, "loss": 0.9747, "step": 665 }, { "epoch": 0.5250295624753646, "grad_norm": 0.35546875, "learning_rate": 1.9659851861977316e-05, "loss": 0.9533, "step": 666 }, { "epoch": 0.525817895151754, "grad_norm": 0.349609375, "learning_rate": 1.9657474414623974e-05, "loss": 0.9463, "step": 667 }, { "epoch": 0.5266062278281435, "grad_norm": 0.359375, "learning_rate": 1.965508883242188e-05, "loss": 0.9559, "step": 668 }, { "epoch": 0.5273945605045329, "grad_norm": 0.353515625, "learning_rate": 1.9652695117380496e-05, "loss": 0.9636, "step": 669 }, { "epoch": 0.5281828931809224, "grad_norm": 0.34765625, "learning_rate": 1.9650293271516135e-05, "loss": 0.9828, "step": 670 }, { "epoch": 0.5289712258573118, "grad_norm": 0.37109375, "learning_rate": 1.964788329685196e-05, "loss": 0.9876, "step": 671 }, { "epoch": 0.5297595585337013, "grad_norm": 0.365234375, "learning_rate": 1.9645465195417986e-05, "loss": 0.9969, "step": 672 }, { "epoch": 0.5305478912100906, "grad_norm": 0.375, "learning_rate": 1.9643038969251066e-05, "loss": 0.9614, "step": 673 }, { "epoch": 0.53133622388648, "grad_norm": 0.3671875, "learning_rate": 1.96406046203949e-05, "loss": 0.9538, "step": 674 }, { "epoch": 0.5321245565628695, "grad_norm": 0.36328125, "learning_rate": 1.9638162150900028e-05, "loss": 0.9834, "step": 675 }, { "epoch": 0.532912889239259, "grad_norm": 0.365234375, "learning_rate": 1.963571156282384e-05, "loss": 0.9586, "step": 676 }, { "epoch": 0.5337012219156484, "grad_norm": 0.34765625, "learning_rate": 1.9633252858230553e-05, "loss": 1.0023, "step": 677 }, { "epoch": 0.5344895545920378, "grad_norm": 0.3515625, "learning_rate": 1.963078603919123e-05, "loss": 0.983, "step": 678 }, { "epoch": 0.5352778872684273, "grad_norm": 0.359375, "learning_rate": 1.9628311107783753e-05, "loss": 0.9699, "step": 679 }, { "epoch": 0.5360662199448167, "grad_norm": 0.359375, "learning_rate": 1.9625828066092857e-05, "loss": 0.9912, "step": 680 }, { "epoch": 0.5368545526212062, "grad_norm": 0.3515625, "learning_rate": 1.96233369162101e-05, "loss": 0.9241, "step": 681 }, { "epoch": 0.5376428852975956, "grad_norm": 0.384765625, "learning_rate": 1.9620837660233866e-05, "loss": 0.9778, "step": 682 }, { "epoch": 0.5384312179739851, "grad_norm": 0.35546875, "learning_rate": 1.9618330300269372e-05, "loss": 0.969, "step": 683 }, { "epoch": 0.5392195506503744, "grad_norm": 0.361328125, "learning_rate": 1.9615814838428662e-05, "loss": 1.0089, "step": 684 }, { "epoch": 0.5400078833267639, "grad_norm": 0.58984375, "learning_rate": 1.9613291276830604e-05, "loss": 1.0068, "step": 685 }, { "epoch": 0.5407962160031533, "grad_norm": 0.357421875, "learning_rate": 1.9610759617600883e-05, "loss": 1.0009, "step": 686 }, { "epoch": 0.5415845486795428, "grad_norm": 0.36328125, "learning_rate": 1.960821986287201e-05, "loss": 0.9768, "step": 687 }, { "epoch": 0.5423728813559322, "grad_norm": 0.365234375, "learning_rate": 1.960567201478332e-05, "loss": 0.9893, "step": 688 }, { "epoch": 0.5431612140323216, "grad_norm": 0.353515625, "learning_rate": 1.960311607548096e-05, "loss": 1.0018, "step": 689 }, { "epoch": 0.5439495467087111, "grad_norm": 0.359375, "learning_rate": 1.9600552047117883e-05, "loss": 0.9698, "step": 690 }, { "epoch": 0.5447378793851005, "grad_norm": 0.40625, "learning_rate": 1.959797993185387e-05, "loss": 0.9648, "step": 691 }, { "epoch": 0.54552621206149, "grad_norm": 0.369140625, "learning_rate": 1.9595399731855514e-05, "loss": 1.027, "step": 692 }, { "epoch": 0.5463145447378794, "grad_norm": 0.357421875, "learning_rate": 1.9592811449296206e-05, "loss": 0.9667, "step": 693 }, { "epoch": 0.5471028774142688, "grad_norm": 0.337890625, "learning_rate": 1.959021508635616e-05, "loss": 0.9646, "step": 694 }, { "epoch": 0.5478912100906582, "grad_norm": 0.359375, "learning_rate": 1.958761064522238e-05, "loss": 0.9681, "step": 695 }, { "epoch": 0.5486795427670477, "grad_norm": 0.380859375, "learning_rate": 1.9584998128088686e-05, "loss": 0.9498, "step": 696 }, { "epoch": 0.5494678754434371, "grad_norm": 0.3671875, "learning_rate": 1.9582377537155703e-05, "loss": 1.0051, "step": 697 }, { "epoch": 0.5502562081198266, "grad_norm": 0.375, "learning_rate": 1.957974887463085e-05, "loss": 0.9901, "step": 698 }, { "epoch": 0.551044540796216, "grad_norm": 0.37109375, "learning_rate": 1.957711214272834e-05, "loss": 1.0211, "step": 699 }, { "epoch": 0.5518328734726055, "grad_norm": 0.36328125, "learning_rate": 1.9574467343669202e-05, "loss": 0.9511, "step": 700 }, { "epoch": 0.5526212061489949, "grad_norm": 0.353515625, "learning_rate": 1.9571814479681236e-05, "loss": 0.9484, "step": 701 }, { "epoch": 0.5534095388253844, "grad_norm": 0.3671875, "learning_rate": 1.9569153552999057e-05, "loss": 0.9585, "step": 702 }, { "epoch": 0.5541978715017738, "grad_norm": 0.361328125, "learning_rate": 1.9566484565864056e-05, "loss": 0.9389, "step": 703 }, { "epoch": 0.5549862041781631, "grad_norm": 0.458984375, "learning_rate": 1.9563807520524426e-05, "loss": 0.928, "step": 704 }, { "epoch": 0.5557745368545526, "grad_norm": 0.361328125, "learning_rate": 1.9561122419235137e-05, "loss": 0.9635, "step": 705 }, { "epoch": 0.556562869530942, "grad_norm": 0.359375, "learning_rate": 1.955842926425795e-05, "loss": 0.9952, "step": 706 }, { "epoch": 0.5573512022073315, "grad_norm": 0.345703125, "learning_rate": 1.955572805786141e-05, "loss": 0.9316, "step": 707 }, { "epoch": 0.5581395348837209, "grad_norm": 0.361328125, "learning_rate": 1.9553018802320843e-05, "loss": 0.9371, "step": 708 }, { "epoch": 0.5589278675601104, "grad_norm": 0.369140625, "learning_rate": 1.9550301499918355e-05, "loss": 0.9507, "step": 709 }, { "epoch": 0.5597162002364998, "grad_norm": 0.34765625, "learning_rate": 1.954757615294283e-05, "loss": 0.9586, "step": 710 }, { "epoch": 0.5605045329128893, "grad_norm": 0.357421875, "learning_rate": 1.9544842763689928e-05, "loss": 0.9551, "step": 711 }, { "epoch": 0.5612928655892787, "grad_norm": 0.35546875, "learning_rate": 1.954210133446209e-05, "loss": 0.9423, "step": 712 }, { "epoch": 0.5620811982656682, "grad_norm": 0.375, "learning_rate": 1.9539351867568516e-05, "loss": 0.9849, "step": 713 }, { "epoch": 0.5628695309420575, "grad_norm": 0.36328125, "learning_rate": 1.953659436532519e-05, "loss": 0.961, "step": 714 }, { "epoch": 0.5636578636184469, "grad_norm": 0.3515625, "learning_rate": 1.9533828830054852e-05, "loss": 0.9852, "step": 715 }, { "epoch": 0.5644461962948364, "grad_norm": 0.35546875, "learning_rate": 1.9531055264087025e-05, "loss": 0.9844, "step": 716 }, { "epoch": 0.5652345289712258, "grad_norm": 0.39453125, "learning_rate": 1.9528273669757974e-05, "loss": 0.948, "step": 717 }, { "epoch": 0.5660228616476153, "grad_norm": 0.34375, "learning_rate": 1.9525484049410746e-05, "loss": 1.0073, "step": 718 }, { "epoch": 0.5668111943240047, "grad_norm": 0.349609375, "learning_rate": 1.9522686405395143e-05, "loss": 0.9673, "step": 719 }, { "epoch": 0.5675995270003942, "grad_norm": 0.3515625, "learning_rate": 1.951988074006772e-05, "loss": 0.9576, "step": 720 }, { "epoch": 0.5683878596767836, "grad_norm": 0.34765625, "learning_rate": 1.9517067055791793e-05, "loss": 0.9354, "step": 721 }, { "epoch": 0.5691761923531731, "grad_norm": 0.357421875, "learning_rate": 1.9514245354937437e-05, "loss": 0.9774, "step": 722 }, { "epoch": 0.5699645250295625, "grad_norm": 0.474609375, "learning_rate": 1.9511415639881474e-05, "loss": 0.9822, "step": 723 }, { "epoch": 0.570752857705952, "grad_norm": 0.361328125, "learning_rate": 1.9508577913007475e-05, "loss": 0.9205, "step": 724 }, { "epoch": 0.5715411903823413, "grad_norm": 0.357421875, "learning_rate": 1.9505732176705763e-05, "loss": 0.9514, "step": 725 }, { "epoch": 0.5723295230587307, "grad_norm": 0.35546875, "learning_rate": 1.950287843337341e-05, "loss": 0.9887, "step": 726 }, { "epoch": 0.5731178557351202, "grad_norm": 0.375, "learning_rate": 1.9500016685414226e-05, "loss": 0.9979, "step": 727 }, { "epoch": 0.5739061884115096, "grad_norm": 0.3515625, "learning_rate": 1.949714693523877e-05, "loss": 1.009, "step": 728 }, { "epoch": 0.5746945210878991, "grad_norm": 0.376953125, "learning_rate": 1.949426918526434e-05, "loss": 1.0218, "step": 729 }, { "epoch": 0.5754828537642885, "grad_norm": 0.58984375, "learning_rate": 1.9491383437914968e-05, "loss": 0.9688, "step": 730 }, { "epoch": 0.576271186440678, "grad_norm": 0.3515625, "learning_rate": 1.9488489695621432e-05, "loss": 0.9676, "step": 731 }, { "epoch": 0.5770595191170674, "grad_norm": 0.3515625, "learning_rate": 1.948558796082123e-05, "loss": 0.9706, "step": 732 }, { "epoch": 0.5778478517934569, "grad_norm": 0.357421875, "learning_rate": 1.9482678235958604e-05, "loss": 0.9627, "step": 733 }, { "epoch": 0.5786361844698463, "grad_norm": 0.359375, "learning_rate": 1.947976052348453e-05, "loss": 0.9997, "step": 734 }, { "epoch": 0.5794245171462357, "grad_norm": 0.36328125, "learning_rate": 1.9476834825856696e-05, "loss": 0.9641, "step": 735 }, { "epoch": 0.5802128498226251, "grad_norm": 0.392578125, "learning_rate": 1.947390114553953e-05, "loss": 0.9689, "step": 736 }, { "epoch": 0.5810011824990146, "grad_norm": 0.396484375, "learning_rate": 1.947095948500418e-05, "loss": 0.9583, "step": 737 }, { "epoch": 0.581789515175404, "grad_norm": 0.369140625, "learning_rate": 1.9468009846728515e-05, "loss": 1.0123, "step": 738 }, { "epoch": 0.5825778478517935, "grad_norm": 0.35546875, "learning_rate": 1.9465052233197125e-05, "loss": 0.9493, "step": 739 }, { "epoch": 0.5833661805281829, "grad_norm": 0.3515625, "learning_rate": 1.946208664690131e-05, "loss": 0.9765, "step": 740 }, { "epoch": 0.5841545132045723, "grad_norm": 0.36328125, "learning_rate": 1.9459113090339107e-05, "loss": 0.9724, "step": 741 }, { "epoch": 0.5849428458809618, "grad_norm": 0.380859375, "learning_rate": 1.9456131566015245e-05, "loss": 0.9839, "step": 742 }, { "epoch": 0.5857311785573512, "grad_norm": 0.34375, "learning_rate": 1.9453142076441173e-05, "loss": 0.9679, "step": 743 }, { "epoch": 0.5865195112337407, "grad_norm": 0.349609375, "learning_rate": 1.945014462413505e-05, "loss": 0.956, "step": 744 }, { "epoch": 0.58730784391013, "grad_norm": 0.375, "learning_rate": 1.944713921162174e-05, "loss": 0.9527, "step": 745 }, { "epoch": 0.5880961765865195, "grad_norm": 0.3671875, "learning_rate": 1.9444125841432817e-05, "loss": 0.9732, "step": 746 }, { "epoch": 0.5888845092629089, "grad_norm": 0.36328125, "learning_rate": 1.944110451610655e-05, "loss": 0.9685, "step": 747 }, { "epoch": 0.5896728419392984, "grad_norm": 0.3671875, "learning_rate": 1.9438075238187916e-05, "loss": 1.0161, "step": 748 }, { "epoch": 0.5904611746156878, "grad_norm": 0.3671875, "learning_rate": 1.9435038010228584e-05, "loss": 0.9396, "step": 749 }, { "epoch": 0.5912495072920773, "grad_norm": 0.35546875, "learning_rate": 1.943199283478693e-05, "loss": 0.9756, "step": 750 }, { "epoch": 0.5920378399684667, "grad_norm": 0.3515625, "learning_rate": 1.942893971442801e-05, "loss": 0.9476, "step": 751 }, { "epoch": 0.5928261726448562, "grad_norm": 0.369140625, "learning_rate": 1.942587865172359e-05, "loss": 0.9911, "step": 752 }, { "epoch": 0.5936145053212456, "grad_norm": 0.35546875, "learning_rate": 1.942280964925211e-05, "loss": 0.9396, "step": 753 }, { "epoch": 0.594402837997635, "grad_norm": 0.349609375, "learning_rate": 1.9419732709598708e-05, "loss": 0.9701, "step": 754 }, { "epoch": 0.5951911706740244, "grad_norm": 0.361328125, "learning_rate": 1.94166478353552e-05, "loss": 0.981, "step": 755 }, { "epoch": 0.5959795033504138, "grad_norm": 0.3671875, "learning_rate": 1.94135550291201e-05, "loss": 0.9569, "step": 756 }, { "epoch": 0.5967678360268033, "grad_norm": 0.36328125, "learning_rate": 1.9410454293498577e-05, "loss": 0.9845, "step": 757 }, { "epoch": 0.5975561687031927, "grad_norm": 0.34375, "learning_rate": 1.940734563110251e-05, "loss": 0.9518, "step": 758 }, { "epoch": 0.5983445013795822, "grad_norm": 0.345703125, "learning_rate": 1.9404229044550432e-05, "loss": 0.9571, "step": 759 }, { "epoch": 0.5991328340559716, "grad_norm": 0.353515625, "learning_rate": 1.9401104536467566e-05, "loss": 0.9886, "step": 760 }, { "epoch": 0.5999211667323611, "grad_norm": 0.34765625, "learning_rate": 1.9397972109485798e-05, "loss": 0.9511, "step": 761 }, { "epoch": 0.6007094994087505, "grad_norm": 0.3515625, "learning_rate": 1.9394831766243688e-05, "loss": 0.9661, "step": 762 }, { "epoch": 0.60149783208514, "grad_norm": 0.345703125, "learning_rate": 1.9391683509386457e-05, "loss": 0.9346, "step": 763 }, { "epoch": 0.6022861647615294, "grad_norm": 0.34765625, "learning_rate": 1.9388527341566012e-05, "loss": 0.9589, "step": 764 }, { "epoch": 0.6030744974379189, "grad_norm": 0.34765625, "learning_rate": 1.9385363265440896e-05, "loss": 0.9812, "step": 765 }, { "epoch": 0.6038628301143082, "grad_norm": 0.341796875, "learning_rate": 1.9382191283676336e-05, "loss": 0.9376, "step": 766 }, { "epoch": 0.6046511627906976, "grad_norm": 0.359375, "learning_rate": 1.937901139894421e-05, "loss": 0.9717, "step": 767 }, { "epoch": 0.6054394954670871, "grad_norm": 0.345703125, "learning_rate": 1.937582361392305e-05, "loss": 0.968, "step": 768 }, { "epoch": 0.6062278281434765, "grad_norm": 0.35546875, "learning_rate": 1.9372627931298043e-05, "loss": 0.9825, "step": 769 }, { "epoch": 0.607016160819866, "grad_norm": 0.359375, "learning_rate": 1.9369424353761037e-05, "loss": 0.98, "step": 770 }, { "epoch": 0.6078044934962554, "grad_norm": 0.361328125, "learning_rate": 1.9366212884010523e-05, "loss": 0.9918, "step": 771 }, { "epoch": 0.6085928261726449, "grad_norm": 0.361328125, "learning_rate": 1.9362993524751634e-05, "loss": 0.987, "step": 772 }, { "epoch": 0.6093811588490343, "grad_norm": 0.357421875, "learning_rate": 1.9359766278696165e-05, "loss": 0.9751, "step": 773 }, { "epoch": 0.6101694915254238, "grad_norm": 0.345703125, "learning_rate": 1.935653114856254e-05, "loss": 0.9614, "step": 774 }, { "epoch": 0.6109578242018132, "grad_norm": 0.37890625, "learning_rate": 1.935328813707583e-05, "loss": 0.9916, "step": 775 }, { "epoch": 0.6117461568782026, "grad_norm": 0.359375, "learning_rate": 1.9350037246967744e-05, "loss": 0.9544, "step": 776 }, { "epoch": 0.612534489554592, "grad_norm": 0.345703125, "learning_rate": 1.9346778480976626e-05, "loss": 0.9567, "step": 777 }, { "epoch": 0.6133228222309814, "grad_norm": 0.345703125, "learning_rate": 1.9343511841847458e-05, "loss": 0.9329, "step": 778 }, { "epoch": 0.6141111549073709, "grad_norm": 0.3515625, "learning_rate": 1.9340237332331848e-05, "loss": 0.9372, "step": 779 }, { "epoch": 0.6148994875837603, "grad_norm": 0.36328125, "learning_rate": 1.9336954955188042e-05, "loss": 0.9324, "step": 780 }, { "epoch": 0.6156878202601498, "grad_norm": 0.349609375, "learning_rate": 1.93336647131809e-05, "loss": 0.9513, "step": 781 }, { "epoch": 0.6164761529365392, "grad_norm": 0.36328125, "learning_rate": 1.933036660908192e-05, "loss": 0.9666, "step": 782 }, { "epoch": 0.6172644856129287, "grad_norm": 0.34765625, "learning_rate": 1.932706064566922e-05, "loss": 0.9518, "step": 783 }, { "epoch": 0.6180528182893181, "grad_norm": 0.3359375, "learning_rate": 1.932374682572753e-05, "loss": 0.9682, "step": 784 }, { "epoch": 0.6188411509657076, "grad_norm": 0.35546875, "learning_rate": 1.9320425152048205e-05, "loss": 0.9665, "step": 785 }, { "epoch": 0.6196294836420969, "grad_norm": 0.375, "learning_rate": 1.9317095627429215e-05, "loss": 0.9688, "step": 786 }, { "epoch": 0.6204178163184864, "grad_norm": 0.36328125, "learning_rate": 1.9313758254675143e-05, "loss": 0.9682, "step": 787 }, { "epoch": 0.6212061489948758, "grad_norm": 0.34765625, "learning_rate": 1.931041303659718e-05, "loss": 0.9685, "step": 788 }, { "epoch": 0.6219944816712653, "grad_norm": 0.369140625, "learning_rate": 1.930705997601313e-05, "loss": 0.9854, "step": 789 }, { "epoch": 0.6227828143476547, "grad_norm": 0.3515625, "learning_rate": 1.9303699075747392e-05, "loss": 0.9139, "step": 790 }, { "epoch": 0.6235711470240441, "grad_norm": 0.349609375, "learning_rate": 1.9300330338630986e-05, "loss": 0.9522, "step": 791 }, { "epoch": 0.6243594797004336, "grad_norm": 0.349609375, "learning_rate": 1.929695376750152e-05, "loss": 0.9695, "step": 792 }, { "epoch": 0.625147812376823, "grad_norm": 0.33984375, "learning_rate": 1.9293569365203205e-05, "loss": 0.9595, "step": 793 }, { "epoch": 0.6259361450532125, "grad_norm": 0.359375, "learning_rate": 1.929017713458685e-05, "loss": 0.9474, "step": 794 }, { "epoch": 0.6267244777296019, "grad_norm": 0.357421875, "learning_rate": 1.928677707850986e-05, "loss": 0.9764, "step": 795 }, { "epoch": 0.6275128104059914, "grad_norm": 0.375, "learning_rate": 1.9283369199836222e-05, "loss": 0.9735, "step": 796 }, { "epoch": 0.6283011430823807, "grad_norm": 0.349609375, "learning_rate": 1.9279953501436518e-05, "loss": 0.9839, "step": 797 }, { "epoch": 0.6290894757587702, "grad_norm": 0.3671875, "learning_rate": 1.9276529986187925e-05, "loss": 0.9636, "step": 798 }, { "epoch": 0.6298778084351596, "grad_norm": 0.34765625, "learning_rate": 1.9273098656974188e-05, "loss": 0.9613, "step": 799 }, { "epoch": 0.6306661411115491, "grad_norm": 0.359375, "learning_rate": 1.926965951668565e-05, "loss": 0.9899, "step": 800 }, { "epoch": 0.6314544737879385, "grad_norm": 0.369140625, "learning_rate": 1.9266212568219223e-05, "loss": 0.9507, "step": 801 }, { "epoch": 0.632242806464328, "grad_norm": 0.361328125, "learning_rate": 1.92627578144784e-05, "loss": 0.9985, "step": 802 }, { "epoch": 0.6330311391407174, "grad_norm": 0.375, "learning_rate": 1.9259295258373245e-05, "loss": 1.0029, "step": 803 }, { "epoch": 0.6338194718171068, "grad_norm": 0.3515625, "learning_rate": 1.9255824902820403e-05, "loss": 0.9837, "step": 804 }, { "epoch": 0.6346078044934963, "grad_norm": 0.359375, "learning_rate": 1.925234675074308e-05, "loss": 1.0052, "step": 805 }, { "epoch": 0.6353961371698857, "grad_norm": 0.345703125, "learning_rate": 1.9248860805071056e-05, "loss": 0.9282, "step": 806 }, { "epoch": 0.6361844698462751, "grad_norm": 0.37109375, "learning_rate": 1.9245367068740664e-05, "loss": 0.9695, "step": 807 }, { "epoch": 0.6369728025226645, "grad_norm": 0.359375, "learning_rate": 1.9241865544694817e-05, "loss": 0.9672, "step": 808 }, { "epoch": 0.637761135199054, "grad_norm": 0.50390625, "learning_rate": 1.923835623588297e-05, "loss": 0.9882, "step": 809 }, { "epoch": 0.6385494678754434, "grad_norm": 0.51953125, "learning_rate": 1.9234839145261154e-05, "loss": 0.9761, "step": 810 }, { "epoch": 0.6393378005518329, "grad_norm": 0.369140625, "learning_rate": 1.9231314275791934e-05, "loss": 0.9959, "step": 811 }, { "epoch": 0.6401261332282223, "grad_norm": 0.3671875, "learning_rate": 1.9227781630444448e-05, "loss": 0.9736, "step": 812 }, { "epoch": 0.6409144659046118, "grad_norm": 0.41796875, "learning_rate": 1.9224241212194364e-05, "loss": 0.9462, "step": 813 }, { "epoch": 0.6417027985810012, "grad_norm": 0.35546875, "learning_rate": 1.922069302402392e-05, "loss": 0.9821, "step": 814 }, { "epoch": 0.6424911312573907, "grad_norm": 0.37109375, "learning_rate": 1.9217137068921875e-05, "loss": 0.9828, "step": 815 }, { "epoch": 0.6432794639337801, "grad_norm": 0.40234375, "learning_rate": 1.9213573349883545e-05, "loss": 0.9715, "step": 816 }, { "epoch": 0.6440677966101694, "grad_norm": 0.3515625, "learning_rate": 1.921000186991079e-05, "loss": 1.0017, "step": 817 }, { "epoch": 0.6448561292865589, "grad_norm": 0.357421875, "learning_rate": 1.920642263201199e-05, "loss": 0.9685, "step": 818 }, { "epoch": 0.6456444619629483, "grad_norm": 0.353515625, "learning_rate": 1.9202835639202075e-05, "loss": 1.0051, "step": 819 }, { "epoch": 0.6464327946393378, "grad_norm": 0.353515625, "learning_rate": 1.91992408945025e-05, "loss": 0.9475, "step": 820 }, { "epoch": 0.6472211273157272, "grad_norm": 0.365234375, "learning_rate": 1.9195638400941254e-05, "loss": 0.9911, "step": 821 }, { "epoch": 0.6480094599921167, "grad_norm": 0.34375, "learning_rate": 1.9192028161552848e-05, "loss": 0.9745, "step": 822 }, { "epoch": 0.6487977926685061, "grad_norm": 0.3515625, "learning_rate": 1.9188410179378324e-05, "loss": 0.9503, "step": 823 }, { "epoch": 0.6495861253448956, "grad_norm": 0.353515625, "learning_rate": 1.9184784457465238e-05, "loss": 0.9823, "step": 824 }, { "epoch": 0.650374458021285, "grad_norm": 0.359375, "learning_rate": 1.9181150998867674e-05, "loss": 0.9742, "step": 825 }, { "epoch": 0.6511627906976745, "grad_norm": 0.3671875, "learning_rate": 1.9177509806646225e-05, "loss": 0.9587, "step": 826 }, { "epoch": 0.6519511233740638, "grad_norm": 0.36328125, "learning_rate": 1.9173860883868008e-05, "loss": 0.9425, "step": 827 }, { "epoch": 0.6527394560504532, "grad_norm": 0.353515625, "learning_rate": 1.917020423360664e-05, "loss": 0.9819, "step": 828 }, { "epoch": 0.6535277887268427, "grad_norm": 0.357421875, "learning_rate": 1.9166539858942258e-05, "loss": 0.9637, "step": 829 }, { "epoch": 0.6543161214032321, "grad_norm": 0.3671875, "learning_rate": 1.9162867762961497e-05, "loss": 0.9715, "step": 830 }, { "epoch": 0.6551044540796216, "grad_norm": 0.369140625, "learning_rate": 1.9159187948757503e-05, "loss": 0.9656, "step": 831 }, { "epoch": 0.655892786756011, "grad_norm": 0.36328125, "learning_rate": 1.9155500419429916e-05, "loss": 0.9356, "step": 832 }, { "epoch": 0.6566811194324005, "grad_norm": 0.36328125, "learning_rate": 1.9151805178084878e-05, "loss": 0.9625, "step": 833 }, { "epoch": 0.6574694521087899, "grad_norm": 0.37109375, "learning_rate": 1.9148102227835033e-05, "loss": 0.9626, "step": 834 }, { "epoch": 0.6582577847851794, "grad_norm": 0.349609375, "learning_rate": 1.9144391571799513e-05, "loss": 0.968, "step": 835 }, { "epoch": 0.6590461174615688, "grad_norm": 0.349609375, "learning_rate": 1.9140673213103932e-05, "loss": 1.004, "step": 836 }, { "epoch": 0.6598344501379583, "grad_norm": 0.36328125, "learning_rate": 1.9136947154880413e-05, "loss": 0.9806, "step": 837 }, { "epoch": 0.6606227828143476, "grad_norm": 0.34765625, "learning_rate": 1.9133213400267548e-05, "loss": 0.9822, "step": 838 }, { "epoch": 0.6614111154907371, "grad_norm": 0.34765625, "learning_rate": 1.9129471952410417e-05, "loss": 0.9804, "step": 839 }, { "epoch": 0.6621994481671265, "grad_norm": 0.357421875, "learning_rate": 1.9125722814460582e-05, "loss": 0.9368, "step": 840 }, { "epoch": 0.662987780843516, "grad_norm": 0.361328125, "learning_rate": 1.9121965989576075e-05, "loss": 0.9702, "step": 841 }, { "epoch": 0.6637761135199054, "grad_norm": 0.345703125, "learning_rate": 1.9118201480921413e-05, "loss": 0.9986, "step": 842 }, { "epoch": 0.6645644461962948, "grad_norm": 0.353515625, "learning_rate": 1.9114429291667583e-05, "loss": 0.9713, "step": 843 }, { "epoch": 0.6653527788726843, "grad_norm": 0.353515625, "learning_rate": 1.911064942499204e-05, "loss": 0.9646, "step": 844 }, { "epoch": 0.6661411115490737, "grad_norm": 0.35546875, "learning_rate": 1.9106861884078704e-05, "loss": 0.9557, "step": 845 }, { "epoch": 0.6669294442254632, "grad_norm": 0.3515625, "learning_rate": 1.9103066672117957e-05, "loss": 0.925, "step": 846 }, { "epoch": 0.6677177769018526, "grad_norm": 0.3515625, "learning_rate": 1.9099263792306654e-05, "loss": 1.0093, "step": 847 }, { "epoch": 0.668506109578242, "grad_norm": 0.341796875, "learning_rate": 1.90954532478481e-05, "loss": 0.975, "step": 848 }, { "epoch": 0.6692944422546314, "grad_norm": 0.33984375, "learning_rate": 1.9091635041952052e-05, "loss": 0.9198, "step": 849 }, { "epoch": 0.6700827749310209, "grad_norm": 0.33203125, "learning_rate": 1.908780917783473e-05, "loss": 0.9366, "step": 850 }, { "epoch": 0.6708711076074103, "grad_norm": 0.34375, "learning_rate": 1.9083975658718808e-05, "loss": 0.9488, "step": 851 }, { "epoch": 0.6716594402837998, "grad_norm": 0.345703125, "learning_rate": 1.9080134487833393e-05, "loss": 0.9819, "step": 852 }, { "epoch": 0.6724477729601892, "grad_norm": 0.34765625, "learning_rate": 1.9076285668414046e-05, "loss": 0.9749, "step": 853 }, { "epoch": 0.6732361056365787, "grad_norm": 0.361328125, "learning_rate": 1.9072429203702774e-05, "loss": 0.9691, "step": 854 }, { "epoch": 0.6740244383129681, "grad_norm": 0.341796875, "learning_rate": 1.9068565096948017e-05, "loss": 0.9597, "step": 855 }, { "epoch": 0.6748127709893575, "grad_norm": 0.361328125, "learning_rate": 1.9064693351404657e-05, "loss": 1.0036, "step": 856 }, { "epoch": 0.675601103665747, "grad_norm": 0.349609375, "learning_rate": 1.906081397033401e-05, "loss": 0.96, "step": 857 }, { "epoch": 0.6763894363421363, "grad_norm": 0.357421875, "learning_rate": 1.905692695700382e-05, "loss": 0.9632, "step": 858 }, { "epoch": 0.6771777690185258, "grad_norm": 0.376953125, "learning_rate": 1.9053032314688264e-05, "loss": 0.9322, "step": 859 }, { "epoch": 0.6779661016949152, "grad_norm": 0.3515625, "learning_rate": 1.9049130046667943e-05, "loss": 0.9835, "step": 860 }, { "epoch": 0.6787544343713047, "grad_norm": 0.359375, "learning_rate": 1.904522015622988e-05, "loss": 0.9282, "step": 861 }, { "epoch": 0.6795427670476941, "grad_norm": 0.376953125, "learning_rate": 1.9041302646667526e-05, "loss": 0.9761, "step": 862 }, { "epoch": 0.6803310997240836, "grad_norm": 0.357421875, "learning_rate": 1.903737752128074e-05, "loss": 0.951, "step": 863 }, { "epoch": 0.681119432400473, "grad_norm": 0.33984375, "learning_rate": 1.9033444783375806e-05, "loss": 0.9613, "step": 864 }, { "epoch": 0.6819077650768625, "grad_norm": 0.357421875, "learning_rate": 1.9029504436265406e-05, "loss": 0.9756, "step": 865 }, { "epoch": 0.6826960977532519, "grad_norm": 0.35546875, "learning_rate": 1.9025556483268647e-05, "loss": 0.9879, "step": 866 }, { "epoch": 0.6834844304296414, "grad_norm": 0.365234375, "learning_rate": 1.9021600927711037e-05, "loss": 1.0026, "step": 867 }, { "epoch": 0.6842727631060307, "grad_norm": 0.33984375, "learning_rate": 1.9017637772924483e-05, "loss": 0.9354, "step": 868 }, { "epoch": 0.6850610957824201, "grad_norm": 0.3515625, "learning_rate": 1.9013667022247297e-05, "loss": 0.9993, "step": 869 }, { "epoch": 0.6858494284588096, "grad_norm": 0.369140625, "learning_rate": 1.900968867902419e-05, "loss": 0.9881, "step": 870 }, { "epoch": 0.686637761135199, "grad_norm": 0.3515625, "learning_rate": 1.9005702746606274e-05, "loss": 0.9607, "step": 871 }, { "epoch": 0.6874260938115885, "grad_norm": 0.345703125, "learning_rate": 1.9001709228351042e-05, "loss": 0.9411, "step": 872 }, { "epoch": 0.6882144264879779, "grad_norm": 0.353515625, "learning_rate": 1.8997708127622384e-05, "loss": 0.9774, "step": 873 }, { "epoch": 0.6890027591643674, "grad_norm": 0.359375, "learning_rate": 1.8993699447790576e-05, "loss": 0.952, "step": 874 }, { "epoch": 0.6897910918407568, "grad_norm": 0.349609375, "learning_rate": 1.8989683192232276e-05, "loss": 0.9782, "step": 875 }, { "epoch": 0.6905794245171463, "grad_norm": 0.376953125, "learning_rate": 1.8985659364330522e-05, "loss": 0.953, "step": 876 }, { "epoch": 0.6913677571935357, "grad_norm": 0.341796875, "learning_rate": 1.898162796747474e-05, "loss": 0.9401, "step": 877 }, { "epoch": 0.6921560898699252, "grad_norm": 0.3515625, "learning_rate": 1.8977589005060723e-05, "loss": 0.9714, "step": 878 }, { "epoch": 0.6929444225463145, "grad_norm": 0.34375, "learning_rate": 1.8973542480490636e-05, "loss": 0.9489, "step": 879 }, { "epoch": 0.693732755222704, "grad_norm": 0.365234375, "learning_rate": 1.8969488397173023e-05, "loss": 0.9389, "step": 880 }, { "epoch": 0.6945210878990934, "grad_norm": 0.341796875, "learning_rate": 1.896542675852278e-05, "loss": 0.956, "step": 881 }, { "epoch": 0.6953094205754828, "grad_norm": 0.3671875, "learning_rate": 1.8961357567961182e-05, "loss": 0.9331, "step": 882 }, { "epoch": 0.6960977532518723, "grad_norm": 0.34375, "learning_rate": 1.8957280828915855e-05, "loss": 0.9392, "step": 883 }, { "epoch": 0.6968860859282617, "grad_norm": 0.3515625, "learning_rate": 1.895319654482079e-05, "loss": 0.9661, "step": 884 }, { "epoch": 0.6976744186046512, "grad_norm": 0.34765625, "learning_rate": 1.8949104719116334e-05, "loss": 0.9599, "step": 885 }, { "epoch": 0.6984627512810406, "grad_norm": 0.35546875, "learning_rate": 1.894500535524918e-05, "loss": 0.9792, "step": 886 }, { "epoch": 0.6992510839574301, "grad_norm": 0.36328125, "learning_rate": 1.894089845667237e-05, "loss": 0.9706, "step": 887 }, { "epoch": 0.7000394166338195, "grad_norm": 0.3515625, "learning_rate": 1.8936784026845304e-05, "loss": 0.9675, "step": 888 }, { "epoch": 0.7008277493102089, "grad_norm": 0.359375, "learning_rate": 1.8932662069233717e-05, "loss": 0.9454, "step": 889 }, { "epoch": 0.7016160819865983, "grad_norm": 0.373046875, "learning_rate": 1.8928532587309682e-05, "loss": 0.9693, "step": 890 }, { "epoch": 0.7024044146629878, "grad_norm": 0.349609375, "learning_rate": 1.8924395584551624e-05, "loss": 0.9799, "step": 891 }, { "epoch": 0.7031927473393772, "grad_norm": 0.357421875, "learning_rate": 1.8920251064444284e-05, "loss": 0.9549, "step": 892 }, { "epoch": 0.7039810800157666, "grad_norm": 0.33984375, "learning_rate": 1.8916099030478747e-05, "loss": 0.9507, "step": 893 }, { "epoch": 0.7047694126921561, "grad_norm": 0.3515625, "learning_rate": 1.8911939486152433e-05, "loss": 0.9399, "step": 894 }, { "epoch": 0.7055577453685455, "grad_norm": 0.375, "learning_rate": 1.8907772434969073e-05, "loss": 1.0025, "step": 895 }, { "epoch": 0.706346078044935, "grad_norm": 0.34375, "learning_rate": 1.890359788043873e-05, "loss": 0.9551, "step": 896 }, { "epoch": 0.7071344107213244, "grad_norm": 0.353515625, "learning_rate": 1.8899415826077784e-05, "loss": 1.0056, "step": 897 }, { "epoch": 0.7079227433977139, "grad_norm": 0.3515625, "learning_rate": 1.889522627540894e-05, "loss": 0.9936, "step": 898 }, { "epoch": 0.7087110760741032, "grad_norm": 0.36328125, "learning_rate": 1.8891029231961208e-05, "loss": 0.9423, "step": 899 }, { "epoch": 0.7094994087504927, "grad_norm": 0.349609375, "learning_rate": 1.8886824699269916e-05, "loss": 0.9715, "step": 900 }, { "epoch": 0.7102877414268821, "grad_norm": 0.34375, "learning_rate": 1.888261268087669e-05, "loss": 0.9558, "step": 901 }, { "epoch": 0.7110760741032716, "grad_norm": 0.349609375, "learning_rate": 1.8878393180329482e-05, "loss": 0.9705, "step": 902 }, { "epoch": 0.711864406779661, "grad_norm": 0.34375, "learning_rate": 1.8874166201182526e-05, "loss": 0.954, "step": 903 }, { "epoch": 0.7126527394560505, "grad_norm": 0.34765625, "learning_rate": 1.886993174699636e-05, "loss": 0.9622, "step": 904 }, { "epoch": 0.7134410721324399, "grad_norm": 0.359375, "learning_rate": 1.8865689821337828e-05, "loss": 0.9623, "step": 905 }, { "epoch": 0.7142294048088293, "grad_norm": 0.3515625, "learning_rate": 1.886144042778006e-05, "loss": 0.9711, "step": 906 }, { "epoch": 0.7150177374852188, "grad_norm": 0.361328125, "learning_rate": 1.8857183569902476e-05, "loss": 0.9696, "step": 907 }, { "epoch": 0.7158060701616082, "grad_norm": 0.34765625, "learning_rate": 1.8852919251290785e-05, "loss": 0.9833, "step": 908 }, { "epoch": 0.7165944028379977, "grad_norm": 0.3515625, "learning_rate": 1.884864747553698e-05, "loss": 0.9689, "step": 909 }, { "epoch": 0.717382735514387, "grad_norm": 0.35546875, "learning_rate": 1.8844368246239343e-05, "loss": 0.99, "step": 910 }, { "epoch": 0.7181710681907765, "grad_norm": 0.36328125, "learning_rate": 1.884008156700242e-05, "loss": 0.9462, "step": 911 }, { "epoch": 0.7189594008671659, "grad_norm": 0.37890625, "learning_rate": 1.8835787441437043e-05, "loss": 0.9691, "step": 912 }, { "epoch": 0.7197477335435554, "grad_norm": 0.359375, "learning_rate": 1.8831485873160312e-05, "loss": 0.9987, "step": 913 }, { "epoch": 0.7205360662199448, "grad_norm": 0.34765625, "learning_rate": 1.8827176865795597e-05, "loss": 0.9851, "step": 914 }, { "epoch": 0.7213243988963343, "grad_norm": 0.35546875, "learning_rate": 1.882286042297254e-05, "loss": 0.9771, "step": 915 }, { "epoch": 0.7221127315727237, "grad_norm": 0.353515625, "learning_rate": 1.881853654832703e-05, "loss": 0.943, "step": 916 }, { "epoch": 0.7229010642491132, "grad_norm": 0.35546875, "learning_rate": 1.8814205245501235e-05, "loss": 0.9545, "step": 917 }, { "epoch": 0.7236893969255026, "grad_norm": 0.375, "learning_rate": 1.880986651814357e-05, "loss": 0.9595, "step": 918 }, { "epoch": 0.724477729601892, "grad_norm": 0.349609375, "learning_rate": 1.8805520369908707e-05, "loss": 0.9574, "step": 919 }, { "epoch": 0.7252660622782814, "grad_norm": 0.35546875, "learning_rate": 1.880116680445757e-05, "loss": 0.9905, "step": 920 }, { "epoch": 0.7260543949546708, "grad_norm": 0.361328125, "learning_rate": 1.8796805825457324e-05, "loss": 0.9625, "step": 921 }, { "epoch": 0.7268427276310603, "grad_norm": 0.35546875, "learning_rate": 1.8792437436581382e-05, "loss": 0.9981, "step": 922 }, { "epoch": 0.7276310603074497, "grad_norm": 0.36328125, "learning_rate": 1.8788061641509402e-05, "loss": 0.9707, "step": 923 }, { "epoch": 0.7284193929838392, "grad_norm": 0.349609375, "learning_rate": 1.8783678443927282e-05, "loss": 0.9528, "step": 924 }, { "epoch": 0.7292077256602286, "grad_norm": 0.35546875, "learning_rate": 1.8779287847527146e-05, "loss": 0.9566, "step": 925 }, { "epoch": 0.7299960583366181, "grad_norm": 0.341796875, "learning_rate": 1.8774889856007365e-05, "loss": 0.9506, "step": 926 }, { "epoch": 0.7307843910130075, "grad_norm": 0.3671875, "learning_rate": 1.8770484473072518e-05, "loss": 0.9764, "step": 927 }, { "epoch": 0.731572723689397, "grad_norm": 0.353515625, "learning_rate": 1.876607170243343e-05, "loss": 0.9335, "step": 928 }, { "epoch": 0.7323610563657864, "grad_norm": 0.3515625, "learning_rate": 1.8761651547807144e-05, "loss": 0.952, "step": 929 }, { "epoch": 0.7331493890421757, "grad_norm": 0.36328125, "learning_rate": 1.8757224012916913e-05, "loss": 0.9734, "step": 930 }, { "epoch": 0.7339377217185652, "grad_norm": 0.404296875, "learning_rate": 1.8752789101492217e-05, "loss": 0.984, "step": 931 }, { "epoch": 0.7347260543949546, "grad_norm": 0.353515625, "learning_rate": 1.874834681726875e-05, "loss": 1.0054, "step": 932 }, { "epoch": 0.7355143870713441, "grad_norm": 0.357421875, "learning_rate": 1.87438971639884e-05, "loss": 0.952, "step": 933 }, { "epoch": 0.7363027197477335, "grad_norm": 0.35546875, "learning_rate": 1.8739440145399295e-05, "loss": 0.9549, "step": 934 }, { "epoch": 0.737091052424123, "grad_norm": 0.341796875, "learning_rate": 1.8734975765255732e-05, "loss": 0.945, "step": 935 }, { "epoch": 0.7378793851005124, "grad_norm": 0.36328125, "learning_rate": 1.8730504027318223e-05, "loss": 0.9707, "step": 936 }, { "epoch": 0.7386677177769019, "grad_norm": 0.35546875, "learning_rate": 1.872602493535349e-05, "loss": 0.976, "step": 937 }, { "epoch": 0.7394560504532913, "grad_norm": 0.34375, "learning_rate": 1.8721538493134428e-05, "loss": 0.9413, "step": 938 }, { "epoch": 0.7402443831296808, "grad_norm": 0.365234375, "learning_rate": 1.871704470444014e-05, "loss": 0.9774, "step": 939 }, { "epoch": 0.7410327158060701, "grad_norm": 0.345703125, "learning_rate": 1.8712543573055907e-05, "loss": 0.9769, "step": 940 }, { "epoch": 0.7418210484824596, "grad_norm": 0.36328125, "learning_rate": 1.8708035102773198e-05, "loss": 0.9704, "step": 941 }, { "epoch": 0.742609381158849, "grad_norm": 0.37109375, "learning_rate": 1.870351929738967e-05, "loss": 0.9873, "step": 942 }, { "epoch": 0.7433977138352384, "grad_norm": 0.396484375, "learning_rate": 1.8698996160709147e-05, "loss": 0.9515, "step": 943 }, { "epoch": 0.7441860465116279, "grad_norm": 0.359375, "learning_rate": 1.8694465696541643e-05, "loss": 0.9891, "step": 944 }, { "epoch": 0.7449743791880173, "grad_norm": 0.3515625, "learning_rate": 1.8689927908703325e-05, "loss": 0.9847, "step": 945 }, { "epoch": 0.7457627118644068, "grad_norm": 0.37890625, "learning_rate": 1.868538280101655e-05, "loss": 0.9786, "step": 946 }, { "epoch": 0.7465510445407962, "grad_norm": 0.373046875, "learning_rate": 1.868083037730982e-05, "loss": 0.9621, "step": 947 }, { "epoch": 0.7473393772171857, "grad_norm": 0.376953125, "learning_rate": 1.8676270641417824e-05, "loss": 0.9462, "step": 948 }, { "epoch": 0.7481277098935751, "grad_norm": 0.3671875, "learning_rate": 1.8671703597181387e-05, "loss": 1.0023, "step": 949 }, { "epoch": 0.7489160425699646, "grad_norm": 0.35546875, "learning_rate": 1.8667129248447498e-05, "loss": 0.972, "step": 950 }, { "epoch": 0.7497043752463539, "grad_norm": 0.357421875, "learning_rate": 1.866254759906931e-05, "loss": 0.9461, "step": 951 }, { "epoch": 0.7504927079227434, "grad_norm": 0.3984375, "learning_rate": 1.8657958652906106e-05, "loss": 0.981, "step": 952 }, { "epoch": 0.7512810405991328, "grad_norm": 0.3515625, "learning_rate": 1.8653362413823333e-05, "loss": 0.9613, "step": 953 }, { "epoch": 0.7520693732755223, "grad_norm": 0.361328125, "learning_rate": 1.864875888569257e-05, "loss": 0.9859, "step": 954 }, { "epoch": 0.7528577059519117, "grad_norm": 0.35546875, "learning_rate": 1.864414807239154e-05, "loss": 0.9673, "step": 955 }, { "epoch": 0.7536460386283012, "grad_norm": 0.345703125, "learning_rate": 1.86395299778041e-05, "loss": 0.9504, "step": 956 }, { "epoch": 0.7544343713046906, "grad_norm": 0.36328125, "learning_rate": 1.863490460582025e-05, "loss": 0.9414, "step": 957 }, { "epoch": 0.75522270398108, "grad_norm": 0.361328125, "learning_rate": 1.86302719603361e-05, "loss": 0.9881, "step": 958 }, { "epoch": 0.7560110366574695, "grad_norm": 0.349609375, "learning_rate": 1.862563204525391e-05, "loss": 0.9067, "step": 959 }, { "epoch": 0.7567993693338589, "grad_norm": 0.359375, "learning_rate": 1.8620984864482046e-05, "loss": 0.9702, "step": 960 }, { "epoch": 0.7575877020102483, "grad_norm": 0.37109375, "learning_rate": 1.8616330421935004e-05, "loss": 0.9706, "step": 961 }, { "epoch": 0.7583760346866377, "grad_norm": 0.357421875, "learning_rate": 1.861166872153339e-05, "loss": 0.945, "step": 962 }, { "epoch": 0.7591643673630272, "grad_norm": 0.353515625, "learning_rate": 1.860699976720393e-05, "loss": 0.9521, "step": 963 }, { "epoch": 0.7599527000394166, "grad_norm": 0.36328125, "learning_rate": 1.8602323562879464e-05, "loss": 0.972, "step": 964 }, { "epoch": 0.7607410327158061, "grad_norm": 0.349609375, "learning_rate": 1.8597640112498917e-05, "loss": 0.9654, "step": 965 }, { "epoch": 0.7615293653921955, "grad_norm": 0.353515625, "learning_rate": 1.859294942000734e-05, "loss": 0.9605, "step": 966 }, { "epoch": 0.762317698068585, "grad_norm": 0.35546875, "learning_rate": 1.8588251489355883e-05, "loss": 1.0063, "step": 967 }, { "epoch": 0.7631060307449744, "grad_norm": 0.34765625, "learning_rate": 1.8583546324501783e-05, "loss": 0.9823, "step": 968 }, { "epoch": 0.7638943634213639, "grad_norm": 0.359375, "learning_rate": 1.857883392940837e-05, "loss": 0.9623, "step": 969 }, { "epoch": 0.7646826960977533, "grad_norm": 0.37890625, "learning_rate": 1.8574114308045077e-05, "loss": 0.9662, "step": 970 }, { "epoch": 0.7654710287741426, "grad_norm": 0.3671875, "learning_rate": 1.8569387464387412e-05, "loss": 1.0004, "step": 971 }, { "epoch": 0.7662593614505321, "grad_norm": 0.42578125, "learning_rate": 1.856465340241697e-05, "loss": 0.9294, "step": 972 }, { "epoch": 0.7670476941269215, "grad_norm": 0.3515625, "learning_rate": 1.8559912126121428e-05, "loss": 0.9312, "step": 973 }, { "epoch": 0.767836026803311, "grad_norm": 0.353515625, "learning_rate": 1.8555163639494537e-05, "loss": 0.9706, "step": 974 }, { "epoch": 0.7686243594797004, "grad_norm": 0.35546875, "learning_rate": 1.8550407946536127e-05, "loss": 0.9255, "step": 975 }, { "epoch": 0.7694126921560899, "grad_norm": 0.375, "learning_rate": 1.8545645051252094e-05, "loss": 0.9801, "step": 976 }, { "epoch": 0.7702010248324793, "grad_norm": 0.34765625, "learning_rate": 1.85408749576544e-05, "loss": 0.9721, "step": 977 }, { "epoch": 0.7709893575088688, "grad_norm": 0.341796875, "learning_rate": 1.8536097669761066e-05, "loss": 0.9833, "step": 978 }, { "epoch": 0.7717776901852582, "grad_norm": 0.345703125, "learning_rate": 1.853131319159619e-05, "loss": 0.9517, "step": 979 }, { "epoch": 0.7725660228616477, "grad_norm": 0.341796875, "learning_rate": 1.8526521527189905e-05, "loss": 1.0004, "step": 980 }, { "epoch": 0.773354355538037, "grad_norm": 0.36328125, "learning_rate": 1.8521722680578413e-05, "loss": 0.9715, "step": 981 }, { "epoch": 0.7741426882144264, "grad_norm": 0.353515625, "learning_rate": 1.8516916655803963e-05, "loss": 0.9934, "step": 982 }, { "epoch": 0.7749310208908159, "grad_norm": 0.419921875, "learning_rate": 1.851210345691484e-05, "loss": 0.9967, "step": 983 }, { "epoch": 0.7757193535672053, "grad_norm": 0.34375, "learning_rate": 1.850728308796539e-05, "loss": 0.9814, "step": 984 }, { "epoch": 0.7765076862435948, "grad_norm": 0.365234375, "learning_rate": 1.8502455553015978e-05, "loss": 0.971, "step": 985 }, { "epoch": 0.7772960189199842, "grad_norm": 0.345703125, "learning_rate": 1.849762085613302e-05, "loss": 0.9499, "step": 986 }, { "epoch": 0.7780843515963737, "grad_norm": 0.357421875, "learning_rate": 1.8492779001388964e-05, "loss": 0.9851, "step": 987 }, { "epoch": 0.7788726842727631, "grad_norm": 0.359375, "learning_rate": 1.8487929992862282e-05, "loss": 0.9815, "step": 988 }, { "epoch": 0.7796610169491526, "grad_norm": 0.365234375, "learning_rate": 1.848307383463747e-05, "loss": 0.9583, "step": 989 }, { "epoch": 0.780449349625542, "grad_norm": 0.365234375, "learning_rate": 1.847821053080505e-05, "loss": 0.9918, "step": 990 }, { "epoch": 0.7812376823019315, "grad_norm": 0.3515625, "learning_rate": 1.847334008546157e-05, "loss": 0.9442, "step": 991 }, { "epoch": 0.7820260149783208, "grad_norm": 0.34765625, "learning_rate": 1.846846250270958e-05, "loss": 0.9727, "step": 992 }, { "epoch": 0.7828143476547103, "grad_norm": 0.33984375, "learning_rate": 1.8463577786657653e-05, "loss": 0.9793, "step": 993 }, { "epoch": 0.7836026803310997, "grad_norm": 0.34765625, "learning_rate": 1.845868594142036e-05, "loss": 0.9679, "step": 994 }, { "epoch": 0.7843910130074891, "grad_norm": 0.341796875, "learning_rate": 1.845378697111829e-05, "loss": 0.9702, "step": 995 }, { "epoch": 0.7851793456838786, "grad_norm": 0.353515625, "learning_rate": 1.8448880879878026e-05, "loss": 0.9397, "step": 996 }, { "epoch": 0.785967678360268, "grad_norm": 0.353515625, "learning_rate": 1.844396767183215e-05, "loss": 0.9384, "step": 997 }, { "epoch": 0.7867560110366575, "grad_norm": 0.349609375, "learning_rate": 1.8439047351119242e-05, "loss": 0.9733, "step": 998 }, { "epoch": 0.7875443437130469, "grad_norm": 0.345703125, "learning_rate": 1.8434119921883865e-05, "loss": 0.9936, "step": 999 }, { "epoch": 0.7883326763894364, "grad_norm": 0.353515625, "learning_rate": 1.8429185388276578e-05, "loss": 0.9409, "step": 1000 }, { "epoch": 0.7891210090658258, "grad_norm": 0.349609375, "learning_rate": 1.842424375445392e-05, "loss": 0.9992, "step": 1001 }, { "epoch": 0.7899093417422152, "grad_norm": 0.359375, "learning_rate": 1.8419295024578417e-05, "loss": 0.9454, "step": 1002 }, { "epoch": 0.7906976744186046, "grad_norm": 0.3515625, "learning_rate": 1.8414339202818564e-05, "loss": 0.9761, "step": 1003 }, { "epoch": 0.7914860070949941, "grad_norm": 0.349609375, "learning_rate": 1.8409376293348836e-05, "loss": 0.9382, "step": 1004 }, { "epoch": 0.7922743397713835, "grad_norm": 0.42578125, "learning_rate": 1.8404406300349673e-05, "loss": 0.9692, "step": 1005 }, { "epoch": 0.793062672447773, "grad_norm": 0.357421875, "learning_rate": 1.8399429228007484e-05, "loss": 0.9356, "step": 1006 }, { "epoch": 0.7938510051241624, "grad_norm": 0.345703125, "learning_rate": 1.8394445080514643e-05, "loss": 0.9563, "step": 1007 }, { "epoch": 0.7946393378005518, "grad_norm": 0.36328125, "learning_rate": 1.838945386206948e-05, "loss": 0.9638, "step": 1008 }, { "epoch": 0.7954276704769413, "grad_norm": 0.345703125, "learning_rate": 1.8384455576876292e-05, "loss": 0.9229, "step": 1009 }, { "epoch": 0.7962160031533307, "grad_norm": 0.369140625, "learning_rate": 1.8379450229145308e-05, "loss": 0.9521, "step": 1010 }, { "epoch": 0.7970043358297202, "grad_norm": 0.34375, "learning_rate": 1.8374437823092726e-05, "loss": 0.9771, "step": 1011 }, { "epoch": 0.7977926685061095, "grad_norm": 0.34375, "learning_rate": 1.8369418362940675e-05, "loss": 0.9573, "step": 1012 }, { "epoch": 0.798581001182499, "grad_norm": 0.345703125, "learning_rate": 1.836439185291724e-05, "loss": 0.9504, "step": 1013 }, { "epoch": 0.7993693338588884, "grad_norm": 0.357421875, "learning_rate": 1.835935829725643e-05, "loss": 0.9802, "step": 1014 }, { "epoch": 0.8001576665352779, "grad_norm": 0.361328125, "learning_rate": 1.8354317700198198e-05, "loss": 0.9368, "step": 1015 }, { "epoch": 0.8009459992116673, "grad_norm": 0.34375, "learning_rate": 1.834927006598843e-05, "loss": 0.9165, "step": 1016 }, { "epoch": 0.8017343318880568, "grad_norm": 0.353515625, "learning_rate": 1.8344215398878925e-05, "loss": 1.0035, "step": 1017 }, { "epoch": 0.8025226645644462, "grad_norm": 0.35546875, "learning_rate": 1.833915370312743e-05, "loss": 0.9211, "step": 1018 }, { "epoch": 0.8033109972408357, "grad_norm": 0.369140625, "learning_rate": 1.833408498299759e-05, "loss": 0.951, "step": 1019 }, { "epoch": 0.8040993299172251, "grad_norm": 0.345703125, "learning_rate": 1.8329009242758977e-05, "loss": 0.9629, "step": 1020 }, { "epoch": 0.8048876625936146, "grad_norm": 0.361328125, "learning_rate": 1.8323926486687076e-05, "loss": 0.9393, "step": 1021 }, { "epoch": 0.8056759952700039, "grad_norm": 0.3515625, "learning_rate": 1.831883671906328e-05, "loss": 0.948, "step": 1022 }, { "epoch": 0.8064643279463933, "grad_norm": 0.36328125, "learning_rate": 1.8313739944174894e-05, "loss": 0.9971, "step": 1023 }, { "epoch": 0.8072526606227828, "grad_norm": 0.36328125, "learning_rate": 1.8308636166315114e-05, "loss": 0.9632, "step": 1024 }, { "epoch": 0.8080409932991722, "grad_norm": 0.33203125, "learning_rate": 1.8303525389783045e-05, "loss": 0.9657, "step": 1025 }, { "epoch": 0.8088293259755617, "grad_norm": 0.345703125, "learning_rate": 1.829840761888368e-05, "loss": 0.9594, "step": 1026 }, { "epoch": 0.8096176586519511, "grad_norm": 0.369140625, "learning_rate": 1.8293282857927913e-05, "loss": 0.9621, "step": 1027 }, { "epoch": 0.8104059913283406, "grad_norm": 0.3515625, "learning_rate": 1.8288151111232514e-05, "loss": 0.9453, "step": 1028 }, { "epoch": 0.81119432400473, "grad_norm": 0.345703125, "learning_rate": 1.8283012383120148e-05, "loss": 0.9371, "step": 1029 }, { "epoch": 0.8119826566811195, "grad_norm": 0.353515625, "learning_rate": 1.8277866677919354e-05, "loss": 0.9718, "step": 1030 }, { "epoch": 0.8127709893575089, "grad_norm": 0.388671875, "learning_rate": 1.8272713999964547e-05, "loss": 0.9701, "step": 1031 }, { "epoch": 0.8135593220338984, "grad_norm": 0.375, "learning_rate": 1.8267554353596027e-05, "loss": 0.973, "step": 1032 }, { "epoch": 0.8143476547102877, "grad_norm": 0.36328125, "learning_rate": 1.826238774315995e-05, "loss": 0.9853, "step": 1033 }, { "epoch": 0.8151359873866771, "grad_norm": 0.341796875, "learning_rate": 1.8257214173008347e-05, "loss": 0.9422, "step": 1034 }, { "epoch": 0.8159243200630666, "grad_norm": 0.353515625, "learning_rate": 1.82520336474991e-05, "loss": 0.9482, "step": 1035 }, { "epoch": 0.816712652739456, "grad_norm": 0.341796875, "learning_rate": 1.8246846170995964e-05, "loss": 0.9542, "step": 1036 }, { "epoch": 0.8175009854158455, "grad_norm": 0.34765625, "learning_rate": 1.824165174786854e-05, "loss": 0.9702, "step": 1037 }, { "epoch": 0.8182893180922349, "grad_norm": 0.357421875, "learning_rate": 1.8236450382492293e-05, "loss": 0.9811, "step": 1038 }, { "epoch": 0.8190776507686244, "grad_norm": 0.353515625, "learning_rate": 1.8231242079248512e-05, "loss": 0.9835, "step": 1039 }, { "epoch": 0.8198659834450138, "grad_norm": 0.34765625, "learning_rate": 1.8226026842524353e-05, "loss": 0.937, "step": 1040 }, { "epoch": 0.8206543161214033, "grad_norm": 0.369140625, "learning_rate": 1.8220804676712797e-05, "loss": 0.9406, "step": 1041 }, { "epoch": 0.8214426487977927, "grad_norm": 0.359375, "learning_rate": 1.8215575586212672e-05, "loss": 0.9717, "step": 1042 }, { "epoch": 0.822230981474182, "grad_norm": 0.35546875, "learning_rate": 1.8210339575428632e-05, "loss": 0.9613, "step": 1043 }, { "epoch": 0.8230193141505715, "grad_norm": 0.353515625, "learning_rate": 1.8205096648771166e-05, "loss": 0.949, "step": 1044 }, { "epoch": 0.823807646826961, "grad_norm": 0.337890625, "learning_rate": 1.8199846810656586e-05, "loss": 0.8965, "step": 1045 }, { "epoch": 0.8245959795033504, "grad_norm": 0.3515625, "learning_rate": 1.819459006550702e-05, "loss": 1.0044, "step": 1046 }, { "epoch": 0.8253843121797398, "grad_norm": 0.37109375, "learning_rate": 1.818932641775043e-05, "loss": 0.9803, "step": 1047 }, { "epoch": 0.8261726448561293, "grad_norm": 0.337890625, "learning_rate": 1.8184055871820568e-05, "loss": 0.9424, "step": 1048 }, { "epoch": 0.8269609775325187, "grad_norm": 0.365234375, "learning_rate": 1.8178778432157015e-05, "loss": 0.9526, "step": 1049 }, { "epoch": 0.8277493102089082, "grad_norm": 0.361328125, "learning_rate": 1.817349410320516e-05, "loss": 0.9953, "step": 1050 }, { "epoch": 0.8285376428852976, "grad_norm": 0.359375, "learning_rate": 1.8168202889416184e-05, "loss": 0.9729, "step": 1051 }, { "epoch": 0.8293259755616871, "grad_norm": 0.34765625, "learning_rate": 1.8162904795247077e-05, "loss": 1.0034, "step": 1052 }, { "epoch": 0.8301143082380764, "grad_norm": 0.353515625, "learning_rate": 1.815759982516061e-05, "loss": 0.9722, "step": 1053 }, { "epoch": 0.8309026409144659, "grad_norm": 0.365234375, "learning_rate": 1.815228798362537e-05, "loss": 0.9133, "step": 1054 }, { "epoch": 0.8316909735908553, "grad_norm": 0.34765625, "learning_rate": 1.8146969275115704e-05, "loss": 0.9413, "step": 1055 }, { "epoch": 0.8324793062672448, "grad_norm": 0.36328125, "learning_rate": 1.814164370411177e-05, "loss": 0.9751, "step": 1056 }, { "epoch": 0.8332676389436342, "grad_norm": 0.359375, "learning_rate": 1.8136311275099484e-05, "loss": 0.972, "step": 1057 }, { "epoch": 0.8340559716200237, "grad_norm": 0.3671875, "learning_rate": 1.8130971992570555e-05, "loss": 0.9812, "step": 1058 }, { "epoch": 0.8348443042964131, "grad_norm": 0.357421875, "learning_rate": 1.8125625861022455e-05, "loss": 0.9668, "step": 1059 }, { "epoch": 0.8356326369728025, "grad_norm": 0.357421875, "learning_rate": 1.812027288495843e-05, "loss": 0.9325, "step": 1060 }, { "epoch": 0.836420969649192, "grad_norm": 0.345703125, "learning_rate": 1.8114913068887493e-05, "loss": 0.9834, "step": 1061 }, { "epoch": 0.8372093023255814, "grad_norm": 0.3515625, "learning_rate": 1.810954641732441e-05, "loss": 0.9279, "step": 1062 }, { "epoch": 0.8379976350019709, "grad_norm": 0.35546875, "learning_rate": 1.8104172934789716e-05, "loss": 0.9512, "step": 1063 }, { "epoch": 0.8387859676783602, "grad_norm": 0.373046875, "learning_rate": 1.809879262580969e-05, "loss": 0.9479, "step": 1064 }, { "epoch": 0.8395743003547497, "grad_norm": 0.3515625, "learning_rate": 1.8093405494916373e-05, "loss": 0.9466, "step": 1065 }, { "epoch": 0.8403626330311391, "grad_norm": 0.361328125, "learning_rate": 1.8088011546647536e-05, "loss": 0.9786, "step": 1066 }, { "epoch": 0.8411509657075286, "grad_norm": 0.365234375, "learning_rate": 1.808261078554671e-05, "loss": 0.9594, "step": 1067 }, { "epoch": 0.841939298383918, "grad_norm": 0.390625, "learning_rate": 1.8077203216163145e-05, "loss": 0.9516, "step": 1068 }, { "epoch": 0.8427276310603075, "grad_norm": 0.353515625, "learning_rate": 1.807178884305185e-05, "loss": 0.9695, "step": 1069 }, { "epoch": 0.8435159637366969, "grad_norm": 0.349609375, "learning_rate": 1.8066367670773543e-05, "loss": 0.9557, "step": 1070 }, { "epoch": 0.8443042964130864, "grad_norm": 0.345703125, "learning_rate": 1.8060939703894684e-05, "loss": 0.9346, "step": 1071 }, { "epoch": 0.8450926290894758, "grad_norm": 0.349609375, "learning_rate": 1.805550494698745e-05, "loss": 0.999, "step": 1072 }, { "epoch": 0.8458809617658652, "grad_norm": 0.33984375, "learning_rate": 1.8050063404629733e-05, "loss": 0.9702, "step": 1073 }, { "epoch": 0.8466692944422546, "grad_norm": 0.37109375, "learning_rate": 1.8044615081405153e-05, "loss": 0.9804, "step": 1074 }, { "epoch": 0.847457627118644, "grad_norm": 0.353515625, "learning_rate": 1.8039159981903028e-05, "loss": 0.9627, "step": 1075 }, { "epoch": 0.8482459597950335, "grad_norm": 0.361328125, "learning_rate": 1.8033698110718395e-05, "loss": 0.9761, "step": 1076 }, { "epoch": 0.8490342924714229, "grad_norm": 0.37890625, "learning_rate": 1.8028229472451994e-05, "loss": 0.9749, "step": 1077 }, { "epoch": 0.8498226251478124, "grad_norm": 0.35546875, "learning_rate": 1.8022754071710254e-05, "loss": 0.9661, "step": 1078 }, { "epoch": 0.8506109578242018, "grad_norm": 0.349609375, "learning_rate": 1.801727191310531e-05, "loss": 0.9561, "step": 1079 }, { "epoch": 0.8513992905005913, "grad_norm": 0.376953125, "learning_rate": 1.801178300125499e-05, "loss": 0.9459, "step": 1080 }, { "epoch": 0.8521876231769807, "grad_norm": 0.3828125, "learning_rate": 1.8006287340782807e-05, "loss": 0.9556, "step": 1081 }, { "epoch": 0.8529759558533702, "grad_norm": 0.359375, "learning_rate": 1.800078493631796e-05, "loss": 1.0088, "step": 1082 }, { "epoch": 0.8537642885297596, "grad_norm": 0.35546875, "learning_rate": 1.7995275792495327e-05, "loss": 0.9928, "step": 1083 }, { "epoch": 0.8545526212061489, "grad_norm": 0.375, "learning_rate": 1.7989759913955465e-05, "loss": 0.9545, "step": 1084 }, { "epoch": 0.8553409538825384, "grad_norm": 0.376953125, "learning_rate": 1.7984237305344604e-05, "loss": 0.9564, "step": 1085 }, { "epoch": 0.8561292865589278, "grad_norm": 0.345703125, "learning_rate": 1.7978707971314636e-05, "loss": 0.9814, "step": 1086 }, { "epoch": 0.8569176192353173, "grad_norm": 0.3671875, "learning_rate": 1.7973171916523134e-05, "loss": 0.9941, "step": 1087 }, { "epoch": 0.8577059519117067, "grad_norm": 0.392578125, "learning_rate": 1.7967629145633312e-05, "loss": 0.949, "step": 1088 }, { "epoch": 0.8584942845880962, "grad_norm": 0.349609375, "learning_rate": 1.796207966331406e-05, "loss": 0.9673, "step": 1089 }, { "epoch": 0.8592826172644856, "grad_norm": 0.3515625, "learning_rate": 1.795652347423991e-05, "loss": 0.9556, "step": 1090 }, { "epoch": 0.8600709499408751, "grad_norm": 0.357421875, "learning_rate": 1.7950960583091045e-05, "loss": 0.9547, "step": 1091 }, { "epoch": 0.8608592826172645, "grad_norm": 0.3671875, "learning_rate": 1.794539099455329e-05, "loss": 0.9789, "step": 1092 }, { "epoch": 0.861647615293654, "grad_norm": 0.373046875, "learning_rate": 1.7939814713318123e-05, "loss": 0.921, "step": 1093 }, { "epoch": 0.8624359479700433, "grad_norm": 0.353515625, "learning_rate": 1.7934231744082652e-05, "loss": 0.9991, "step": 1094 }, { "epoch": 0.8632242806464328, "grad_norm": 0.357421875, "learning_rate": 1.7928642091549616e-05, "loss": 0.9823, "step": 1095 }, { "epoch": 0.8640126133228222, "grad_norm": 0.35546875, "learning_rate": 1.7923045760427387e-05, "loss": 0.9513, "step": 1096 }, { "epoch": 0.8648009459992116, "grad_norm": 0.380859375, "learning_rate": 1.791744275542996e-05, "loss": 0.966, "step": 1097 }, { "epoch": 0.8655892786756011, "grad_norm": 0.359375, "learning_rate": 1.7911833081276962e-05, "loss": 0.9716, "step": 1098 }, { "epoch": 0.8663776113519905, "grad_norm": 0.35546875, "learning_rate": 1.790621674269362e-05, "loss": 0.9828, "step": 1099 }, { "epoch": 0.86716594402838, "grad_norm": 0.36328125, "learning_rate": 1.790059374441079e-05, "loss": 0.9901, "step": 1100 }, { "epoch": 0.8679542767047694, "grad_norm": 0.39453125, "learning_rate": 1.7894964091164932e-05, "loss": 0.9552, "step": 1101 }, { "epoch": 0.8687426093811589, "grad_norm": 0.359375, "learning_rate": 1.7889327787698105e-05, "loss": 0.9398, "step": 1102 }, { "epoch": 0.8695309420575483, "grad_norm": 0.357421875, "learning_rate": 1.7883684838757983e-05, "loss": 0.9598, "step": 1103 }, { "epoch": 0.8703192747339378, "grad_norm": 0.34375, "learning_rate": 1.787803524909783e-05, "loss": 0.94, "step": 1104 }, { "epoch": 0.8711076074103271, "grad_norm": 0.3671875, "learning_rate": 1.787237902347651e-05, "loss": 0.9362, "step": 1105 }, { "epoch": 0.8718959400867166, "grad_norm": 0.34375, "learning_rate": 1.786671616665846e-05, "loss": 0.9517, "step": 1106 }, { "epoch": 0.872684272763106, "grad_norm": 0.341796875, "learning_rate": 1.7861046683413717e-05, "loss": 0.9748, "step": 1107 }, { "epoch": 0.8734726054394955, "grad_norm": 0.35546875, "learning_rate": 1.7855370578517902e-05, "loss": 0.9374, "step": 1108 }, { "epoch": 0.8742609381158849, "grad_norm": 0.3515625, "learning_rate": 1.784968785675221e-05, "loss": 0.9491, "step": 1109 }, { "epoch": 0.8750492707922743, "grad_norm": 0.349609375, "learning_rate": 1.78439985229034e-05, "loss": 0.9308, "step": 1110 }, { "epoch": 0.8758376034686638, "grad_norm": 0.341796875, "learning_rate": 1.7838302581763818e-05, "loss": 0.95, "step": 1111 }, { "epoch": 0.8766259361450532, "grad_norm": 0.34765625, "learning_rate": 1.783260003813136e-05, "loss": 0.924, "step": 1112 }, { "epoch": 0.8774142688214427, "grad_norm": 0.359375, "learning_rate": 1.7826890896809492e-05, "loss": 0.9522, "step": 1113 }, { "epoch": 0.8782026014978321, "grad_norm": 0.337890625, "learning_rate": 1.7821175162607235e-05, "loss": 0.9412, "step": 1114 }, { "epoch": 0.8789909341742215, "grad_norm": 0.34375, "learning_rate": 1.7815452840339166e-05, "loss": 0.943, "step": 1115 }, { "epoch": 0.8797792668506109, "grad_norm": 0.357421875, "learning_rate": 1.7809723934825405e-05, "loss": 0.9523, "step": 1116 }, { "epoch": 0.8805675995270004, "grad_norm": 0.35546875, "learning_rate": 1.7803988450891628e-05, "loss": 0.9694, "step": 1117 }, { "epoch": 0.8813559322033898, "grad_norm": 0.34765625, "learning_rate": 1.7798246393369037e-05, "loss": 1.0036, "step": 1118 }, { "epoch": 0.8821442648797793, "grad_norm": 0.34765625, "learning_rate": 1.7792497767094384e-05, "loss": 0.9969, "step": 1119 }, { "epoch": 0.8829325975561687, "grad_norm": 0.33984375, "learning_rate": 1.7786742576909955e-05, "loss": 0.9449, "step": 1120 }, { "epoch": 0.8837209302325582, "grad_norm": 0.349609375, "learning_rate": 1.7780980827663553e-05, "loss": 0.9752, "step": 1121 }, { "epoch": 0.8845092629089476, "grad_norm": 0.466796875, "learning_rate": 1.7775212524208513e-05, "loss": 0.9518, "step": 1122 }, { "epoch": 0.885297595585337, "grad_norm": 0.341796875, "learning_rate": 1.77694376714037e-05, "loss": 0.9488, "step": 1123 }, { "epoch": 0.8860859282617265, "grad_norm": 0.34375, "learning_rate": 1.7763656274113476e-05, "loss": 0.9963, "step": 1124 }, { "epoch": 0.8868742609381158, "grad_norm": 0.353515625, "learning_rate": 1.775786833720773e-05, "loss": 0.9993, "step": 1125 }, { "epoch": 0.8876625936145053, "grad_norm": 0.345703125, "learning_rate": 1.775207386556186e-05, "loss": 0.9749, "step": 1126 }, { "epoch": 0.8884509262908947, "grad_norm": 0.345703125, "learning_rate": 1.7746272864056754e-05, "loss": 0.9399, "step": 1127 }, { "epoch": 0.8892392589672842, "grad_norm": 0.349609375, "learning_rate": 1.7740465337578823e-05, "loss": 0.9523, "step": 1128 }, { "epoch": 0.8900275916436736, "grad_norm": 0.34765625, "learning_rate": 1.7734651291019955e-05, "loss": 0.9557, "step": 1129 }, { "epoch": 0.8908159243200631, "grad_norm": 0.35546875, "learning_rate": 1.772883072927754e-05, "loss": 0.9431, "step": 1130 }, { "epoch": 0.8916042569964525, "grad_norm": 0.34765625, "learning_rate": 1.7723003657254447e-05, "loss": 0.9687, "step": 1131 }, { "epoch": 0.892392589672842, "grad_norm": 0.345703125, "learning_rate": 1.771717007985904e-05, "loss": 0.967, "step": 1132 }, { "epoch": 0.8931809223492314, "grad_norm": 0.353515625, "learning_rate": 1.7711330002005157e-05, "loss": 0.9333, "step": 1133 }, { "epoch": 0.8939692550256209, "grad_norm": 0.34375, "learning_rate": 1.7705483428612114e-05, "loss": 0.9425, "step": 1134 }, { "epoch": 0.8947575877020102, "grad_norm": 0.33984375, "learning_rate": 1.769963036460469e-05, "loss": 0.9594, "step": 1135 }, { "epoch": 0.8955459203783996, "grad_norm": 0.3359375, "learning_rate": 1.7693770814913144e-05, "loss": 0.9625, "step": 1136 }, { "epoch": 0.8963342530547891, "grad_norm": 0.357421875, "learning_rate": 1.768790478447319e-05, "loss": 0.9645, "step": 1137 }, { "epoch": 0.8971225857311785, "grad_norm": 0.341796875, "learning_rate": 1.7682032278226002e-05, "loss": 0.9568, "step": 1138 }, { "epoch": 0.897910918407568, "grad_norm": 0.353515625, "learning_rate": 1.7676153301118207e-05, "loss": 0.973, "step": 1139 }, { "epoch": 0.8986992510839574, "grad_norm": 0.341796875, "learning_rate": 1.7670267858101895e-05, "loss": 0.9613, "step": 1140 }, { "epoch": 0.8994875837603469, "grad_norm": 0.34375, "learning_rate": 1.7664375954134586e-05, "loss": 0.9431, "step": 1141 }, { "epoch": 0.9002759164367363, "grad_norm": 0.33984375, "learning_rate": 1.7658477594179248e-05, "loss": 0.9525, "step": 1142 }, { "epoch": 0.9010642491131258, "grad_norm": 0.345703125, "learning_rate": 1.7652572783204286e-05, "loss": 0.9755, "step": 1143 }, { "epoch": 0.9018525817895152, "grad_norm": 0.349609375, "learning_rate": 1.764666152618355e-05, "loss": 0.9777, "step": 1144 }, { "epoch": 0.9026409144659047, "grad_norm": 0.33984375, "learning_rate": 1.7640743828096306e-05, "loss": 0.9487, "step": 1145 }, { "epoch": 0.903429247142294, "grad_norm": 0.34375, "learning_rate": 1.7634819693927254e-05, "loss": 0.9786, "step": 1146 }, { "epoch": 0.9042175798186834, "grad_norm": 0.345703125, "learning_rate": 1.7628889128666503e-05, "loss": 0.9682, "step": 1147 }, { "epoch": 0.9050059124950729, "grad_norm": 0.33984375, "learning_rate": 1.7622952137309596e-05, "loss": 0.9182, "step": 1148 }, { "epoch": 0.9057942451714623, "grad_norm": 0.341796875, "learning_rate": 1.761700872485748e-05, "loss": 0.9352, "step": 1149 }, { "epoch": 0.9065825778478518, "grad_norm": 0.349609375, "learning_rate": 1.761105889631651e-05, "loss": 0.9617, "step": 1150 }, { "epoch": 0.9073709105242412, "grad_norm": 0.349609375, "learning_rate": 1.7605102656698444e-05, "loss": 0.949, "step": 1151 }, { "epoch": 0.9081592432006307, "grad_norm": 0.34375, "learning_rate": 1.759914001102045e-05, "loss": 0.9653, "step": 1152 }, { "epoch": 0.9089475758770201, "grad_norm": 0.341796875, "learning_rate": 1.759317096430508e-05, "loss": 0.9512, "step": 1153 }, { "epoch": 0.9097359085534096, "grad_norm": 0.34375, "learning_rate": 1.758719552158029e-05, "loss": 0.9343, "step": 1154 }, { "epoch": 0.910524241229799, "grad_norm": 0.345703125, "learning_rate": 1.758121368787941e-05, "loss": 1.0255, "step": 1155 }, { "epoch": 0.9113125739061884, "grad_norm": 0.349609375, "learning_rate": 1.757522546824116e-05, "loss": 0.9741, "step": 1156 }, { "epoch": 0.9121009065825778, "grad_norm": 0.361328125, "learning_rate": 1.7569230867709648e-05, "loss": 0.9744, "step": 1157 }, { "epoch": 0.9128892392589673, "grad_norm": 0.34765625, "learning_rate": 1.756322989133434e-05, "loss": 0.9542, "step": 1158 }, { "epoch": 0.9136775719353567, "grad_norm": 0.333984375, "learning_rate": 1.755722254417008e-05, "loss": 0.9365, "step": 1159 }, { "epoch": 0.9144659046117461, "grad_norm": 0.349609375, "learning_rate": 1.7551208831277092e-05, "loss": 0.9543, "step": 1160 }, { "epoch": 0.9152542372881356, "grad_norm": 0.341796875, "learning_rate": 1.7545188757720933e-05, "loss": 0.9697, "step": 1161 }, { "epoch": 0.916042569964525, "grad_norm": 0.34765625, "learning_rate": 1.7539162328572543e-05, "loss": 0.9689, "step": 1162 }, { "epoch": 0.9168309026409145, "grad_norm": 0.359375, "learning_rate": 1.7533129548908205e-05, "loss": 0.9305, "step": 1163 }, { "epoch": 0.9176192353173039, "grad_norm": 0.34765625, "learning_rate": 1.7527090423809553e-05, "loss": 0.9677, "step": 1164 }, { "epoch": 0.9184075679936934, "grad_norm": 0.36328125, "learning_rate": 1.7521044958363567e-05, "loss": 0.981, "step": 1165 }, { "epoch": 0.9191959006700827, "grad_norm": 0.337890625, "learning_rate": 1.7514993157662564e-05, "loss": 0.9403, "step": 1166 }, { "epoch": 0.9199842333464722, "grad_norm": 0.337890625, "learning_rate": 1.7508935026804202e-05, "loss": 0.9545, "step": 1167 }, { "epoch": 0.9207725660228616, "grad_norm": 0.353515625, "learning_rate": 1.750287057089147e-05, "loss": 0.9584, "step": 1168 }, { "epoch": 0.9215608986992511, "grad_norm": 0.345703125, "learning_rate": 1.7496799795032685e-05, "loss": 0.9917, "step": 1169 }, { "epoch": 0.9223492313756405, "grad_norm": 0.34375, "learning_rate": 1.749072270434148e-05, "loss": 0.9472, "step": 1170 }, { "epoch": 0.92313756405203, "grad_norm": 0.353515625, "learning_rate": 1.7484639303936823e-05, "loss": 0.975, "step": 1171 }, { "epoch": 0.9239258967284194, "grad_norm": 0.3671875, "learning_rate": 1.7478549598942983e-05, "loss": 1.0044, "step": 1172 }, { "epoch": 0.9247142294048089, "grad_norm": 0.33984375, "learning_rate": 1.747245359448954e-05, "loss": 0.9926, "step": 1173 }, { "epoch": 0.9255025620811983, "grad_norm": 0.337890625, "learning_rate": 1.746635129571139e-05, "loss": 0.9414, "step": 1174 }, { "epoch": 0.9262908947575877, "grad_norm": 0.3515625, "learning_rate": 1.746024270774873e-05, "loss": 0.9564, "step": 1175 }, { "epoch": 0.9270792274339772, "grad_norm": 0.349609375, "learning_rate": 1.745412783574704e-05, "loss": 0.929, "step": 1176 }, { "epoch": 0.9278675601103665, "grad_norm": 0.341796875, "learning_rate": 1.7448006684857108e-05, "loss": 0.9299, "step": 1177 }, { "epoch": 0.928655892786756, "grad_norm": 0.376953125, "learning_rate": 1.744187926023501e-05, "loss": 0.9369, "step": 1178 }, { "epoch": 0.9294442254631454, "grad_norm": 0.353515625, "learning_rate": 1.7435745567042096e-05, "loss": 0.9705, "step": 1179 }, { "epoch": 0.9302325581395349, "grad_norm": 0.349609375, "learning_rate": 1.7429605610445007e-05, "loss": 0.9535, "step": 1180 }, { "epoch": 0.9310208908159243, "grad_norm": 0.37109375, "learning_rate": 1.7423459395615654e-05, "loss": 0.9478, "step": 1181 }, { "epoch": 0.9318092234923138, "grad_norm": 0.33984375, "learning_rate": 1.7417306927731226e-05, "loss": 0.9435, "step": 1182 }, { "epoch": 0.9325975561687032, "grad_norm": 0.34765625, "learning_rate": 1.741114821197417e-05, "loss": 0.9418, "step": 1183 }, { "epoch": 0.9333858888450927, "grad_norm": 0.357421875, "learning_rate": 1.7404983253532205e-05, "loss": 1.0006, "step": 1184 }, { "epoch": 0.9341742215214821, "grad_norm": 0.349609375, "learning_rate": 1.73988120575983e-05, "loss": 1.005, "step": 1185 }, { "epoch": 0.9349625541978716, "grad_norm": 0.34765625, "learning_rate": 1.7392634629370684e-05, "loss": 0.9985, "step": 1186 }, { "epoch": 0.9357508868742609, "grad_norm": 0.361328125, "learning_rate": 1.7386450974052836e-05, "loss": 0.9512, "step": 1187 }, { "epoch": 0.9365392195506503, "grad_norm": 0.357421875, "learning_rate": 1.738026109685347e-05, "loss": 0.9463, "step": 1188 }, { "epoch": 0.9373275522270398, "grad_norm": 0.353515625, "learning_rate": 1.737406500298656e-05, "loss": 0.9227, "step": 1189 }, { "epoch": 0.9381158849034292, "grad_norm": 0.37890625, "learning_rate": 1.73678626976713e-05, "loss": 1.0176, "step": 1190 }, { "epoch": 0.9389042175798187, "grad_norm": 0.361328125, "learning_rate": 1.736165418613212e-05, "loss": 0.9706, "step": 1191 }, { "epoch": 0.9396925502562081, "grad_norm": 0.345703125, "learning_rate": 1.7355439473598682e-05, "loss": 0.9517, "step": 1192 }, { "epoch": 0.9404808829325976, "grad_norm": 0.34375, "learning_rate": 1.734921856530587e-05, "loss": 0.9409, "step": 1193 }, { "epoch": 0.941269215608987, "grad_norm": 0.35546875, "learning_rate": 1.7342991466493785e-05, "loss": 0.979, "step": 1194 }, { "epoch": 0.9420575482853765, "grad_norm": 0.35546875, "learning_rate": 1.733675818240774e-05, "loss": 0.9868, "step": 1195 }, { "epoch": 0.9428458809617659, "grad_norm": 0.33984375, "learning_rate": 1.7330518718298263e-05, "loss": 0.9217, "step": 1196 }, { "epoch": 0.9436342136381553, "grad_norm": 0.349609375, "learning_rate": 1.732427307942109e-05, "loss": 0.9441, "step": 1197 }, { "epoch": 0.9444225463145447, "grad_norm": 0.3671875, "learning_rate": 1.731802127103715e-05, "loss": 1.0233, "step": 1198 }, { "epoch": 0.9452108789909341, "grad_norm": 0.359375, "learning_rate": 1.731176329841257e-05, "loss": 0.9484, "step": 1199 }, { "epoch": 0.9459992116673236, "grad_norm": 0.33984375, "learning_rate": 1.730549916681868e-05, "loss": 0.9532, "step": 1200 }, { "epoch": 0.946787544343713, "grad_norm": 0.34375, "learning_rate": 1.7299228881531984e-05, "loss": 0.9391, "step": 1201 }, { "epoch": 0.9475758770201025, "grad_norm": 0.37109375, "learning_rate": 1.729295244783418e-05, "loss": 0.9879, "step": 1202 }, { "epoch": 0.9483642096964919, "grad_norm": 0.349609375, "learning_rate": 1.728666987101214e-05, "loss": 0.9333, "step": 1203 }, { "epoch": 0.9491525423728814, "grad_norm": 0.408203125, "learning_rate": 1.7280381156357907e-05, "loss": 0.945, "step": 1204 }, { "epoch": 0.9499408750492708, "grad_norm": 0.3515625, "learning_rate": 1.7274086309168702e-05, "loss": 0.9648, "step": 1205 }, { "epoch": 0.9507292077256603, "grad_norm": 0.3359375, "learning_rate": 1.726778533474691e-05, "loss": 0.9102, "step": 1206 }, { "epoch": 0.9515175404020496, "grad_norm": 0.3515625, "learning_rate": 1.726147823840007e-05, "loss": 0.9966, "step": 1207 }, { "epoch": 0.9523058730784391, "grad_norm": 0.34765625, "learning_rate": 1.7255165025440893e-05, "loss": 0.9319, "step": 1208 }, { "epoch": 0.9530942057548285, "grad_norm": 0.34375, "learning_rate": 1.724884570118722e-05, "loss": 0.9119, "step": 1209 }, { "epoch": 0.953882538431218, "grad_norm": 0.359375, "learning_rate": 1.724252027096206e-05, "loss": 0.9576, "step": 1210 }, { "epoch": 0.9546708711076074, "grad_norm": 0.345703125, "learning_rate": 1.7236188740093557e-05, "loss": 0.9779, "step": 1211 }, { "epoch": 0.9554592037839968, "grad_norm": 0.34375, "learning_rate": 1.722985111391499e-05, "loss": 0.9695, "step": 1212 }, { "epoch": 0.9562475364603863, "grad_norm": 0.349609375, "learning_rate": 1.722350739776478e-05, "loss": 0.9546, "step": 1213 }, { "epoch": 0.9570358691367757, "grad_norm": 0.361328125, "learning_rate": 1.7217157596986474e-05, "loss": 0.952, "step": 1214 }, { "epoch": 0.9578242018131652, "grad_norm": 0.390625, "learning_rate": 1.721080171692874e-05, "loss": 0.9583, "step": 1215 }, { "epoch": 0.9586125344895546, "grad_norm": 0.359375, "learning_rate": 1.7204439762945382e-05, "loss": 0.9472, "step": 1216 }, { "epoch": 0.9594008671659441, "grad_norm": 0.345703125, "learning_rate": 1.71980717403953e-05, "loss": 0.9863, "step": 1217 }, { "epoch": 0.9601891998423334, "grad_norm": 0.34765625, "learning_rate": 1.7191697654642517e-05, "loss": 0.9419, "step": 1218 }, { "epoch": 0.9609775325187229, "grad_norm": 0.353515625, "learning_rate": 1.7185317511056166e-05, "loss": 0.9458, "step": 1219 }, { "epoch": 0.9617658651951123, "grad_norm": 0.345703125, "learning_rate": 1.7178931315010473e-05, "loss": 0.9439, "step": 1220 }, { "epoch": 0.9625541978715018, "grad_norm": 0.359375, "learning_rate": 1.717253907188477e-05, "loss": 0.9968, "step": 1221 }, { "epoch": 0.9633425305478912, "grad_norm": 0.34375, "learning_rate": 1.7166140787063486e-05, "loss": 0.9454, "step": 1222 }, { "epoch": 0.9641308632242807, "grad_norm": 0.349609375, "learning_rate": 1.7159736465936124e-05, "loss": 0.9489, "step": 1223 }, { "epoch": 0.9649191959006701, "grad_norm": 0.33984375, "learning_rate": 1.7153326113897286e-05, "loss": 0.9553, "step": 1224 }, { "epoch": 0.9657075285770595, "grad_norm": 0.357421875, "learning_rate": 1.714690973634665e-05, "loss": 0.953, "step": 1225 }, { "epoch": 0.966495861253449, "grad_norm": 0.35546875, "learning_rate": 1.7140487338688967e-05, "loss": 0.9807, "step": 1226 }, { "epoch": 0.9672841939298384, "grad_norm": 0.353515625, "learning_rate": 1.7134058926334063e-05, "loss": 0.9345, "step": 1227 }, { "epoch": 0.9680725266062278, "grad_norm": 0.36328125, "learning_rate": 1.7127624504696824e-05, "loss": 0.9999, "step": 1228 }, { "epoch": 0.9688608592826172, "grad_norm": 0.34765625, "learning_rate": 1.7121184079197202e-05, "loss": 0.9534, "step": 1229 }, { "epoch": 0.9696491919590067, "grad_norm": 0.357421875, "learning_rate": 1.711473765526021e-05, "loss": 0.9473, "step": 1230 }, { "epoch": 0.9704375246353961, "grad_norm": 0.34765625, "learning_rate": 1.710828523831591e-05, "loss": 0.9442, "step": 1231 }, { "epoch": 0.9712258573117856, "grad_norm": 0.33984375, "learning_rate": 1.710182683379941e-05, "loss": 0.9489, "step": 1232 }, { "epoch": 0.972014189988175, "grad_norm": 0.349609375, "learning_rate": 1.7095362447150866e-05, "loss": 0.9536, "step": 1233 }, { "epoch": 0.9728025226645645, "grad_norm": 0.359375, "learning_rate": 1.708889208381546e-05, "loss": 0.9833, "step": 1234 }, { "epoch": 0.9735908553409539, "grad_norm": 0.357421875, "learning_rate": 1.7082415749243436e-05, "loss": 0.9557, "step": 1235 }, { "epoch": 0.9743791880173434, "grad_norm": 0.365234375, "learning_rate": 1.7075933448890037e-05, "loss": 0.9868, "step": 1236 }, { "epoch": 0.9751675206937328, "grad_norm": 0.365234375, "learning_rate": 1.706944518821555e-05, "loss": 0.9498, "step": 1237 }, { "epoch": 0.9759558533701221, "grad_norm": 0.34765625, "learning_rate": 1.706295097268528e-05, "loss": 0.9663, "step": 1238 }, { "epoch": 0.9767441860465116, "grad_norm": 0.388671875, "learning_rate": 1.7056450807769543e-05, "loss": 0.9659, "step": 1239 }, { "epoch": 0.977532518722901, "grad_norm": 0.365234375, "learning_rate": 1.7049944698943668e-05, "loss": 0.9747, "step": 1240 }, { "epoch": 0.9783208513992905, "grad_norm": 0.3515625, "learning_rate": 1.7043432651687987e-05, "loss": 0.9106, "step": 1241 }, { "epoch": 0.9791091840756799, "grad_norm": 0.3671875, "learning_rate": 1.7036914671487854e-05, "loss": 0.9707, "step": 1242 }, { "epoch": 0.9798975167520694, "grad_norm": 0.349609375, "learning_rate": 1.7030390763833588e-05, "loss": 0.9451, "step": 1243 }, { "epoch": 0.9806858494284588, "grad_norm": 0.376953125, "learning_rate": 1.702386093422053e-05, "loss": 0.9997, "step": 1244 }, { "epoch": 0.9814741821048483, "grad_norm": 0.357421875, "learning_rate": 1.701732518814899e-05, "loss": 0.9692, "step": 1245 }, { "epoch": 0.9822625147812377, "grad_norm": 0.357421875, "learning_rate": 1.7010783531124278e-05, "loss": 0.9888, "step": 1246 }, { "epoch": 0.9830508474576272, "grad_norm": 0.353515625, "learning_rate": 1.7004235968656665e-05, "loss": 0.9529, "step": 1247 }, { "epoch": 0.9838391801340165, "grad_norm": 0.365234375, "learning_rate": 1.699768250626141e-05, "loss": 0.9285, "step": 1248 }, { "epoch": 0.984627512810406, "grad_norm": 0.34375, "learning_rate": 1.699112314945874e-05, "loss": 0.9296, "step": 1249 }, { "epoch": 0.9854158454867954, "grad_norm": 0.33984375, "learning_rate": 1.698455790377384e-05, "loss": 0.9651, "step": 1250 }, { "epoch": 0.9862041781631848, "grad_norm": 0.33984375, "learning_rate": 1.697798677473686e-05, "loss": 1.0072, "step": 1251 }, { "epoch": 0.9869925108395743, "grad_norm": 0.349609375, "learning_rate": 1.697140976788291e-05, "loss": 0.9659, "step": 1252 }, { "epoch": 0.9877808435159637, "grad_norm": 0.361328125, "learning_rate": 1.696482688875204e-05, "loss": 0.9748, "step": 1253 }, { "epoch": 0.9885691761923532, "grad_norm": 0.353515625, "learning_rate": 1.6958238142889258e-05, "loss": 0.9567, "step": 1254 }, { "epoch": 0.9893575088687426, "grad_norm": 0.341796875, "learning_rate": 1.6951643535844508e-05, "loss": 0.944, "step": 1255 }, { "epoch": 0.9901458415451321, "grad_norm": 0.357421875, "learning_rate": 1.694504307317267e-05, "loss": 0.9468, "step": 1256 }, { "epoch": 0.9909341742215215, "grad_norm": 0.34765625, "learning_rate": 1.6938436760433565e-05, "loss": 0.9666, "step": 1257 }, { "epoch": 0.991722506897911, "grad_norm": 0.39453125, "learning_rate": 1.6931824603191926e-05, "loss": 0.9281, "step": 1258 }, { "epoch": 0.9925108395743003, "grad_norm": 0.365234375, "learning_rate": 1.6925206607017425e-05, "loss": 0.9291, "step": 1259 }, { "epoch": 0.9932991722506898, "grad_norm": 0.349609375, "learning_rate": 1.6918582777484642e-05, "loss": 0.963, "step": 1260 }, { "epoch": 0.9940875049270792, "grad_norm": 0.3515625, "learning_rate": 1.6911953120173075e-05, "loss": 0.9461, "step": 1261 }, { "epoch": 0.9948758376034686, "grad_norm": 0.3359375, "learning_rate": 1.690531764066713e-05, "loss": 0.9448, "step": 1262 }, { "epoch": 0.9956641702798581, "grad_norm": 0.34375, "learning_rate": 1.689867634455612e-05, "loss": 0.9283, "step": 1263 }, { "epoch": 0.9964525029562475, "grad_norm": 0.34765625, "learning_rate": 1.6892029237434248e-05, "loss": 0.9781, "step": 1264 }, { "epoch": 0.997240835632637, "grad_norm": 0.341796875, "learning_rate": 1.688537632490063e-05, "loss": 0.9538, "step": 1265 }, { "epoch": 0.9980291683090264, "grad_norm": 0.349609375, "learning_rate": 1.687871761255925e-05, "loss": 0.9581, "step": 1266 }, { "epoch": 0.9988175009854159, "grad_norm": 0.345703125, "learning_rate": 1.6872053106018996e-05, "loss": 0.9663, "step": 1267 }, { "epoch": 0.9996058336618053, "grad_norm": 0.349609375, "learning_rate": 1.686538281089362e-05, "loss": 0.9773, "step": 1268 }, { "epoch": 0.9996058336618053, "eval_loss": 0.9508654475212097, "eval_runtime": 615.533, "eval_samples_per_second": 26.699, "eval_steps_per_second": 1.67, "step": 1268 }, { "epoch": 1.0003941663381948, "grad_norm": 0.345703125, "learning_rate": 1.6858706732801767e-05, "loss": 0.9436, "step": 1269 }, { "epoch": 1.0011824990145841, "grad_norm": 0.341796875, "learning_rate": 1.6852024877366945e-05, "loss": 0.942, "step": 1270 }, { "epoch": 1.0019708316909737, "grad_norm": 0.34765625, "learning_rate": 1.6845337250217525e-05, "loss": 0.9701, "step": 1271 }, { "epoch": 1.002759164367363, "grad_norm": 0.345703125, "learning_rate": 1.6838643856986746e-05, "loss": 0.8978, "step": 1272 }, { "epoch": 1.0035474970437526, "grad_norm": 0.34375, "learning_rate": 1.6831944703312694e-05, "loss": 0.928, "step": 1273 }, { "epoch": 1.004335829720142, "grad_norm": 0.341796875, "learning_rate": 1.6825239794838326e-05, "loss": 0.9399, "step": 1274 }, { "epoch": 1.0051241623965312, "grad_norm": 0.345703125, "learning_rate": 1.6818529137211427e-05, "loss": 0.9453, "step": 1275 }, { "epoch": 1.0059124950729208, "grad_norm": 0.337890625, "learning_rate": 1.6811812736084635e-05, "loss": 0.9596, "step": 1276 }, { "epoch": 1.0067008277493101, "grad_norm": 0.34765625, "learning_rate": 1.6805090597115424e-05, "loss": 0.9724, "step": 1277 }, { "epoch": 1.0074891604256997, "grad_norm": 0.353515625, "learning_rate": 1.6798362725966102e-05, "loss": 0.9622, "step": 1278 }, { "epoch": 1.008277493102089, "grad_norm": 0.333984375, "learning_rate": 1.67916291283038e-05, "loss": 0.941, "step": 1279 }, { "epoch": 1.0090658257784786, "grad_norm": 0.3359375, "learning_rate": 1.678488980980048e-05, "loss": 0.939, "step": 1280 }, { "epoch": 1.009854158454868, "grad_norm": 0.357421875, "learning_rate": 1.6778144776132927e-05, "loss": 0.9265, "step": 1281 }, { "epoch": 1.0106424911312575, "grad_norm": 0.3515625, "learning_rate": 1.6771394032982718e-05, "loss": 0.935, "step": 1282 }, { "epoch": 1.0114308238076468, "grad_norm": 0.349609375, "learning_rate": 1.676463758603626e-05, "loss": 0.9625, "step": 1283 }, { "epoch": 1.0122191564840362, "grad_norm": 0.33984375, "learning_rate": 1.675787544098477e-05, "loss": 0.9342, "step": 1284 }, { "epoch": 1.0130074891604257, "grad_norm": 0.349609375, "learning_rate": 1.6751107603524238e-05, "loss": 0.9691, "step": 1285 }, { "epoch": 1.013795821836815, "grad_norm": 0.345703125, "learning_rate": 1.6744334079355472e-05, "loss": 0.9577, "step": 1286 }, { "epoch": 1.0145841545132046, "grad_norm": 0.337890625, "learning_rate": 1.6737554874184058e-05, "loss": 0.9504, "step": 1287 }, { "epoch": 1.015372487189594, "grad_norm": 0.353515625, "learning_rate": 1.6730769993720376e-05, "loss": 0.9132, "step": 1288 }, { "epoch": 1.0161608198659835, "grad_norm": 0.34375, "learning_rate": 1.672397944367958e-05, "loss": 0.9505, "step": 1289 }, { "epoch": 1.0169491525423728, "grad_norm": 0.3359375, "learning_rate": 1.671718322978161e-05, "loss": 0.9201, "step": 1290 }, { "epoch": 1.0177374852187624, "grad_norm": 0.34765625, "learning_rate": 1.6710381357751155e-05, "loss": 0.9571, "step": 1291 }, { "epoch": 1.0185258178951517, "grad_norm": 0.34375, "learning_rate": 1.6703573833317698e-05, "loss": 0.9441, "step": 1292 }, { "epoch": 1.0193141505715413, "grad_norm": 0.34765625, "learning_rate": 1.6696760662215457e-05, "loss": 0.9467, "step": 1293 }, { "epoch": 1.0201024832479306, "grad_norm": 0.341796875, "learning_rate": 1.6689941850183425e-05, "loss": 0.9216, "step": 1294 }, { "epoch": 1.02089081592432, "grad_norm": 0.35546875, "learning_rate": 1.668311740296534e-05, "loss": 0.9799, "step": 1295 }, { "epoch": 1.0216791486007095, "grad_norm": 0.35546875, "learning_rate": 1.6676287326309684e-05, "loss": 0.9333, "step": 1296 }, { "epoch": 1.0224674812770989, "grad_norm": 0.3515625, "learning_rate": 1.666945162596969e-05, "loss": 0.9573, "step": 1297 }, { "epoch": 1.0232558139534884, "grad_norm": 0.380859375, "learning_rate": 1.6662610307703318e-05, "loss": 0.9231, "step": 1298 }, { "epoch": 1.0240441466298777, "grad_norm": 0.3515625, "learning_rate": 1.6655763377273258e-05, "loss": 0.9338, "step": 1299 }, { "epoch": 1.0248324793062673, "grad_norm": 0.357421875, "learning_rate": 1.6648910840446947e-05, "loss": 0.9068, "step": 1300 }, { "epoch": 1.0256208119826566, "grad_norm": 0.353515625, "learning_rate": 1.664205270299652e-05, "loss": 0.9377, "step": 1301 }, { "epoch": 1.0264091446590462, "grad_norm": 0.416015625, "learning_rate": 1.6635188970698843e-05, "loss": 0.9368, "step": 1302 }, { "epoch": 1.0271974773354355, "grad_norm": 0.345703125, "learning_rate": 1.662831964933549e-05, "loss": 0.9369, "step": 1303 }, { "epoch": 1.027985810011825, "grad_norm": 0.35546875, "learning_rate": 1.6621444744692753e-05, "loss": 0.9374, "step": 1304 }, { "epoch": 1.0287741426882144, "grad_norm": 0.349609375, "learning_rate": 1.661456426256161e-05, "loss": 0.9269, "step": 1305 }, { "epoch": 1.0295624753646038, "grad_norm": 0.369140625, "learning_rate": 1.660767820873775e-05, "loss": 0.9295, "step": 1306 }, { "epoch": 1.0303508080409933, "grad_norm": 0.33984375, "learning_rate": 1.6600786589021555e-05, "loss": 0.9561, "step": 1307 }, { "epoch": 1.0311391407173827, "grad_norm": 0.35546875, "learning_rate": 1.6593889409218084e-05, "loss": 0.9506, "step": 1308 }, { "epoch": 1.0319274733937722, "grad_norm": 0.35546875, "learning_rate": 1.6586986675137095e-05, "loss": 0.9684, "step": 1309 }, { "epoch": 1.0327158060701616, "grad_norm": 0.345703125, "learning_rate": 1.6580078392593012e-05, "loss": 0.9621, "step": 1310 }, { "epoch": 1.0335041387465511, "grad_norm": 0.337890625, "learning_rate": 1.657316456740494e-05, "loss": 0.9313, "step": 1311 }, { "epoch": 1.0342924714229405, "grad_norm": 0.34765625, "learning_rate": 1.6566245205396647e-05, "loss": 0.942, "step": 1312 }, { "epoch": 1.03508080409933, "grad_norm": 0.353515625, "learning_rate": 1.6559320312396573e-05, "loss": 0.9625, "step": 1313 }, { "epoch": 1.0358691367757193, "grad_norm": 0.35546875, "learning_rate": 1.6552389894237806e-05, "loss": 0.9668, "step": 1314 }, { "epoch": 1.0366574694521087, "grad_norm": 0.34765625, "learning_rate": 1.6545453956758098e-05, "loss": 0.9383, "step": 1315 }, { "epoch": 1.0374458021284982, "grad_norm": 0.357421875, "learning_rate": 1.653851250579985e-05, "loss": 0.9842, "step": 1316 }, { "epoch": 1.0382341348048876, "grad_norm": 0.36328125, "learning_rate": 1.6531565547210095e-05, "loss": 0.9463, "step": 1317 }, { "epoch": 1.0390224674812771, "grad_norm": 0.345703125, "learning_rate": 1.652461308684052e-05, "loss": 0.9637, "step": 1318 }, { "epoch": 1.0398108001576665, "grad_norm": 0.35546875, "learning_rate": 1.6517655130547435e-05, "loss": 0.9345, "step": 1319 }, { "epoch": 1.040599132834056, "grad_norm": 0.3515625, "learning_rate": 1.6510691684191795e-05, "loss": 0.9518, "step": 1320 }, { "epoch": 1.0413874655104454, "grad_norm": 0.34375, "learning_rate": 1.6503722753639155e-05, "loss": 0.9269, "step": 1321 }, { "epoch": 1.042175798186835, "grad_norm": 0.33984375, "learning_rate": 1.6496748344759715e-05, "loss": 0.9228, "step": 1322 }, { "epoch": 1.0429641308632243, "grad_norm": 0.353515625, "learning_rate": 1.648976846342827e-05, "loss": 0.9464, "step": 1323 }, { "epoch": 1.0437524635396138, "grad_norm": 0.33984375, "learning_rate": 1.648278311552424e-05, "loss": 0.8976, "step": 1324 }, { "epoch": 1.0445407962160032, "grad_norm": 0.37890625, "learning_rate": 1.647579230693164e-05, "loss": 0.9323, "step": 1325 }, { "epoch": 1.0453291288923925, "grad_norm": 0.341796875, "learning_rate": 1.6468796043539082e-05, "loss": 0.9379, "step": 1326 }, { "epoch": 1.046117461568782, "grad_norm": 0.349609375, "learning_rate": 1.6461794331239785e-05, "loss": 0.939, "step": 1327 }, { "epoch": 1.0469057942451714, "grad_norm": 0.369140625, "learning_rate": 1.6454787175931547e-05, "loss": 0.9752, "step": 1328 }, { "epoch": 1.047694126921561, "grad_norm": 0.361328125, "learning_rate": 1.6447774583516756e-05, "loss": 0.9435, "step": 1329 }, { "epoch": 1.0484824595979503, "grad_norm": 0.337890625, "learning_rate": 1.644075655990238e-05, "loss": 0.9392, "step": 1330 }, { "epoch": 1.0492707922743398, "grad_norm": 0.349609375, "learning_rate": 1.6433733110999956e-05, "loss": 0.9526, "step": 1331 }, { "epoch": 1.0500591249507292, "grad_norm": 0.341796875, "learning_rate": 1.6426704242725603e-05, "loss": 0.9213, "step": 1332 }, { "epoch": 1.0508474576271187, "grad_norm": 0.345703125, "learning_rate": 1.641966996099999e-05, "loss": 0.9783, "step": 1333 }, { "epoch": 1.051635790303508, "grad_norm": 0.34375, "learning_rate": 1.6412630271748354e-05, "loss": 0.9218, "step": 1334 }, { "epoch": 1.0524241229798976, "grad_norm": 0.35546875, "learning_rate": 1.640558518090049e-05, "loss": 0.9204, "step": 1335 }, { "epoch": 1.053212455656287, "grad_norm": 0.34375, "learning_rate": 1.639853469439074e-05, "loss": 0.9268, "step": 1336 }, { "epoch": 1.0540007883326763, "grad_norm": 0.33984375, "learning_rate": 1.6391478818157987e-05, "loss": 0.9394, "step": 1337 }, { "epoch": 1.0547891210090659, "grad_norm": 0.349609375, "learning_rate": 1.6384417558145654e-05, "loss": 0.9379, "step": 1338 }, { "epoch": 1.0555774536854552, "grad_norm": 0.337890625, "learning_rate": 1.637735092030171e-05, "loss": 0.9354, "step": 1339 }, { "epoch": 1.0563657863618447, "grad_norm": 0.353515625, "learning_rate": 1.6370278910578644e-05, "loss": 0.9672, "step": 1340 }, { "epoch": 1.057154119038234, "grad_norm": 0.345703125, "learning_rate": 1.6363201534933465e-05, "loss": 0.9351, "step": 1341 }, { "epoch": 1.0579424517146236, "grad_norm": 0.361328125, "learning_rate": 1.6356118799327716e-05, "loss": 0.948, "step": 1342 }, { "epoch": 1.058730784391013, "grad_norm": 0.359375, "learning_rate": 1.6349030709727444e-05, "loss": 0.9893, "step": 1343 }, { "epoch": 1.0595191170674025, "grad_norm": 0.341796875, "learning_rate": 1.6341937272103213e-05, "loss": 0.9605, "step": 1344 }, { "epoch": 1.0603074497437919, "grad_norm": 0.34765625, "learning_rate": 1.6334838492430084e-05, "loss": 0.9509, "step": 1345 }, { "epoch": 1.0610957824201812, "grad_norm": 0.35546875, "learning_rate": 1.632773437668763e-05, "loss": 0.9553, "step": 1346 }, { "epoch": 1.0618841150965708, "grad_norm": 0.341796875, "learning_rate": 1.6320624930859905e-05, "loss": 0.9268, "step": 1347 }, { "epoch": 1.06267244777296, "grad_norm": 0.349609375, "learning_rate": 1.6313510160935457e-05, "loss": 0.9565, "step": 1348 }, { "epoch": 1.0634607804493497, "grad_norm": 0.341796875, "learning_rate": 1.6306390072907327e-05, "loss": 0.927, "step": 1349 }, { "epoch": 1.064249113125739, "grad_norm": 0.365234375, "learning_rate": 1.6299264672773025e-05, "loss": 0.9735, "step": 1350 }, { "epoch": 1.0650374458021286, "grad_norm": 0.365234375, "learning_rate": 1.629213396653454e-05, "loss": 0.9612, "step": 1351 }, { "epoch": 1.065825778478518, "grad_norm": 0.357421875, "learning_rate": 1.628499796019833e-05, "loss": 0.9805, "step": 1352 }, { "epoch": 1.0666141111549075, "grad_norm": 0.34375, "learning_rate": 1.627785665977532e-05, "loss": 0.9561, "step": 1353 }, { "epoch": 1.0674024438312968, "grad_norm": 0.365234375, "learning_rate": 1.627071007128089e-05, "loss": 0.9428, "step": 1354 }, { "epoch": 1.0681907765076863, "grad_norm": 0.3515625, "learning_rate": 1.6263558200734875e-05, "loss": 0.9204, "step": 1355 }, { "epoch": 1.0689791091840757, "grad_norm": 0.353515625, "learning_rate": 1.6256401054161565e-05, "loss": 0.951, "step": 1356 }, { "epoch": 1.069767441860465, "grad_norm": 0.33984375, "learning_rate": 1.624923863758969e-05, "loss": 0.95, "step": 1357 }, { "epoch": 1.0705557745368546, "grad_norm": 0.34765625, "learning_rate": 1.624207095705241e-05, "loss": 0.9381, "step": 1358 }, { "epoch": 1.071344107213244, "grad_norm": 0.341796875, "learning_rate": 1.6234898018587336e-05, "loss": 0.9507, "step": 1359 }, { "epoch": 1.0721324398896335, "grad_norm": 0.353515625, "learning_rate": 1.6227719828236503e-05, "loss": 0.9367, "step": 1360 }, { "epoch": 1.0729207725660228, "grad_norm": 0.35546875, "learning_rate": 1.6220536392046357e-05, "loss": 0.9027, "step": 1361 }, { "epoch": 1.0737091052424124, "grad_norm": 0.34375, "learning_rate": 1.621334771606778e-05, "loss": 0.9606, "step": 1362 }, { "epoch": 1.0744974379188017, "grad_norm": 0.34375, "learning_rate": 1.6206153806356062e-05, "loss": 0.9537, "step": 1363 }, { "epoch": 1.0752857705951913, "grad_norm": 0.337890625, "learning_rate": 1.6198954668970893e-05, "loss": 0.9289, "step": 1364 }, { "epoch": 1.0760741032715806, "grad_norm": 0.34375, "learning_rate": 1.619175030997638e-05, "loss": 0.9706, "step": 1365 }, { "epoch": 1.07686243594797, "grad_norm": 0.341796875, "learning_rate": 1.6184540735441015e-05, "loss": 0.9341, "step": 1366 }, { "epoch": 1.0776507686243595, "grad_norm": 0.341796875, "learning_rate": 1.6177325951437693e-05, "loss": 0.9334, "step": 1367 }, { "epoch": 1.0784391013007488, "grad_norm": 0.345703125, "learning_rate": 1.6170105964043698e-05, "loss": 0.9529, "step": 1368 }, { "epoch": 1.0792274339771384, "grad_norm": 0.35546875, "learning_rate": 1.6162880779340686e-05, "loss": 0.9522, "step": 1369 }, { "epoch": 1.0800157666535277, "grad_norm": 0.353515625, "learning_rate": 1.6155650403414703e-05, "loss": 0.9361, "step": 1370 }, { "epoch": 1.0808040993299173, "grad_norm": 0.34375, "learning_rate": 1.614841484235616e-05, "loss": 0.9499, "step": 1371 }, { "epoch": 1.0815924320063066, "grad_norm": 0.3515625, "learning_rate": 1.6141174102259838e-05, "loss": 0.9207, "step": 1372 }, { "epoch": 1.0823807646826962, "grad_norm": 0.353515625, "learning_rate": 1.613392818922489e-05, "loss": 0.9501, "step": 1373 }, { "epoch": 1.0831690973590855, "grad_norm": 0.365234375, "learning_rate": 1.61266771093548e-05, "loss": 0.9306, "step": 1374 }, { "epoch": 1.083957430035475, "grad_norm": 0.337890625, "learning_rate": 1.6119420868757433e-05, "loss": 0.9181, "step": 1375 }, { "epoch": 1.0847457627118644, "grad_norm": 0.439453125, "learning_rate": 1.611215947354499e-05, "loss": 0.9247, "step": 1376 }, { "epoch": 1.0855340953882537, "grad_norm": 0.34765625, "learning_rate": 1.610489292983401e-05, "loss": 0.9662, "step": 1377 }, { "epoch": 1.0863224280646433, "grad_norm": 0.3515625, "learning_rate": 1.6097621243745373e-05, "loss": 0.9784, "step": 1378 }, { "epoch": 1.0871107607410326, "grad_norm": 0.341796875, "learning_rate": 1.6090344421404286e-05, "loss": 0.9431, "step": 1379 }, { "epoch": 1.0878990934174222, "grad_norm": 0.34765625, "learning_rate": 1.6083062468940297e-05, "loss": 1.0208, "step": 1380 }, { "epoch": 1.0886874260938115, "grad_norm": 0.45703125, "learning_rate": 1.6075775392487252e-05, "loss": 0.9521, "step": 1381 }, { "epoch": 1.089475758770201, "grad_norm": 0.349609375, "learning_rate": 1.606848319818333e-05, "loss": 0.9233, "step": 1382 }, { "epoch": 1.0902640914465904, "grad_norm": 0.349609375, "learning_rate": 1.606118589217102e-05, "loss": 0.9399, "step": 1383 }, { "epoch": 1.09105242412298, "grad_norm": 0.34765625, "learning_rate": 1.6053883480597115e-05, "loss": 0.9332, "step": 1384 }, { "epoch": 1.0918407567993693, "grad_norm": 0.35546875, "learning_rate": 1.60465759696127e-05, "loss": 0.9366, "step": 1385 }, { "epoch": 1.0926290894757589, "grad_norm": 0.34765625, "learning_rate": 1.6039263365373167e-05, "loss": 0.946, "step": 1386 }, { "epoch": 1.0934174221521482, "grad_norm": 0.34765625, "learning_rate": 1.603194567403819e-05, "loss": 0.9696, "step": 1387 }, { "epoch": 1.0942057548285375, "grad_norm": 0.349609375, "learning_rate": 1.6024622901771736e-05, "loss": 0.9185, "step": 1388 }, { "epoch": 1.094994087504927, "grad_norm": 0.357421875, "learning_rate": 1.6017295054742045e-05, "loss": 0.9366, "step": 1389 }, { "epoch": 1.0957824201813164, "grad_norm": 0.359375, "learning_rate": 1.6009962139121635e-05, "loss": 0.9558, "step": 1390 }, { "epoch": 1.096570752857706, "grad_norm": 0.34375, "learning_rate": 1.6002624161087293e-05, "loss": 0.9286, "step": 1391 }, { "epoch": 1.0973590855340953, "grad_norm": 0.34375, "learning_rate": 1.5995281126820067e-05, "loss": 0.9412, "step": 1392 }, { "epoch": 1.098147418210485, "grad_norm": 0.353515625, "learning_rate": 1.5987933042505272e-05, "loss": 0.979, "step": 1393 }, { "epoch": 1.0989357508868742, "grad_norm": 0.353515625, "learning_rate": 1.598057991433247e-05, "loss": 0.9625, "step": 1394 }, { "epoch": 1.0997240835632638, "grad_norm": 0.341796875, "learning_rate": 1.5973221748495472e-05, "loss": 0.9465, "step": 1395 }, { "epoch": 1.1005124162396531, "grad_norm": 0.35546875, "learning_rate": 1.596585855119233e-05, "loss": 0.9567, "step": 1396 }, { "epoch": 1.1013007489160427, "grad_norm": 0.349609375, "learning_rate": 1.595849032862535e-05, "loss": 0.9241, "step": 1397 }, { "epoch": 1.102089081592432, "grad_norm": 0.34765625, "learning_rate": 1.5951117087001048e-05, "loss": 0.9512, "step": 1398 }, { "epoch": 1.1028774142688214, "grad_norm": 0.349609375, "learning_rate": 1.5943738832530183e-05, "loss": 0.9325, "step": 1399 }, { "epoch": 1.103665746945211, "grad_norm": 0.34765625, "learning_rate": 1.5936355571427734e-05, "loss": 0.9536, "step": 1400 }, { "epoch": 1.1044540796216002, "grad_norm": 0.349609375, "learning_rate": 1.592896730991289e-05, "loss": 0.9256, "step": 1401 }, { "epoch": 1.1052424122979898, "grad_norm": 0.3515625, "learning_rate": 1.5921574054209064e-05, "loss": 0.9034, "step": 1402 }, { "epoch": 1.1060307449743791, "grad_norm": 0.337890625, "learning_rate": 1.5914175810543868e-05, "loss": 0.9633, "step": 1403 }, { "epoch": 1.1068190776507687, "grad_norm": 0.349609375, "learning_rate": 1.590677258514911e-05, "loss": 0.9355, "step": 1404 }, { "epoch": 1.107607410327158, "grad_norm": 0.38671875, "learning_rate": 1.5899364384260813e-05, "loss": 0.9606, "step": 1405 }, { "epoch": 1.1083957430035476, "grad_norm": 0.337890625, "learning_rate": 1.5891951214119167e-05, "loss": 0.929, "step": 1406 }, { "epoch": 1.109184075679937, "grad_norm": 0.3515625, "learning_rate": 1.588453308096857e-05, "loss": 0.9448, "step": 1407 }, { "epoch": 1.1099724083563263, "grad_norm": 0.349609375, "learning_rate": 1.587710999105759e-05, "loss": 0.9816, "step": 1408 }, { "epoch": 1.1107607410327158, "grad_norm": 0.3515625, "learning_rate": 1.586968195063896e-05, "loss": 0.9653, "step": 1409 }, { "epoch": 1.1115490737091052, "grad_norm": 0.345703125, "learning_rate": 1.5862248965969604e-05, "loss": 0.9112, "step": 1410 }, { "epoch": 1.1123374063854947, "grad_norm": 0.349609375, "learning_rate": 1.58548110433106e-05, "loss": 0.9279, "step": 1411 }, { "epoch": 1.113125739061884, "grad_norm": 0.337890625, "learning_rate": 1.584736818892718e-05, "loss": 0.9391, "step": 1412 }, { "epoch": 1.1139140717382736, "grad_norm": 0.365234375, "learning_rate": 1.5839920409088743e-05, "loss": 0.9658, "step": 1413 }, { "epoch": 1.114702404414663, "grad_norm": 0.341796875, "learning_rate": 1.5832467710068825e-05, "loss": 0.9457, "step": 1414 }, { "epoch": 1.1154907370910525, "grad_norm": 0.353515625, "learning_rate": 1.5825010098145117e-05, "loss": 0.9144, "step": 1415 }, { "epoch": 1.1162790697674418, "grad_norm": 0.361328125, "learning_rate": 1.5817547579599436e-05, "loss": 0.9444, "step": 1416 }, { "epoch": 1.1170674024438312, "grad_norm": 0.365234375, "learning_rate": 1.5810080160717737e-05, "loss": 0.9657, "step": 1417 }, { "epoch": 1.1178557351202207, "grad_norm": 0.357421875, "learning_rate": 1.580260784779011e-05, "loss": 0.9655, "step": 1418 }, { "epoch": 1.11864406779661, "grad_norm": 0.373046875, "learning_rate": 1.5795130647110755e-05, "loss": 0.9202, "step": 1419 }, { "epoch": 1.1194324004729996, "grad_norm": 0.35546875, "learning_rate": 1.5787648564978e-05, "loss": 0.9044, "step": 1420 }, { "epoch": 1.120220733149389, "grad_norm": 0.361328125, "learning_rate": 1.5780161607694276e-05, "loss": 0.9825, "step": 1421 }, { "epoch": 1.1210090658257785, "grad_norm": 0.357421875, "learning_rate": 1.577266978156613e-05, "loss": 0.94, "step": 1422 }, { "epoch": 1.1217973985021679, "grad_norm": 0.373046875, "learning_rate": 1.5765173092904202e-05, "loss": 0.9779, "step": 1423 }, { "epoch": 1.1225857311785574, "grad_norm": 0.369140625, "learning_rate": 1.575767154802323e-05, "loss": 0.988, "step": 1424 }, { "epoch": 1.1233740638549468, "grad_norm": 0.3671875, "learning_rate": 1.5750165153242048e-05, "loss": 0.932, "step": 1425 }, { "epoch": 1.1241623965313363, "grad_norm": 0.35546875, "learning_rate": 1.574265391488356e-05, "loss": 0.92, "step": 1426 }, { "epoch": 1.1249507292077257, "grad_norm": 0.365234375, "learning_rate": 1.5735137839274775e-05, "loss": 0.917, "step": 1427 }, { "epoch": 1.125739061884115, "grad_norm": 0.341796875, "learning_rate": 1.5727616932746748e-05, "loss": 0.9159, "step": 1428 }, { "epoch": 1.1265273945605045, "grad_norm": 0.359375, "learning_rate": 1.572009120163463e-05, "loss": 0.9704, "step": 1429 }, { "epoch": 1.1273157272368939, "grad_norm": 0.33984375, "learning_rate": 1.571256065227761e-05, "loss": 0.9331, "step": 1430 }, { "epoch": 1.1281040599132834, "grad_norm": 0.36328125, "learning_rate": 1.570502529101896e-05, "loss": 0.9534, "step": 1431 }, { "epoch": 1.1288923925896728, "grad_norm": 0.357421875, "learning_rate": 1.569748512420599e-05, "loss": 0.9588, "step": 1432 }, { "epoch": 1.1296807252660623, "grad_norm": 0.3671875, "learning_rate": 1.5689940158190064e-05, "loss": 0.9455, "step": 1433 }, { "epoch": 1.1304690579424517, "grad_norm": 0.349609375, "learning_rate": 1.5682390399326585e-05, "loss": 0.9562, "step": 1434 }, { "epoch": 1.1312573906188412, "grad_norm": 0.353515625, "learning_rate": 1.5674835853974992e-05, "loss": 0.9608, "step": 1435 }, { "epoch": 1.1320457232952306, "grad_norm": 0.353515625, "learning_rate": 1.5667276528498766e-05, "loss": 0.9693, "step": 1436 }, { "epoch": 1.1328340559716201, "grad_norm": 0.341796875, "learning_rate": 1.5659712429265403e-05, "loss": 0.9136, "step": 1437 }, { "epoch": 1.1336223886480095, "grad_norm": 0.376953125, "learning_rate": 1.5652143562646416e-05, "loss": 0.9798, "step": 1438 }, { "epoch": 1.1344107213243988, "grad_norm": 0.34375, "learning_rate": 1.5644569935017357e-05, "loss": 0.9778, "step": 1439 }, { "epoch": 1.1351990540007884, "grad_norm": 0.34765625, "learning_rate": 1.5636991552757762e-05, "loss": 0.9222, "step": 1440 }, { "epoch": 1.1359873866771777, "grad_norm": 0.33984375, "learning_rate": 1.5629408422251194e-05, "loss": 0.9171, "step": 1441 }, { "epoch": 1.1367757193535672, "grad_norm": 0.3515625, "learning_rate": 1.5621820549885192e-05, "loss": 0.9451, "step": 1442 }, { "epoch": 1.1375640520299566, "grad_norm": 0.34375, "learning_rate": 1.561422794205131e-05, "loss": 0.963, "step": 1443 }, { "epoch": 1.1383523847063461, "grad_norm": 0.34375, "learning_rate": 1.5606630605145084e-05, "loss": 0.919, "step": 1444 }, { "epoch": 1.1391407173827355, "grad_norm": 0.34765625, "learning_rate": 1.5599028545566028e-05, "loss": 0.9583, "step": 1445 }, { "epoch": 1.139929050059125, "grad_norm": 0.37109375, "learning_rate": 1.5591421769717642e-05, "loss": 0.9252, "step": 1446 }, { "epoch": 1.1407173827355144, "grad_norm": 0.35546875, "learning_rate": 1.5583810284007395e-05, "loss": 0.9171, "step": 1447 }, { "epoch": 1.141505715411904, "grad_norm": 0.34375, "learning_rate": 1.5576194094846723e-05, "loss": 0.9085, "step": 1448 }, { "epoch": 1.1422940480882933, "grad_norm": 0.337890625, "learning_rate": 1.5568573208651027e-05, "loss": 0.9337, "step": 1449 }, { "epoch": 1.1430823807646826, "grad_norm": 0.345703125, "learning_rate": 1.5560947631839654e-05, "loss": 0.9519, "step": 1450 }, { "epoch": 1.1438707134410722, "grad_norm": 0.345703125, "learning_rate": 1.5553317370835916e-05, "loss": 0.9278, "step": 1451 }, { "epoch": 1.1446590461174615, "grad_norm": 0.345703125, "learning_rate": 1.5545682432067068e-05, "loss": 0.8989, "step": 1452 }, { "epoch": 1.145447378793851, "grad_norm": 0.33203125, "learning_rate": 1.5538042821964293e-05, "loss": 0.9326, "step": 1453 }, { "epoch": 1.1462357114702404, "grad_norm": 0.34375, "learning_rate": 1.553039854696273e-05, "loss": 0.9243, "step": 1454 }, { "epoch": 1.14702404414663, "grad_norm": 0.359375, "learning_rate": 1.5522749613501424e-05, "loss": 0.9221, "step": 1455 }, { "epoch": 1.1478123768230193, "grad_norm": 0.3671875, "learning_rate": 1.551509602802336e-05, "loss": 0.9272, "step": 1456 }, { "epoch": 1.1486007094994088, "grad_norm": 0.3515625, "learning_rate": 1.5507437796975436e-05, "loss": 0.9285, "step": 1457 }, { "epoch": 1.1493890421757982, "grad_norm": 0.34375, "learning_rate": 1.5499774926808468e-05, "loss": 0.9238, "step": 1458 }, { "epoch": 1.1501773748521877, "grad_norm": 0.38671875, "learning_rate": 1.5492107423977167e-05, "loss": 0.9925, "step": 1459 }, { "epoch": 1.150965707528577, "grad_norm": 0.359375, "learning_rate": 1.548443529494016e-05, "loss": 0.9215, "step": 1460 }, { "epoch": 1.1517540402049664, "grad_norm": 0.341796875, "learning_rate": 1.5476758546159966e-05, "loss": 0.8907, "step": 1461 }, { "epoch": 1.152542372881356, "grad_norm": 0.3359375, "learning_rate": 1.5469077184103e-05, "loss": 0.9383, "step": 1462 }, { "epoch": 1.1533307055577453, "grad_norm": 0.36328125, "learning_rate": 1.546139121523955e-05, "loss": 0.9601, "step": 1463 }, { "epoch": 1.1541190382341349, "grad_norm": 0.357421875, "learning_rate": 1.5453700646043793e-05, "loss": 0.9481, "step": 1464 }, { "epoch": 1.1549073709105242, "grad_norm": 0.349609375, "learning_rate": 1.5446005482993783e-05, "loss": 0.9321, "step": 1465 }, { "epoch": 1.1556957035869138, "grad_norm": 0.349609375, "learning_rate": 1.5438305732571445e-05, "loss": 0.9464, "step": 1466 }, { "epoch": 1.156484036263303, "grad_norm": 0.3515625, "learning_rate": 1.5430601401262554e-05, "loss": 0.9456, "step": 1467 }, { "epoch": 1.1572723689396924, "grad_norm": 0.36328125, "learning_rate": 1.5422892495556764e-05, "loss": 0.9169, "step": 1468 }, { "epoch": 1.158060701616082, "grad_norm": 0.341796875, "learning_rate": 1.5415179021947566e-05, "loss": 0.9627, "step": 1469 }, { "epoch": 1.1588490342924715, "grad_norm": 0.333984375, "learning_rate": 1.540746098693231e-05, "loss": 0.9481, "step": 1470 }, { "epoch": 1.1596373669688609, "grad_norm": 0.345703125, "learning_rate": 1.5399738397012177e-05, "loss": 0.912, "step": 1471 }, { "epoch": 1.1604256996452502, "grad_norm": 0.3671875, "learning_rate": 1.5392011258692198e-05, "loss": 0.941, "step": 1472 }, { "epoch": 1.1612140323216398, "grad_norm": 0.359375, "learning_rate": 1.5384279578481223e-05, "loss": 0.9389, "step": 1473 }, { "epoch": 1.1620023649980291, "grad_norm": 0.359375, "learning_rate": 1.5376543362891932e-05, "loss": 0.9346, "step": 1474 }, { "epoch": 1.1627906976744187, "grad_norm": 0.337890625, "learning_rate": 1.536880261844083e-05, "loss": 0.9548, "step": 1475 }, { "epoch": 1.163579030350808, "grad_norm": 0.365234375, "learning_rate": 1.536105735164823e-05, "loss": 0.9748, "step": 1476 }, { "epoch": 1.1643673630271976, "grad_norm": 0.353515625, "learning_rate": 1.5353307569038255e-05, "loss": 0.9452, "step": 1477 }, { "epoch": 1.165155695703587, "grad_norm": 0.3515625, "learning_rate": 1.5345553277138846e-05, "loss": 0.939, "step": 1478 }, { "epoch": 1.1659440283799762, "grad_norm": 0.341796875, "learning_rate": 1.5337794482481714e-05, "loss": 0.9486, "step": 1479 }, { "epoch": 1.1667323610563658, "grad_norm": 0.353515625, "learning_rate": 1.5330031191602395e-05, "loss": 0.9414, "step": 1480 }, { "epoch": 1.1675206937327551, "grad_norm": 0.3515625, "learning_rate": 1.5322263411040186e-05, "loss": 0.9455, "step": 1481 }, { "epoch": 1.1683090264091447, "grad_norm": 0.34765625, "learning_rate": 1.531449114733818e-05, "loss": 0.9452, "step": 1482 }, { "epoch": 1.169097359085534, "grad_norm": 0.345703125, "learning_rate": 1.530671440704324e-05, "loss": 0.9338, "step": 1483 }, { "epoch": 1.1698856917619236, "grad_norm": 0.34375, "learning_rate": 1.529893319670601e-05, "loss": 0.948, "step": 1484 }, { "epoch": 1.170674024438313, "grad_norm": 0.36328125, "learning_rate": 1.5291147522880887e-05, "loss": 0.9328, "step": 1485 }, { "epoch": 1.1714623571147025, "grad_norm": 0.357421875, "learning_rate": 1.528335739212603e-05, "loss": 0.9676, "step": 1486 }, { "epoch": 1.1722506897910918, "grad_norm": 0.36328125, "learning_rate": 1.5275562811003363e-05, "loss": 0.9243, "step": 1487 }, { "epoch": 1.1730390224674814, "grad_norm": 0.349609375, "learning_rate": 1.5267763786078544e-05, "loss": 0.9406, "step": 1488 }, { "epoch": 1.1738273551438707, "grad_norm": 0.37890625, "learning_rate": 1.525996032392098e-05, "loss": 0.928, "step": 1489 }, { "epoch": 1.17461568782026, "grad_norm": 0.34765625, "learning_rate": 1.5252152431103824e-05, "loss": 0.9423, "step": 1490 }, { "epoch": 1.1754040204966496, "grad_norm": 0.34765625, "learning_rate": 1.5244340114203946e-05, "loss": 0.9447, "step": 1491 }, { "epoch": 1.176192353173039, "grad_norm": 0.345703125, "learning_rate": 1.5236523379801954e-05, "loss": 0.9279, "step": 1492 }, { "epoch": 1.1769806858494285, "grad_norm": 0.33984375, "learning_rate": 1.5228702234482172e-05, "loss": 0.9207, "step": 1493 }, { "epoch": 1.1777690185258178, "grad_norm": 0.359375, "learning_rate": 1.522087668483264e-05, "loss": 0.9543, "step": 1494 }, { "epoch": 1.1785573512022074, "grad_norm": 0.361328125, "learning_rate": 1.5213046737445108e-05, "loss": 0.9137, "step": 1495 }, { "epoch": 1.1793456838785967, "grad_norm": 0.35546875, "learning_rate": 1.5205212398915034e-05, "loss": 0.9152, "step": 1496 }, { "epoch": 1.1801340165549863, "grad_norm": 0.345703125, "learning_rate": 1.5197373675841572e-05, "loss": 0.9268, "step": 1497 }, { "epoch": 1.1809223492313756, "grad_norm": 0.341796875, "learning_rate": 1.5189530574827567e-05, "loss": 0.9355, "step": 1498 }, { "epoch": 1.1817106819077652, "grad_norm": 0.341796875, "learning_rate": 1.5181683102479553e-05, "loss": 0.8908, "step": 1499 }, { "epoch": 1.1824990145841545, "grad_norm": 0.349609375, "learning_rate": 1.5173831265407749e-05, "loss": 0.9727, "step": 1500 }, { "epoch": 1.1832873472605439, "grad_norm": 0.369140625, "learning_rate": 1.5165975070226045e-05, "loss": 0.9489, "step": 1501 }, { "epoch": 1.1840756799369334, "grad_norm": 0.359375, "learning_rate": 1.5158114523552011e-05, "loss": 0.9655, "step": 1502 }, { "epoch": 1.1848640126133227, "grad_norm": 0.33984375, "learning_rate": 1.5150249632006871e-05, "loss": 0.9262, "step": 1503 }, { "epoch": 1.1856523452897123, "grad_norm": 0.408203125, "learning_rate": 1.5142380402215519e-05, "loss": 0.9317, "step": 1504 }, { "epoch": 1.1864406779661016, "grad_norm": 0.37890625, "learning_rate": 1.5134506840806498e-05, "loss": 0.9645, "step": 1505 }, { "epoch": 1.1872290106424912, "grad_norm": 0.35546875, "learning_rate": 1.5126628954412002e-05, "loss": 0.9124, "step": 1506 }, { "epoch": 1.1880173433188805, "grad_norm": 0.34765625, "learning_rate": 1.5118746749667862e-05, "loss": 0.9014, "step": 1507 }, { "epoch": 1.18880567599527, "grad_norm": 0.361328125, "learning_rate": 1.5110860233213556e-05, "loss": 0.9479, "step": 1508 }, { "epoch": 1.1895940086716594, "grad_norm": 0.369140625, "learning_rate": 1.5102969411692186e-05, "loss": 0.9552, "step": 1509 }, { "epoch": 1.190382341348049, "grad_norm": 0.3515625, "learning_rate": 1.5095074291750486e-05, "loss": 0.9306, "step": 1510 }, { "epoch": 1.1911706740244383, "grad_norm": 0.369140625, "learning_rate": 1.5087174880038808e-05, "loss": 0.942, "step": 1511 }, { "epoch": 1.1919590067008277, "grad_norm": 0.3671875, "learning_rate": 1.5079271183211118e-05, "loss": 0.9584, "step": 1512 }, { "epoch": 1.1927473393772172, "grad_norm": 0.357421875, "learning_rate": 1.5071363207924994e-05, "loss": 0.9495, "step": 1513 }, { "epoch": 1.1935356720536066, "grad_norm": 0.349609375, "learning_rate": 1.5063450960841616e-05, "loss": 0.9535, "step": 1514 }, { "epoch": 1.1943240047299961, "grad_norm": 0.357421875, "learning_rate": 1.5055534448625766e-05, "loss": 0.9501, "step": 1515 }, { "epoch": 1.1951123374063854, "grad_norm": 0.353515625, "learning_rate": 1.504761367794581e-05, "loss": 0.9012, "step": 1516 }, { "epoch": 1.195900670082775, "grad_norm": 0.3515625, "learning_rate": 1.5039688655473712e-05, "loss": 0.9243, "step": 1517 }, { "epoch": 1.1966890027591643, "grad_norm": 0.3515625, "learning_rate": 1.5031759387885008e-05, "loss": 0.9437, "step": 1518 }, { "epoch": 1.1974773354355537, "grad_norm": 0.34375, "learning_rate": 1.502382588185882e-05, "loss": 0.9329, "step": 1519 }, { "epoch": 1.1982656681119432, "grad_norm": 0.34375, "learning_rate": 1.5015888144077826e-05, "loss": 0.9811, "step": 1520 }, { "epoch": 1.1990540007883328, "grad_norm": 0.345703125, "learning_rate": 1.5007946181228286e-05, "loss": 0.9567, "step": 1521 }, { "epoch": 1.1998423334647221, "grad_norm": 0.353515625, "learning_rate": 1.5000000000000002e-05, "loss": 0.9096, "step": 1522 }, { "epoch": 1.2006306661411115, "grad_norm": 0.357421875, "learning_rate": 1.499204960708634e-05, "loss": 0.9103, "step": 1523 }, { "epoch": 1.201418998817501, "grad_norm": 0.33984375, "learning_rate": 1.4984095009184215e-05, "loss": 0.9392, "step": 1524 }, { "epoch": 1.2022073314938904, "grad_norm": 0.341796875, "learning_rate": 1.497613621299407e-05, "loss": 0.9136, "step": 1525 }, { "epoch": 1.20299566417028, "grad_norm": 0.34375, "learning_rate": 1.4968173225219904e-05, "loss": 0.9402, "step": 1526 }, { "epoch": 1.2037839968466693, "grad_norm": 0.408203125, "learning_rate": 1.496020605256923e-05, "loss": 0.9332, "step": 1527 }, { "epoch": 1.2045723295230588, "grad_norm": 0.357421875, "learning_rate": 1.4952234701753097e-05, "loss": 0.9428, "step": 1528 }, { "epoch": 1.2053606621994482, "grad_norm": 0.345703125, "learning_rate": 1.4944259179486068e-05, "loss": 0.9375, "step": 1529 }, { "epoch": 1.2061489948758375, "grad_norm": 0.345703125, "learning_rate": 1.4936279492486222e-05, "loss": 0.9692, "step": 1530 }, { "epoch": 1.206937327552227, "grad_norm": 0.349609375, "learning_rate": 1.4928295647475141e-05, "loss": 0.9571, "step": 1531 }, { "epoch": 1.2077256602286164, "grad_norm": 0.357421875, "learning_rate": 1.4920307651177921e-05, "loss": 0.9332, "step": 1532 }, { "epoch": 1.208513992905006, "grad_norm": 0.35546875, "learning_rate": 1.4912315510323138e-05, "loss": 0.9485, "step": 1533 }, { "epoch": 1.2093023255813953, "grad_norm": 0.353515625, "learning_rate": 1.4904319231642878e-05, "loss": 0.9607, "step": 1534 }, { "epoch": 1.2100906582577848, "grad_norm": 0.357421875, "learning_rate": 1.4896318821872697e-05, "loss": 0.9445, "step": 1535 }, { "epoch": 1.2108789909341742, "grad_norm": 0.34765625, "learning_rate": 1.488831428775164e-05, "loss": 0.9648, "step": 1536 }, { "epoch": 1.2116673236105637, "grad_norm": 0.337890625, "learning_rate": 1.4880305636022221e-05, "loss": 0.9225, "step": 1537 }, { "epoch": 1.212455656286953, "grad_norm": 0.3359375, "learning_rate": 1.4872292873430425e-05, "loss": 0.9252, "step": 1538 }, { "epoch": 1.2132439889633426, "grad_norm": 0.36328125, "learning_rate": 1.48642760067257e-05, "loss": 0.9889, "step": 1539 }, { "epoch": 1.214032321639732, "grad_norm": 0.34375, "learning_rate": 1.4856255042660945e-05, "loss": 0.9245, "step": 1540 }, { "epoch": 1.2148206543161213, "grad_norm": 0.349609375, "learning_rate": 1.4848229987992522e-05, "loss": 0.9532, "step": 1541 }, { "epoch": 1.2156089869925109, "grad_norm": 0.35546875, "learning_rate": 1.4840200849480226e-05, "loss": 0.9275, "step": 1542 }, { "epoch": 1.2163973196689002, "grad_norm": 0.390625, "learning_rate": 1.4832167633887306e-05, "loss": 0.9382, "step": 1543 }, { "epoch": 1.2171856523452897, "grad_norm": 0.345703125, "learning_rate": 1.4824130347980431e-05, "loss": 0.9377, "step": 1544 }, { "epoch": 1.217973985021679, "grad_norm": 0.345703125, "learning_rate": 1.4816088998529707e-05, "loss": 0.9384, "step": 1545 }, { "epoch": 1.2187623176980686, "grad_norm": 0.34765625, "learning_rate": 1.4808043592308661e-05, "loss": 0.9407, "step": 1546 }, { "epoch": 1.219550650374458, "grad_norm": 0.359375, "learning_rate": 1.4799994136094233e-05, "loss": 0.9297, "step": 1547 }, { "epoch": 1.2203389830508475, "grad_norm": 0.333984375, "learning_rate": 1.4791940636666785e-05, "loss": 0.9193, "step": 1548 }, { "epoch": 1.2211273157272369, "grad_norm": 0.33984375, "learning_rate": 1.4783883100810074e-05, "loss": 0.9137, "step": 1549 }, { "epoch": 1.2219156484036264, "grad_norm": 0.349609375, "learning_rate": 1.477582153531126e-05, "loss": 0.9191, "step": 1550 }, { "epoch": 1.2227039810800158, "grad_norm": 0.3515625, "learning_rate": 1.4767755946960902e-05, "loss": 0.8996, "step": 1551 }, { "epoch": 1.223492313756405, "grad_norm": 0.3515625, "learning_rate": 1.4759686342552945e-05, "loss": 0.9655, "step": 1552 }, { "epoch": 1.2242806464327947, "grad_norm": 0.33984375, "learning_rate": 1.4751612728884711e-05, "loss": 0.9088, "step": 1553 }, { "epoch": 1.225068979109184, "grad_norm": 0.33984375, "learning_rate": 1.474353511275691e-05, "loss": 0.909, "step": 1554 }, { "epoch": 1.2258573117855736, "grad_norm": 0.33984375, "learning_rate": 1.4735453500973611e-05, "loss": 0.9296, "step": 1555 }, { "epoch": 1.226645644461963, "grad_norm": 0.341796875, "learning_rate": 1.472736790034226e-05, "loss": 0.9466, "step": 1556 }, { "epoch": 1.2274339771383524, "grad_norm": 0.34375, "learning_rate": 1.4719278317673655e-05, "loss": 0.9444, "step": 1557 }, { "epoch": 1.2282223098147418, "grad_norm": 0.34375, "learning_rate": 1.4711184759781956e-05, "loss": 0.9234, "step": 1558 }, { "epoch": 1.2290106424911313, "grad_norm": 0.34765625, "learning_rate": 1.470308723348466e-05, "loss": 0.9169, "step": 1559 }, { "epoch": 1.2297989751675207, "grad_norm": 0.36328125, "learning_rate": 1.4694985745602623e-05, "loss": 0.9295, "step": 1560 }, { "epoch": 1.2305873078439102, "grad_norm": 0.361328125, "learning_rate": 1.4686880302960021e-05, "loss": 0.9142, "step": 1561 }, { "epoch": 1.2313756405202996, "grad_norm": 0.359375, "learning_rate": 1.467877091238437e-05, "loss": 0.9743, "step": 1562 }, { "epoch": 1.232163973196689, "grad_norm": 0.3671875, "learning_rate": 1.4670657580706511e-05, "loss": 0.9713, "step": 1563 }, { "epoch": 1.2329523058730785, "grad_norm": 0.341796875, "learning_rate": 1.4662540314760608e-05, "loss": 0.9618, "step": 1564 }, { "epoch": 1.2337406385494678, "grad_norm": 0.353515625, "learning_rate": 1.4654419121384126e-05, "loss": 0.9155, "step": 1565 }, { "epoch": 1.2345289712258574, "grad_norm": 0.373046875, "learning_rate": 1.4646294007417858e-05, "loss": 0.9406, "step": 1566 }, { "epoch": 1.2353173039022467, "grad_norm": 0.34375, "learning_rate": 1.4638164979705883e-05, "loss": 0.9597, "step": 1567 }, { "epoch": 1.2361056365786363, "grad_norm": 0.37890625, "learning_rate": 1.4630032045095582e-05, "loss": 0.9777, "step": 1568 }, { "epoch": 1.2368939692550256, "grad_norm": 0.37109375, "learning_rate": 1.4621895210437627e-05, "loss": 0.915, "step": 1569 }, { "epoch": 1.2376823019314152, "grad_norm": 0.357421875, "learning_rate": 1.4613754482585978e-05, "loss": 0.9616, "step": 1570 }, { "epoch": 1.2384706346078045, "grad_norm": 0.37890625, "learning_rate": 1.4605609868397874e-05, "loss": 0.9502, "step": 1571 }, { "epoch": 1.239258967284194, "grad_norm": 0.341796875, "learning_rate": 1.4597461374733817e-05, "loss": 0.918, "step": 1572 }, { "epoch": 1.2400472999605834, "grad_norm": 0.36328125, "learning_rate": 1.4589309008457594e-05, "loss": 0.9628, "step": 1573 }, { "epoch": 1.2408356326369727, "grad_norm": 0.345703125, "learning_rate": 1.458115277643624e-05, "loss": 0.9188, "step": 1574 }, { "epoch": 1.2416239653133623, "grad_norm": 0.34765625, "learning_rate": 1.4572992685540057e-05, "loss": 0.9744, "step": 1575 }, { "epoch": 1.2424122979897516, "grad_norm": 0.34765625, "learning_rate": 1.4564828742642586e-05, "loss": 0.9221, "step": 1576 }, { "epoch": 1.2432006306661412, "grad_norm": 0.353515625, "learning_rate": 1.4556660954620622e-05, "loss": 0.9206, "step": 1577 }, { "epoch": 1.2439889633425305, "grad_norm": 0.3515625, "learning_rate": 1.4548489328354197e-05, "loss": 0.9532, "step": 1578 }, { "epoch": 1.24477729601892, "grad_norm": 0.341796875, "learning_rate": 1.454031387072657e-05, "loss": 0.8961, "step": 1579 }, { "epoch": 1.2455656286953094, "grad_norm": 0.34765625, "learning_rate": 1.4532134588624236e-05, "loss": 0.9504, "step": 1580 }, { "epoch": 1.2463539613716987, "grad_norm": 0.357421875, "learning_rate": 1.4523951488936905e-05, "loss": 0.9377, "step": 1581 }, { "epoch": 1.2471422940480883, "grad_norm": 0.3515625, "learning_rate": 1.4515764578557512e-05, "loss": 0.9393, "step": 1582 }, { "epoch": 1.2479306267244779, "grad_norm": 0.36328125, "learning_rate": 1.4507573864382187e-05, "loss": 0.9827, "step": 1583 }, { "epoch": 1.2487189594008672, "grad_norm": 0.357421875, "learning_rate": 1.4499379353310275e-05, "loss": 0.9416, "step": 1584 }, { "epoch": 1.2495072920772565, "grad_norm": 0.34765625, "learning_rate": 1.4491181052244317e-05, "loss": 0.952, "step": 1585 }, { "epoch": 1.250295624753646, "grad_norm": 0.3515625, "learning_rate": 1.4482978968090044e-05, "loss": 0.9412, "step": 1586 }, { "epoch": 1.2510839574300354, "grad_norm": 0.34765625, "learning_rate": 1.4474773107756379e-05, "loss": 0.9578, "step": 1587 }, { "epoch": 1.251872290106425, "grad_norm": 0.34375, "learning_rate": 1.4466563478155422e-05, "loss": 0.9221, "step": 1588 }, { "epoch": 1.2526606227828143, "grad_norm": 0.3515625, "learning_rate": 1.4458350086202443e-05, "loss": 0.9788, "step": 1589 }, { "epoch": 1.2534489554592039, "grad_norm": 0.34765625, "learning_rate": 1.4450132938815896e-05, "loss": 0.9138, "step": 1590 }, { "epoch": 1.2542372881355932, "grad_norm": 0.3515625, "learning_rate": 1.4441912042917378e-05, "loss": 0.9747, "step": 1591 }, { "epoch": 1.2550256208119825, "grad_norm": 0.353515625, "learning_rate": 1.4433687405431663e-05, "loss": 0.9238, "step": 1592 }, { "epoch": 1.255813953488372, "grad_norm": 0.34375, "learning_rate": 1.4425459033286664e-05, "loss": 0.9445, "step": 1593 }, { "epoch": 1.2566022861647617, "grad_norm": 0.361328125, "learning_rate": 1.4417226933413446e-05, "loss": 0.958, "step": 1594 }, { "epoch": 1.257390618841151, "grad_norm": 0.3515625, "learning_rate": 1.4408991112746212e-05, "loss": 0.8961, "step": 1595 }, { "epoch": 1.2581789515175403, "grad_norm": 0.34765625, "learning_rate": 1.4400751578222293e-05, "loss": 0.9432, "step": 1596 }, { "epoch": 1.25896728419393, "grad_norm": 0.349609375, "learning_rate": 1.4392508336782167e-05, "loss": 0.9402, "step": 1597 }, { "epoch": 1.2597556168703192, "grad_norm": 0.349609375, "learning_rate": 1.4384261395369405e-05, "loss": 0.9172, "step": 1598 }, { "epoch": 1.2605439495467088, "grad_norm": 0.345703125, "learning_rate": 1.437601076093073e-05, "loss": 0.9819, "step": 1599 }, { "epoch": 1.2613322822230981, "grad_norm": 0.353515625, "learning_rate": 1.4367756440415943e-05, "loss": 0.9638, "step": 1600 }, { "epoch": 1.2621206148994877, "grad_norm": 0.345703125, "learning_rate": 1.435949844077797e-05, "loss": 0.9231, "step": 1601 }, { "epoch": 1.262908947575877, "grad_norm": 0.33984375, "learning_rate": 1.435123676897283e-05, "loss": 0.9591, "step": 1602 }, { "epoch": 1.2636972802522664, "grad_norm": 0.375, "learning_rate": 1.4342971431959634e-05, "loss": 0.9489, "step": 1603 }, { "epoch": 1.264485612928656, "grad_norm": 0.3359375, "learning_rate": 1.4334702436700583e-05, "loss": 0.9287, "step": 1604 }, { "epoch": 1.2652739456050452, "grad_norm": 0.435546875, "learning_rate": 1.4326429790160958e-05, "loss": 0.9749, "step": 1605 }, { "epoch": 1.2660622782814348, "grad_norm": 0.3359375, "learning_rate": 1.4318153499309118e-05, "loss": 0.9307, "step": 1606 }, { "epoch": 1.2668506109578241, "grad_norm": 0.349609375, "learning_rate": 1.4309873571116486e-05, "loss": 0.9596, "step": 1607 }, { "epoch": 1.2676389436342137, "grad_norm": 0.35546875, "learning_rate": 1.4301590012557553e-05, "loss": 0.93, "step": 1608 }, { "epoch": 1.268427276310603, "grad_norm": 0.33984375, "learning_rate": 1.4293302830609869e-05, "loss": 0.9382, "step": 1609 }, { "epoch": 1.2692156089869926, "grad_norm": 0.353515625, "learning_rate": 1.4285012032254035e-05, "loss": 0.9606, "step": 1610 }, { "epoch": 1.270003941663382, "grad_norm": 17.5, "learning_rate": 1.4276717624473697e-05, "loss": 0.9427, "step": 1611 }, { "epoch": 1.2707922743397715, "grad_norm": 0.35546875, "learning_rate": 1.4268419614255545e-05, "loss": 0.9299, "step": 1612 }, { "epoch": 1.2715806070161608, "grad_norm": 0.34765625, "learning_rate": 1.4260118008589294e-05, "loss": 0.9331, "step": 1613 }, { "epoch": 1.2723689396925502, "grad_norm": 0.349609375, "learning_rate": 1.4251812814467701e-05, "loss": 0.9476, "step": 1614 }, { "epoch": 1.2731572723689397, "grad_norm": 0.345703125, "learning_rate": 1.4243504038886531e-05, "loss": 0.9662, "step": 1615 }, { "epoch": 1.273945605045329, "grad_norm": 0.341796875, "learning_rate": 1.4235191688844585e-05, "loss": 0.9105, "step": 1616 }, { "epoch": 1.2747339377217186, "grad_norm": 0.35546875, "learning_rate": 1.4226875771343656e-05, "loss": 0.935, "step": 1617 }, { "epoch": 1.275522270398108, "grad_norm": 0.365234375, "learning_rate": 1.4218556293388548e-05, "loss": 0.9528, "step": 1618 }, { "epoch": 1.2763106030744975, "grad_norm": 0.34765625, "learning_rate": 1.421023326198707e-05, "loss": 0.9237, "step": 1619 }, { "epoch": 1.2770989357508868, "grad_norm": 0.353515625, "learning_rate": 1.420190668415002e-05, "loss": 0.9232, "step": 1620 }, { "epoch": 1.2778872684272762, "grad_norm": 0.34765625, "learning_rate": 1.4193576566891181e-05, "loss": 1.0001, "step": 1621 }, { "epoch": 1.2786756011036657, "grad_norm": 0.3515625, "learning_rate": 1.418524291722732e-05, "loss": 0.9149, "step": 1622 }, { "epoch": 1.2794639337800553, "grad_norm": 0.349609375, "learning_rate": 1.417690574217818e-05, "loss": 0.9599, "step": 1623 }, { "epoch": 1.2802522664564446, "grad_norm": 0.3359375, "learning_rate": 1.4168565048766475e-05, "loss": 0.9099, "step": 1624 }, { "epoch": 1.281040599132834, "grad_norm": 0.349609375, "learning_rate": 1.4160220844017874e-05, "loss": 0.9866, "step": 1625 }, { "epoch": 1.2818289318092235, "grad_norm": 0.357421875, "learning_rate": 1.4151873134961014e-05, "loss": 0.9364, "step": 1626 }, { "epoch": 1.2826172644856129, "grad_norm": 0.353515625, "learning_rate": 1.4143521928627479e-05, "loss": 0.9421, "step": 1627 }, { "epoch": 1.2834055971620024, "grad_norm": 0.3515625, "learning_rate": 1.4135167232051802e-05, "loss": 0.9441, "step": 1628 }, { "epoch": 1.2841939298383918, "grad_norm": 0.341796875, "learning_rate": 1.4126809052271453e-05, "loss": 0.9442, "step": 1629 }, { "epoch": 1.2849822625147813, "grad_norm": 0.34765625, "learning_rate": 1.4118447396326832e-05, "loss": 0.9447, "step": 1630 }, { "epoch": 1.2857705951911707, "grad_norm": 0.34765625, "learning_rate": 1.4110082271261278e-05, "loss": 0.9346, "step": 1631 }, { "epoch": 1.28655892786756, "grad_norm": 0.345703125, "learning_rate": 1.4101713684121042e-05, "loss": 0.9579, "step": 1632 }, { "epoch": 1.2873472605439495, "grad_norm": 0.36328125, "learning_rate": 1.4093341641955298e-05, "loss": 0.9446, "step": 1633 }, { "epoch": 1.288135593220339, "grad_norm": 0.35546875, "learning_rate": 1.4084966151816124e-05, "loss": 0.9588, "step": 1634 }, { "epoch": 1.2889239258967284, "grad_norm": 0.34375, "learning_rate": 1.407658722075851e-05, "loss": 0.9272, "step": 1635 }, { "epoch": 1.2897122585731178, "grad_norm": 0.33984375, "learning_rate": 1.4068204855840338e-05, "loss": 0.9316, "step": 1636 }, { "epoch": 1.2905005912495073, "grad_norm": 0.349609375, "learning_rate": 1.4059819064122382e-05, "loss": 0.9252, "step": 1637 }, { "epoch": 1.2912889239258967, "grad_norm": 0.33984375, "learning_rate": 1.4051429852668312e-05, "loss": 0.9291, "step": 1638 }, { "epoch": 1.2920772566022862, "grad_norm": 0.345703125, "learning_rate": 1.4043037228544667e-05, "loss": 0.9678, "step": 1639 }, { "epoch": 1.2928655892786756, "grad_norm": 0.373046875, "learning_rate": 1.4034641198820866e-05, "loss": 0.9409, "step": 1640 }, { "epoch": 1.2936539219550651, "grad_norm": 0.34765625, "learning_rate": 1.4026241770569198e-05, "loss": 0.9538, "step": 1641 }, { "epoch": 1.2944422546314545, "grad_norm": 0.43359375, "learning_rate": 1.4017838950864808e-05, "loss": 0.9398, "step": 1642 }, { "epoch": 1.2952305873078438, "grad_norm": 0.345703125, "learning_rate": 1.400943274678571e-05, "loss": 0.8944, "step": 1643 }, { "epoch": 1.2960189199842334, "grad_norm": 0.345703125, "learning_rate": 1.4001023165412754e-05, "loss": 0.9465, "step": 1644 }, { "epoch": 1.296807252660623, "grad_norm": 0.3359375, "learning_rate": 1.3992610213829649e-05, "loss": 0.9266, "step": 1645 }, { "epoch": 1.2975955853370122, "grad_norm": 0.34375, "learning_rate": 1.3984193899122932e-05, "loss": 0.9393, "step": 1646 }, { "epoch": 1.2983839180134016, "grad_norm": 0.337890625, "learning_rate": 1.3975774228381975e-05, "loss": 0.9029, "step": 1647 }, { "epoch": 1.2991722506897911, "grad_norm": 0.359375, "learning_rate": 1.3967351208698985e-05, "loss": 0.9877, "step": 1648 }, { "epoch": 1.2999605833661805, "grad_norm": 0.34765625, "learning_rate": 1.3958924847168977e-05, "loss": 0.9857, "step": 1649 }, { "epoch": 1.30074891604257, "grad_norm": 0.361328125, "learning_rate": 1.3950495150889793e-05, "loss": 0.9247, "step": 1650 }, { "epoch": 1.3015372487189594, "grad_norm": 0.353515625, "learning_rate": 1.3942062126962078e-05, "loss": 0.9695, "step": 1651 }, { "epoch": 1.302325581395349, "grad_norm": 0.3359375, "learning_rate": 1.3933625782489275e-05, "loss": 0.9296, "step": 1652 }, { "epoch": 1.3031139140717383, "grad_norm": 0.33984375, "learning_rate": 1.3925186124577639e-05, "loss": 0.9399, "step": 1653 }, { "epoch": 1.3039022467481276, "grad_norm": 0.341796875, "learning_rate": 1.3916743160336197e-05, "loss": 0.9235, "step": 1654 }, { "epoch": 1.3046905794245172, "grad_norm": 0.349609375, "learning_rate": 1.3908296896876778e-05, "loss": 0.9643, "step": 1655 }, { "epoch": 1.3054789121009067, "grad_norm": 0.345703125, "learning_rate": 1.3899847341313982e-05, "loss": 0.9946, "step": 1656 }, { "epoch": 1.306267244777296, "grad_norm": 0.37109375, "learning_rate": 1.3891394500765181e-05, "loss": 0.9671, "step": 1657 }, { "epoch": 1.3070555774536854, "grad_norm": 0.396484375, "learning_rate": 1.3882938382350513e-05, "loss": 0.9326, "step": 1658 }, { "epoch": 1.307843910130075, "grad_norm": 0.33984375, "learning_rate": 1.3874478993192886e-05, "loss": 0.9259, "step": 1659 }, { "epoch": 1.3086322428064643, "grad_norm": 0.345703125, "learning_rate": 1.3866016340417953e-05, "loss": 0.9508, "step": 1660 }, { "epoch": 1.3094205754828538, "grad_norm": 0.34765625, "learning_rate": 1.3857550431154123e-05, "loss": 0.9376, "step": 1661 }, { "epoch": 1.3102089081592432, "grad_norm": 0.34375, "learning_rate": 1.3849081272532545e-05, "loss": 0.947, "step": 1662 }, { "epoch": 1.3109972408356327, "grad_norm": 0.3515625, "learning_rate": 1.3840608871687104e-05, "loss": 0.957, "step": 1663 }, { "epoch": 1.311785573512022, "grad_norm": 0.3515625, "learning_rate": 1.3832133235754417e-05, "loss": 0.9465, "step": 1664 }, { "epoch": 1.3125739061884114, "grad_norm": 0.345703125, "learning_rate": 1.3823654371873827e-05, "loss": 0.9253, "step": 1665 }, { "epoch": 1.313362238864801, "grad_norm": 0.353515625, "learning_rate": 1.3815172287187394e-05, "loss": 0.9344, "step": 1666 }, { "epoch": 1.3141505715411903, "grad_norm": 0.345703125, "learning_rate": 1.3806686988839898e-05, "loss": 0.9534, "step": 1667 }, { "epoch": 1.3149389042175799, "grad_norm": 0.34765625, "learning_rate": 1.3798198483978816e-05, "loss": 0.9334, "step": 1668 }, { "epoch": 1.3157272368939692, "grad_norm": 0.45703125, "learning_rate": 1.3789706779754326e-05, "loss": 0.9242, "step": 1669 }, { "epoch": 1.3165155695703588, "grad_norm": 0.35546875, "learning_rate": 1.3781211883319315e-05, "loss": 0.9738, "step": 1670 }, { "epoch": 1.317303902246748, "grad_norm": 0.353515625, "learning_rate": 1.3772713801829338e-05, "loss": 0.9503, "step": 1671 }, { "epoch": 1.3180922349231374, "grad_norm": 0.345703125, "learning_rate": 1.3764212542442656e-05, "loss": 0.9036, "step": 1672 }, { "epoch": 1.318880567599527, "grad_norm": 0.34375, "learning_rate": 1.3755708112320187e-05, "loss": 0.9389, "step": 1673 }, { "epoch": 1.3196689002759165, "grad_norm": 0.341796875, "learning_rate": 1.374720051862553e-05, "loss": 0.9515, "step": 1674 }, { "epoch": 1.3204572329523059, "grad_norm": 0.34375, "learning_rate": 1.3738689768524946e-05, "loss": 0.9449, "step": 1675 }, { "epoch": 1.3212455656286952, "grad_norm": 0.357421875, "learning_rate": 1.373017586918736e-05, "loss": 0.9547, "step": 1676 }, { "epoch": 1.3220338983050848, "grad_norm": 0.345703125, "learning_rate": 1.3721658827784335e-05, "loss": 0.9691, "step": 1677 }, { "epoch": 1.3228222309814741, "grad_norm": 0.34375, "learning_rate": 1.37131386514901e-05, "loss": 0.9088, "step": 1678 }, { "epoch": 1.3236105636578637, "grad_norm": 0.345703125, "learning_rate": 1.3704615347481511e-05, "loss": 0.9331, "step": 1679 }, { "epoch": 1.324398896334253, "grad_norm": 0.357421875, "learning_rate": 1.3696088922938065e-05, "loss": 0.9351, "step": 1680 }, { "epoch": 1.3251872290106426, "grad_norm": 0.341796875, "learning_rate": 1.3687559385041884e-05, "loss": 0.9786, "step": 1681 }, { "epoch": 1.325975561687032, "grad_norm": 0.33984375, "learning_rate": 1.3679026740977717e-05, "loss": 0.9148, "step": 1682 }, { "epoch": 1.3267638943634212, "grad_norm": 0.341796875, "learning_rate": 1.3670490997932922e-05, "loss": 0.9296, "step": 1683 }, { "epoch": 1.3275522270398108, "grad_norm": 0.35546875, "learning_rate": 1.3661952163097474e-05, "loss": 0.9168, "step": 1684 }, { "epoch": 1.3283405597162004, "grad_norm": 0.34765625, "learning_rate": 1.3653410243663953e-05, "loss": 0.9105, "step": 1685 }, { "epoch": 1.3291288923925897, "grad_norm": 0.3515625, "learning_rate": 1.3644865246827528e-05, "loss": 0.9632, "step": 1686 }, { "epoch": 1.329917225068979, "grad_norm": 0.345703125, "learning_rate": 1.3636317179785972e-05, "loss": 0.9516, "step": 1687 }, { "epoch": 1.3307055577453686, "grad_norm": 0.349609375, "learning_rate": 1.3627766049739635e-05, "loss": 0.9041, "step": 1688 }, { "epoch": 1.331493890421758, "grad_norm": 0.341796875, "learning_rate": 1.3619211863891458e-05, "loss": 0.9168, "step": 1689 }, { "epoch": 1.3322822230981475, "grad_norm": 0.34765625, "learning_rate": 1.3610654629446938e-05, "loss": 0.947, "step": 1690 }, { "epoch": 1.3330705557745368, "grad_norm": 0.359375, "learning_rate": 1.360209435361416e-05, "loss": 0.9482, "step": 1691 }, { "epoch": 1.3338588884509264, "grad_norm": 0.349609375, "learning_rate": 1.3593531043603756e-05, "loss": 0.9619, "step": 1692 }, { "epoch": 1.3346472211273157, "grad_norm": 0.341796875, "learning_rate": 1.3584964706628923e-05, "loss": 0.9411, "step": 1693 }, { "epoch": 1.335435553803705, "grad_norm": 0.3515625, "learning_rate": 1.3576395349905403e-05, "loss": 0.9437, "step": 1694 }, { "epoch": 1.3362238864800946, "grad_norm": 0.34375, "learning_rate": 1.3567822980651481e-05, "loss": 0.937, "step": 1695 }, { "epoch": 1.3370122191564842, "grad_norm": 0.353515625, "learning_rate": 1.3559247606087987e-05, "loss": 0.9185, "step": 1696 }, { "epoch": 1.3378005518328735, "grad_norm": 0.33984375, "learning_rate": 1.3550669233438271e-05, "loss": 0.9573, "step": 1697 }, { "epoch": 1.3385888845092628, "grad_norm": 0.341796875, "learning_rate": 1.3542087869928215e-05, "loss": 0.9472, "step": 1698 }, { "epoch": 1.3393772171856524, "grad_norm": 0.3515625, "learning_rate": 1.3533503522786224e-05, "loss": 0.9308, "step": 1699 }, { "epoch": 1.3401655498620417, "grad_norm": 0.359375, "learning_rate": 1.352491619924321e-05, "loss": 0.9154, "step": 1700 }, { "epoch": 1.3409538825384313, "grad_norm": 0.34765625, "learning_rate": 1.3516325906532592e-05, "loss": 0.991, "step": 1701 }, { "epoch": 1.3417422152148206, "grad_norm": 0.34375, "learning_rate": 1.3507732651890294e-05, "loss": 0.9419, "step": 1702 }, { "epoch": 1.3425305478912102, "grad_norm": 0.34765625, "learning_rate": 1.349913644255473e-05, "loss": 0.958, "step": 1703 }, { "epoch": 1.3433188805675995, "grad_norm": 0.341796875, "learning_rate": 1.3490537285766809e-05, "loss": 0.9247, "step": 1704 }, { "epoch": 1.3441072132439889, "grad_norm": 0.3515625, "learning_rate": 1.348193518876992e-05, "loss": 0.9533, "step": 1705 }, { "epoch": 1.3448955459203784, "grad_norm": 0.34765625, "learning_rate": 1.3473330158809925e-05, "loss": 0.9484, "step": 1706 }, { "epoch": 1.345683878596768, "grad_norm": 0.349609375, "learning_rate": 1.3464722203135164e-05, "loss": 0.9945, "step": 1707 }, { "epoch": 1.3464722112731573, "grad_norm": 0.35546875, "learning_rate": 1.3456111328996431e-05, "loss": 0.9628, "step": 1708 }, { "epoch": 1.3472605439495466, "grad_norm": 0.33984375, "learning_rate": 1.3447497543646992e-05, "loss": 0.9115, "step": 1709 }, { "epoch": 1.3480488766259362, "grad_norm": 0.3359375, "learning_rate": 1.3438880854342552e-05, "loss": 0.9369, "step": 1710 }, { "epoch": 1.3488372093023255, "grad_norm": 0.345703125, "learning_rate": 1.3430261268341272e-05, "loss": 0.9271, "step": 1711 }, { "epoch": 1.349625541978715, "grad_norm": 0.357421875, "learning_rate": 1.3421638792903743e-05, "loss": 0.9571, "step": 1712 }, { "epoch": 1.3504138746551044, "grad_norm": 0.3515625, "learning_rate": 1.3413013435293004e-05, "loss": 0.9532, "step": 1713 }, { "epoch": 1.351202207331494, "grad_norm": 0.36328125, "learning_rate": 1.3404385202774506e-05, "loss": 0.982, "step": 1714 }, { "epoch": 1.3519905400078833, "grad_norm": 0.3515625, "learning_rate": 1.3395754102616135e-05, "loss": 0.9509, "step": 1715 }, { "epoch": 1.3527788726842727, "grad_norm": 0.353515625, "learning_rate": 1.3387120142088182e-05, "loss": 0.94, "step": 1716 }, { "epoch": 1.3535672053606622, "grad_norm": 0.3515625, "learning_rate": 1.3378483328463352e-05, "loss": 0.9338, "step": 1717 }, { "epoch": 1.3543555380370516, "grad_norm": 0.35546875, "learning_rate": 1.3369843669016757e-05, "loss": 0.9329, "step": 1718 }, { "epoch": 1.3551438707134411, "grad_norm": 0.359375, "learning_rate": 1.33612011710259e-05, "loss": 0.9554, "step": 1719 }, { "epoch": 1.3559322033898304, "grad_norm": 0.3515625, "learning_rate": 1.3352555841770682e-05, "loss": 0.9572, "step": 1720 }, { "epoch": 1.35672053606622, "grad_norm": 0.34375, "learning_rate": 1.3343907688533378e-05, "loss": 0.9235, "step": 1721 }, { "epoch": 1.3575088687426093, "grad_norm": 0.34375, "learning_rate": 1.333525671859865e-05, "loss": 0.9419, "step": 1722 }, { "epoch": 1.358297201418999, "grad_norm": 0.35546875, "learning_rate": 1.3326602939253532e-05, "loss": 0.9705, "step": 1723 }, { "epoch": 1.3590855340953882, "grad_norm": 0.34765625, "learning_rate": 1.3317946357787426e-05, "loss": 0.9176, "step": 1724 }, { "epoch": 1.3598738667717778, "grad_norm": 0.33984375, "learning_rate": 1.3309286981492084e-05, "loss": 0.9452, "step": 1725 }, { "epoch": 1.3606621994481671, "grad_norm": 0.34375, "learning_rate": 1.3300624817661627e-05, "loss": 0.9501, "step": 1726 }, { "epoch": 1.3614505321245565, "grad_norm": 0.345703125, "learning_rate": 1.3291959873592508e-05, "loss": 0.9589, "step": 1727 }, { "epoch": 1.362238864800946, "grad_norm": 0.34375, "learning_rate": 1.3283292156583542e-05, "loss": 0.9628, "step": 1728 }, { "epoch": 1.3630271974773354, "grad_norm": 0.345703125, "learning_rate": 1.3274621673935861e-05, "loss": 0.9771, "step": 1729 }, { "epoch": 1.363815530153725, "grad_norm": 0.345703125, "learning_rate": 1.3265948432952935e-05, "loss": 0.9246, "step": 1730 }, { "epoch": 1.3646038628301143, "grad_norm": 0.357421875, "learning_rate": 1.3257272440940559e-05, "loss": 0.9288, "step": 1731 }, { "epoch": 1.3653921955065038, "grad_norm": 0.34375, "learning_rate": 1.3248593705206838e-05, "loss": 0.9513, "step": 1732 }, { "epoch": 1.3661805281828932, "grad_norm": 0.337890625, "learning_rate": 1.3239912233062198e-05, "loss": 0.9394, "step": 1733 }, { "epoch": 1.3669688608592825, "grad_norm": 0.34765625, "learning_rate": 1.3231228031819358e-05, "loss": 0.943, "step": 1734 }, { "epoch": 1.367757193535672, "grad_norm": 0.34375, "learning_rate": 1.3222541108793352e-05, "loss": 0.9453, "step": 1735 }, { "epoch": 1.3685455262120616, "grad_norm": 0.349609375, "learning_rate": 1.3213851471301492e-05, "loss": 0.9669, "step": 1736 }, { "epoch": 1.369333858888451, "grad_norm": 0.345703125, "learning_rate": 1.320515912666338e-05, "loss": 0.9445, "step": 1737 }, { "epoch": 1.3701221915648403, "grad_norm": 0.35546875, "learning_rate": 1.3196464082200903e-05, "loss": 0.9466, "step": 1738 }, { "epoch": 1.3709105242412298, "grad_norm": 0.341796875, "learning_rate": 1.3187766345238222e-05, "loss": 0.9208, "step": 1739 }, { "epoch": 1.3716988569176192, "grad_norm": 0.34765625, "learning_rate": 1.3179065923101759e-05, "loss": 0.9318, "step": 1740 }, { "epoch": 1.3724871895940087, "grad_norm": 0.345703125, "learning_rate": 1.3170362823120204e-05, "loss": 0.9088, "step": 1741 }, { "epoch": 1.373275522270398, "grad_norm": 0.353515625, "learning_rate": 1.3161657052624497e-05, "loss": 0.9457, "step": 1742 }, { "epoch": 1.3740638549467876, "grad_norm": 0.345703125, "learning_rate": 1.3152948618947839e-05, "loss": 0.9798, "step": 1743 }, { "epoch": 1.374852187623177, "grad_norm": 0.34765625, "learning_rate": 1.3144237529425655e-05, "loss": 0.9439, "step": 1744 }, { "epoch": 1.3756405202995663, "grad_norm": 0.423828125, "learning_rate": 1.3135523791395632e-05, "loss": 0.9752, "step": 1745 }, { "epoch": 1.3764288529759559, "grad_norm": 0.34765625, "learning_rate": 1.3126807412197666e-05, "loss": 0.9367, "step": 1746 }, { "epoch": 1.3772171856523454, "grad_norm": 0.3515625, "learning_rate": 1.3118088399173888e-05, "loss": 0.9547, "step": 1747 }, { "epoch": 1.3780055183287347, "grad_norm": 0.34375, "learning_rate": 1.3109366759668647e-05, "loss": 0.9262, "step": 1748 }, { "epoch": 1.378793851005124, "grad_norm": 0.337890625, "learning_rate": 1.3100642501028502e-05, "loss": 0.9471, "step": 1749 }, { "epoch": 1.3795821836815136, "grad_norm": 0.345703125, "learning_rate": 1.3091915630602223e-05, "loss": 0.9799, "step": 1750 }, { "epoch": 1.380370516357903, "grad_norm": 0.359375, "learning_rate": 1.308318615574077e-05, "loss": 0.9391, "step": 1751 }, { "epoch": 1.3811588490342925, "grad_norm": 0.341796875, "learning_rate": 1.307445408379731e-05, "loss": 0.8963, "step": 1752 }, { "epoch": 1.3819471817106819, "grad_norm": 0.341796875, "learning_rate": 1.3065719422127188e-05, "loss": 0.9361, "step": 1753 }, { "epoch": 1.3827355143870714, "grad_norm": 0.349609375, "learning_rate": 1.3056982178087933e-05, "loss": 0.9622, "step": 1754 }, { "epoch": 1.3835238470634608, "grad_norm": 0.3984375, "learning_rate": 1.304824235903925e-05, "loss": 0.9505, "step": 1755 }, { "epoch": 1.38431217973985, "grad_norm": 0.353515625, "learning_rate": 1.3039499972343013e-05, "loss": 0.9565, "step": 1756 }, { "epoch": 1.3851005124162397, "grad_norm": 0.34375, "learning_rate": 1.3030755025363257e-05, "loss": 0.9153, "step": 1757 }, { "epoch": 1.3858888450926292, "grad_norm": 0.353515625, "learning_rate": 1.302200752546618e-05, "loss": 0.9573, "step": 1758 }, { "epoch": 1.3866771777690186, "grad_norm": 0.345703125, "learning_rate": 1.3013257480020116e-05, "loss": 0.9316, "step": 1759 }, { "epoch": 1.387465510445408, "grad_norm": 0.4375, "learning_rate": 1.3004504896395564e-05, "loss": 0.9309, "step": 1760 }, { "epoch": 1.3882538431217974, "grad_norm": 0.357421875, "learning_rate": 1.2995749781965139e-05, "loss": 0.933, "step": 1761 }, { "epoch": 1.3890421757981868, "grad_norm": 0.3515625, "learning_rate": 1.2986992144103607e-05, "loss": 0.9369, "step": 1762 }, { "epoch": 1.3898305084745763, "grad_norm": 0.34765625, "learning_rate": 1.2978231990187848e-05, "loss": 0.9399, "step": 1763 }, { "epoch": 1.3906188411509657, "grad_norm": 0.36328125, "learning_rate": 1.296946932759686e-05, "loss": 0.8858, "step": 1764 }, { "epoch": 1.3914071738273552, "grad_norm": 0.40234375, "learning_rate": 1.2960704163711769e-05, "loss": 0.9371, "step": 1765 }, { "epoch": 1.3921955065037446, "grad_norm": 0.353515625, "learning_rate": 1.2951936505915783e-05, "loss": 0.9289, "step": 1766 }, { "epoch": 1.392983839180134, "grad_norm": 0.34765625, "learning_rate": 1.2943166361594242e-05, "loss": 0.9384, "step": 1767 }, { "epoch": 1.3937721718565235, "grad_norm": 0.359375, "learning_rate": 1.2934393738134548e-05, "loss": 0.955, "step": 1768 }, { "epoch": 1.394560504532913, "grad_norm": 0.3359375, "learning_rate": 1.292561864292622e-05, "loss": 0.9556, "step": 1769 }, { "epoch": 1.3953488372093024, "grad_norm": 0.359375, "learning_rate": 1.2916841083360836e-05, "loss": 0.9256, "step": 1770 }, { "epoch": 1.3961371698856917, "grad_norm": 0.337890625, "learning_rate": 1.2908061066832064e-05, "loss": 0.9081, "step": 1771 }, { "epoch": 1.3969255025620813, "grad_norm": 0.345703125, "learning_rate": 1.2899278600735641e-05, "loss": 0.9408, "step": 1772 }, { "epoch": 1.3977138352384706, "grad_norm": 0.34765625, "learning_rate": 1.2890493692469357e-05, "loss": 0.8904, "step": 1773 }, { "epoch": 1.3985021679148602, "grad_norm": 0.396484375, "learning_rate": 1.288170634943307e-05, "loss": 0.9267, "step": 1774 }, { "epoch": 1.3992905005912495, "grad_norm": 0.341796875, "learning_rate": 1.2872916579028684e-05, "loss": 0.9174, "step": 1775 }, { "epoch": 1.400078833267639, "grad_norm": 0.341796875, "learning_rate": 1.2864124388660148e-05, "loss": 0.969, "step": 1776 }, { "epoch": 1.4008671659440284, "grad_norm": 0.353515625, "learning_rate": 1.2855329785733452e-05, "loss": 0.9412, "step": 1777 }, { "epoch": 1.4016554986204177, "grad_norm": 0.34765625, "learning_rate": 1.2846532777656613e-05, "loss": 0.9522, "step": 1778 }, { "epoch": 1.4024438312968073, "grad_norm": 0.345703125, "learning_rate": 1.283773337183968e-05, "loss": 0.9302, "step": 1779 }, { "epoch": 1.4032321639731966, "grad_norm": 0.349609375, "learning_rate": 1.2828931575694718e-05, "loss": 0.955, "step": 1780 }, { "epoch": 1.4040204966495862, "grad_norm": 0.341796875, "learning_rate": 1.2820127396635802e-05, "loss": 0.9237, "step": 1781 }, { "epoch": 1.4048088293259755, "grad_norm": 0.337890625, "learning_rate": 1.2811320842079026e-05, "loss": 0.9455, "step": 1782 }, { "epoch": 1.405597162002365, "grad_norm": 0.349609375, "learning_rate": 1.280251191944247e-05, "loss": 0.9262, "step": 1783 }, { "epoch": 1.4063854946787544, "grad_norm": 0.349609375, "learning_rate": 1.2793700636146222e-05, "loss": 0.9503, "step": 1784 }, { "epoch": 1.4071738273551437, "grad_norm": 0.35546875, "learning_rate": 1.278488699961235e-05, "loss": 0.9671, "step": 1785 }, { "epoch": 1.4079621600315333, "grad_norm": 0.369140625, "learning_rate": 1.2776071017264908e-05, "loss": 0.9481, "step": 1786 }, { "epoch": 1.4087504927079229, "grad_norm": 0.35546875, "learning_rate": 1.2767252696529922e-05, "loss": 0.9665, "step": 1787 }, { "epoch": 1.4095388253843122, "grad_norm": 0.369140625, "learning_rate": 1.275843204483539e-05, "loss": 0.9566, "step": 1788 }, { "epoch": 1.4103271580607015, "grad_norm": 0.35546875, "learning_rate": 1.2749609069611282e-05, "loss": 0.9254, "step": 1789 }, { "epoch": 1.411115490737091, "grad_norm": 0.345703125, "learning_rate": 1.2740783778289507e-05, "loss": 0.9325, "step": 1790 }, { "epoch": 1.4119038234134804, "grad_norm": 0.341796875, "learning_rate": 1.2731956178303941e-05, "loss": 0.9563, "step": 1791 }, { "epoch": 1.41269215608987, "grad_norm": 0.341796875, "learning_rate": 1.2723126277090396e-05, "loss": 0.9217, "step": 1792 }, { "epoch": 1.4134804887662593, "grad_norm": 0.365234375, "learning_rate": 1.2714294082086628e-05, "loss": 0.9121, "step": 1793 }, { "epoch": 1.4142688214426489, "grad_norm": 0.34375, "learning_rate": 1.2705459600732319e-05, "loss": 0.9329, "step": 1794 }, { "epoch": 1.4150571541190382, "grad_norm": 0.34375, "learning_rate": 1.2696622840469084e-05, "loss": 0.9549, "step": 1795 }, { "epoch": 1.4158454867954275, "grad_norm": 0.34765625, "learning_rate": 1.268778380874045e-05, "loss": 0.9048, "step": 1796 }, { "epoch": 1.416633819471817, "grad_norm": 0.34375, "learning_rate": 1.2678942512991865e-05, "loss": 0.9379, "step": 1797 }, { "epoch": 1.4174221521482067, "grad_norm": 0.33984375, "learning_rate": 1.2670098960670676e-05, "loss": 0.9409, "step": 1798 }, { "epoch": 1.418210484824596, "grad_norm": 0.341796875, "learning_rate": 1.266125315922614e-05, "loss": 0.9023, "step": 1799 }, { "epoch": 1.4189988175009853, "grad_norm": 0.37109375, "learning_rate": 1.2652405116109394e-05, "loss": 0.9396, "step": 1800 }, { "epoch": 1.419787150177375, "grad_norm": 0.34375, "learning_rate": 1.2643554838773486e-05, "loss": 0.8857, "step": 1801 }, { "epoch": 1.4205754828537642, "grad_norm": 0.361328125, "learning_rate": 1.263470233467332e-05, "loss": 0.9954, "step": 1802 }, { "epoch": 1.4213638155301538, "grad_norm": 0.357421875, "learning_rate": 1.2625847611265703e-05, "loss": 0.9501, "step": 1803 }, { "epoch": 1.4221521482065431, "grad_norm": 0.353515625, "learning_rate": 1.2616990676009283e-05, "loss": 0.9625, "step": 1804 }, { "epoch": 1.4229404808829327, "grad_norm": 0.357421875, "learning_rate": 1.260813153636459e-05, "loss": 0.9632, "step": 1805 }, { "epoch": 1.423728813559322, "grad_norm": 0.3515625, "learning_rate": 1.2599270199794008e-05, "loss": 0.9494, "step": 1806 }, { "epoch": 1.4245171462357114, "grad_norm": 0.46484375, "learning_rate": 1.2590406673761762e-05, "loss": 0.9633, "step": 1807 }, { "epoch": 1.425305478912101, "grad_norm": 0.34765625, "learning_rate": 1.258154096573394e-05, "loss": 0.9562, "step": 1808 }, { "epoch": 1.4260938115884905, "grad_norm": 0.34765625, "learning_rate": 1.2572673083178448e-05, "loss": 0.9507, "step": 1809 }, { "epoch": 1.4268821442648798, "grad_norm": 0.34765625, "learning_rate": 1.2563803033565034e-05, "loss": 0.9442, "step": 1810 }, { "epoch": 1.4276704769412691, "grad_norm": 0.353515625, "learning_rate": 1.2554930824365273e-05, "loss": 0.9519, "step": 1811 }, { "epoch": 1.4284588096176587, "grad_norm": 0.34765625, "learning_rate": 1.254605646305255e-05, "loss": 0.927, "step": 1812 }, { "epoch": 1.429247142294048, "grad_norm": 0.361328125, "learning_rate": 1.2537179957102075e-05, "loss": 0.9412, "step": 1813 }, { "epoch": 1.4300354749704376, "grad_norm": 0.353515625, "learning_rate": 1.2528301313990854e-05, "loss": 0.9544, "step": 1814 }, { "epoch": 1.430823807646827, "grad_norm": 0.349609375, "learning_rate": 1.2519420541197696e-05, "loss": 0.9423, "step": 1815 }, { "epoch": 1.4316121403232165, "grad_norm": 0.35546875, "learning_rate": 1.2510537646203209e-05, "loss": 0.9395, "step": 1816 }, { "epoch": 1.4324004729996058, "grad_norm": 0.34765625, "learning_rate": 1.250165263648978e-05, "loss": 0.941, "step": 1817 }, { "epoch": 1.4331888056759952, "grad_norm": 0.33984375, "learning_rate": 1.249276551954159e-05, "loss": 0.8963, "step": 1818 }, { "epoch": 1.4339771383523847, "grad_norm": 0.34765625, "learning_rate": 1.2483876302844579e-05, "loss": 0.9034, "step": 1819 }, { "epoch": 1.4347654710287743, "grad_norm": 0.349609375, "learning_rate": 1.2474984993886467e-05, "loss": 0.9652, "step": 1820 }, { "epoch": 1.4355538037051636, "grad_norm": 0.34765625, "learning_rate": 1.2466091600156736e-05, "loss": 0.9328, "step": 1821 }, { "epoch": 1.436342136381553, "grad_norm": 0.349609375, "learning_rate": 1.2457196129146616e-05, "loss": 0.9134, "step": 1822 }, { "epoch": 1.4371304690579425, "grad_norm": 0.34765625, "learning_rate": 1.2448298588349097e-05, "loss": 0.9464, "step": 1823 }, { "epoch": 1.4379188017343318, "grad_norm": 0.34765625, "learning_rate": 1.24393989852589e-05, "loss": 0.9559, "step": 1824 }, { "epoch": 1.4387071344107214, "grad_norm": 0.34375, "learning_rate": 1.2430497327372502e-05, "loss": 0.9189, "step": 1825 }, { "epoch": 1.4394954670871107, "grad_norm": 0.341796875, "learning_rate": 1.2421593622188088e-05, "loss": 0.9258, "step": 1826 }, { "epoch": 1.4402837997635003, "grad_norm": 0.349609375, "learning_rate": 1.2412687877205587e-05, "loss": 0.9393, "step": 1827 }, { "epoch": 1.4410721324398896, "grad_norm": 0.353515625, "learning_rate": 1.2403780099926635e-05, "loss": 0.9436, "step": 1828 }, { "epoch": 1.441860465116279, "grad_norm": 0.3671875, "learning_rate": 1.2394870297854582e-05, "loss": 0.9501, "step": 1829 }, { "epoch": 1.4426487977926685, "grad_norm": 0.34375, "learning_rate": 1.2385958478494487e-05, "loss": 0.9398, "step": 1830 }, { "epoch": 1.4434371304690579, "grad_norm": 0.359375, "learning_rate": 1.2377044649353103e-05, "loss": 0.9974, "step": 1831 }, { "epoch": 1.4442254631454474, "grad_norm": 0.337890625, "learning_rate": 1.2368128817938883e-05, "loss": 0.974, "step": 1832 }, { "epoch": 1.4450137958218368, "grad_norm": 0.3359375, "learning_rate": 1.2359210991761958e-05, "loss": 0.9152, "step": 1833 }, { "epoch": 1.4458021284982263, "grad_norm": 0.353515625, "learning_rate": 1.2350291178334145e-05, "loss": 0.9444, "step": 1834 }, { "epoch": 1.4465904611746156, "grad_norm": 0.349609375, "learning_rate": 1.2341369385168938e-05, "loss": 0.9491, "step": 1835 }, { "epoch": 1.447378793851005, "grad_norm": 0.34375, "learning_rate": 1.233244561978149e-05, "loss": 0.9291, "step": 1836 }, { "epoch": 1.4481671265273945, "grad_norm": 0.349609375, "learning_rate": 1.2323519889688615e-05, "loss": 0.9252, "step": 1837 }, { "epoch": 1.448955459203784, "grad_norm": 0.34765625, "learning_rate": 1.2314592202408798e-05, "loss": 0.9235, "step": 1838 }, { "epoch": 1.4497437918801734, "grad_norm": 0.359375, "learning_rate": 1.2305662565462146e-05, "loss": 0.9599, "step": 1839 }, { "epoch": 1.4505321245565628, "grad_norm": 0.3515625, "learning_rate": 1.2296730986370437e-05, "loss": 0.9567, "step": 1840 }, { "epoch": 1.4513204572329523, "grad_norm": 0.345703125, "learning_rate": 1.2287797472657064e-05, "loss": 0.9578, "step": 1841 }, { "epoch": 1.4521087899093417, "grad_norm": 0.341796875, "learning_rate": 1.2278862031847061e-05, "loss": 0.9062, "step": 1842 }, { "epoch": 1.4528971225857312, "grad_norm": 0.353515625, "learning_rate": 1.2269924671467075e-05, "loss": 0.9613, "step": 1843 }, { "epoch": 1.4536854552621206, "grad_norm": 0.341796875, "learning_rate": 1.2260985399045379e-05, "loss": 0.9126, "step": 1844 }, { "epoch": 1.4544737879385101, "grad_norm": 0.337890625, "learning_rate": 1.2252044222111859e-05, "loss": 0.925, "step": 1845 }, { "epoch": 1.4552621206148995, "grad_norm": 0.353515625, "learning_rate": 1.2243101148197991e-05, "loss": 0.9589, "step": 1846 }, { "epoch": 1.4560504532912888, "grad_norm": 0.35546875, "learning_rate": 1.223415618483686e-05, "loss": 0.9285, "step": 1847 }, { "epoch": 1.4568387859676784, "grad_norm": 0.33984375, "learning_rate": 1.2225209339563144e-05, "loss": 0.9575, "step": 1848 }, { "epoch": 1.457627118644068, "grad_norm": 0.341796875, "learning_rate": 1.2216260619913103e-05, "loss": 0.9337, "step": 1849 }, { "epoch": 1.4584154513204572, "grad_norm": 0.341796875, "learning_rate": 1.2207310033424569e-05, "loss": 0.9422, "step": 1850 }, { "epoch": 1.4592037839968466, "grad_norm": 0.3359375, "learning_rate": 1.2198357587636958e-05, "loss": 0.9286, "step": 1851 }, { "epoch": 1.4599921166732361, "grad_norm": 0.34375, "learning_rate": 1.2189403290091246e-05, "loss": 0.9207, "step": 1852 }, { "epoch": 1.4607804493496255, "grad_norm": 0.349609375, "learning_rate": 1.2180447148329972e-05, "loss": 0.9562, "step": 1853 }, { "epoch": 1.461568782026015, "grad_norm": 0.458984375, "learning_rate": 1.2171489169897217e-05, "loss": 0.9218, "step": 1854 }, { "epoch": 1.4623571147024044, "grad_norm": 0.34375, "learning_rate": 1.2162529362338633e-05, "loss": 0.9262, "step": 1855 }, { "epoch": 1.463145447378794, "grad_norm": 0.353515625, "learning_rate": 1.2153567733201383e-05, "loss": 0.9189, "step": 1856 }, { "epoch": 1.4639337800551833, "grad_norm": 0.345703125, "learning_rate": 1.2144604290034193e-05, "loss": 0.9357, "step": 1857 }, { "epoch": 1.4647221127315726, "grad_norm": 0.349609375, "learning_rate": 1.2135639040387291e-05, "loss": 0.9549, "step": 1858 }, { "epoch": 1.4655104454079622, "grad_norm": 0.337890625, "learning_rate": 1.212667199181245e-05, "loss": 0.9641, "step": 1859 }, { "epoch": 1.4662987780843517, "grad_norm": 0.33984375, "learning_rate": 1.211770315186294e-05, "loss": 0.9506, "step": 1860 }, { "epoch": 1.467087110760741, "grad_norm": 0.3515625, "learning_rate": 1.2108732528093549e-05, "loss": 0.9379, "step": 1861 }, { "epoch": 1.4678754434371304, "grad_norm": 0.341796875, "learning_rate": 1.2099760128060571e-05, "loss": 0.916, "step": 1862 }, { "epoch": 1.46866377611352, "grad_norm": 0.345703125, "learning_rate": 1.2090785959321783e-05, "loss": 0.9308, "step": 1863 }, { "epoch": 1.4694521087899093, "grad_norm": 0.341796875, "learning_rate": 1.208181002943647e-05, "loss": 0.9327, "step": 1864 }, { "epoch": 1.4702404414662988, "grad_norm": 0.34375, "learning_rate": 1.2072832345965381e-05, "loss": 0.9713, "step": 1865 }, { "epoch": 1.4710287741426882, "grad_norm": 0.3515625, "learning_rate": 1.2063852916470755e-05, "loss": 0.9134, "step": 1866 }, { "epoch": 1.4718171068190777, "grad_norm": 0.34375, "learning_rate": 1.2054871748516301e-05, "loss": 0.9383, "step": 1867 }, { "epoch": 1.472605439495467, "grad_norm": 0.353515625, "learning_rate": 1.2045888849667187e-05, "loss": 0.9517, "step": 1868 }, { "epoch": 1.4733937721718564, "grad_norm": 0.349609375, "learning_rate": 1.2036904227490043e-05, "loss": 0.9281, "step": 1869 }, { "epoch": 1.474182104848246, "grad_norm": 0.349609375, "learning_rate": 1.2027917889552951e-05, "loss": 0.9175, "step": 1870 }, { "epoch": 1.4749704375246355, "grad_norm": 0.33984375, "learning_rate": 1.201892984342543e-05, "loss": 0.9546, "step": 1871 }, { "epoch": 1.4757587702010249, "grad_norm": 0.35546875, "learning_rate": 1.2009940096678451e-05, "loss": 0.978, "step": 1872 }, { "epoch": 1.4765471028774142, "grad_norm": 0.36328125, "learning_rate": 1.2000948656884408e-05, "loss": 0.9542, "step": 1873 }, { "epoch": 1.4773354355538038, "grad_norm": 0.333984375, "learning_rate": 1.1991955531617126e-05, "loss": 0.9123, "step": 1874 }, { "epoch": 1.478123768230193, "grad_norm": 0.345703125, "learning_rate": 1.1982960728451847e-05, "loss": 0.9491, "step": 1875 }, { "epoch": 1.4789121009065826, "grad_norm": 0.349609375, "learning_rate": 1.1973964254965224e-05, "loss": 0.9402, "step": 1876 }, { "epoch": 1.479700433582972, "grad_norm": 0.34375, "learning_rate": 1.196496611873533e-05, "loss": 0.9411, "step": 1877 }, { "epoch": 1.4804887662593615, "grad_norm": 0.345703125, "learning_rate": 1.1955966327341614e-05, "loss": 0.8988, "step": 1878 }, { "epoch": 1.4812770989357509, "grad_norm": 0.34375, "learning_rate": 1.1946964888364949e-05, "loss": 0.9663, "step": 1879 }, { "epoch": 1.4820654316121402, "grad_norm": 0.353515625, "learning_rate": 1.1937961809387569e-05, "loss": 0.9621, "step": 1880 }, { "epoch": 1.4828537642885298, "grad_norm": 0.341796875, "learning_rate": 1.192895709799311e-05, "loss": 0.9532, "step": 1881 }, { "epoch": 1.4836420969649193, "grad_norm": 0.349609375, "learning_rate": 1.1919950761766568e-05, "loss": 0.9175, "step": 1882 }, { "epoch": 1.4844304296413087, "grad_norm": 0.345703125, "learning_rate": 1.1910942808294315e-05, "loss": 0.9263, "step": 1883 }, { "epoch": 1.485218762317698, "grad_norm": 0.357421875, "learning_rate": 1.1901933245164085e-05, "loss": 0.9687, "step": 1884 }, { "epoch": 1.4860070949940876, "grad_norm": 0.33984375, "learning_rate": 1.189292207996497e-05, "loss": 0.9096, "step": 1885 }, { "epoch": 1.486795427670477, "grad_norm": 0.3359375, "learning_rate": 1.1883909320287406e-05, "loss": 0.942, "step": 1886 }, { "epoch": 1.4875837603468665, "grad_norm": 0.341796875, "learning_rate": 1.1874894973723173e-05, "loss": 0.9146, "step": 1887 }, { "epoch": 1.4883720930232558, "grad_norm": 0.345703125, "learning_rate": 1.186587904786539e-05, "loss": 0.931, "step": 1888 }, { "epoch": 1.4891604256996454, "grad_norm": 0.34765625, "learning_rate": 1.1856861550308507e-05, "loss": 0.8884, "step": 1889 }, { "epoch": 1.4899487583760347, "grad_norm": 0.345703125, "learning_rate": 1.1847842488648296e-05, "loss": 0.9422, "step": 1890 }, { "epoch": 1.490737091052424, "grad_norm": 0.353515625, "learning_rate": 1.1838821870481847e-05, "loss": 0.9521, "step": 1891 }, { "epoch": 1.4915254237288136, "grad_norm": 0.345703125, "learning_rate": 1.1829799703407563e-05, "loss": 0.9358, "step": 1892 }, { "epoch": 1.492313756405203, "grad_norm": 0.33984375, "learning_rate": 1.1820775995025147e-05, "loss": 0.9056, "step": 1893 }, { "epoch": 1.4931020890815925, "grad_norm": 0.341796875, "learning_rate": 1.1811750752935604e-05, "loss": 0.9594, "step": 1894 }, { "epoch": 1.4938904217579818, "grad_norm": 0.34375, "learning_rate": 1.1802723984741229e-05, "loss": 0.945, "step": 1895 }, { "epoch": 1.4946787544343714, "grad_norm": 0.44921875, "learning_rate": 1.1793695698045606e-05, "loss": 0.9091, "step": 1896 }, { "epoch": 1.4954670871107607, "grad_norm": 0.35546875, "learning_rate": 1.1784665900453594e-05, "loss": 0.929, "step": 1897 }, { "epoch": 1.49625541978715, "grad_norm": 0.33984375, "learning_rate": 1.1775634599571326e-05, "loss": 0.9554, "step": 1898 }, { "epoch": 1.4970437524635396, "grad_norm": 0.34375, "learning_rate": 1.1766601803006204e-05, "loss": 0.965, "step": 1899 }, { "epoch": 1.4978320851399292, "grad_norm": 0.345703125, "learning_rate": 1.1757567518366883e-05, "loss": 0.9415, "step": 1900 }, { "epoch": 1.4986204178163185, "grad_norm": 0.380859375, "learning_rate": 1.1748531753263282e-05, "loss": 0.9642, "step": 1901 }, { "epoch": 1.4994087504927078, "grad_norm": 0.361328125, "learning_rate": 1.1739494515306553e-05, "loss": 0.9415, "step": 1902 }, { "epoch": 1.5001970831690974, "grad_norm": 0.349609375, "learning_rate": 1.1730455812109102e-05, "loss": 0.9623, "step": 1903 }, { "epoch": 1.500985415845487, "grad_norm": 0.353515625, "learning_rate": 1.1721415651284567e-05, "loss": 0.917, "step": 1904 }, { "epoch": 1.5017737485218763, "grad_norm": 0.34765625, "learning_rate": 1.1712374040447802e-05, "loss": 0.9487, "step": 1905 }, { "epoch": 1.5025620811982656, "grad_norm": 0.349609375, "learning_rate": 1.1703330987214898e-05, "loss": 0.9305, "step": 1906 }, { "epoch": 1.5033504138746552, "grad_norm": 0.345703125, "learning_rate": 1.169428649920315e-05, "loss": 0.9629, "step": 1907 }, { "epoch": 1.5041387465510445, "grad_norm": 0.359375, "learning_rate": 1.1685240584031068e-05, "loss": 0.9948, "step": 1908 }, { "epoch": 1.5049270792274339, "grad_norm": 0.345703125, "learning_rate": 1.1676193249318359e-05, "loss": 0.9509, "step": 1909 }, { "epoch": 1.5057154119038234, "grad_norm": 0.345703125, "learning_rate": 1.1667144502685932e-05, "loss": 0.9523, "step": 1910 }, { "epoch": 1.506503744580213, "grad_norm": 0.349609375, "learning_rate": 1.1658094351755883e-05, "loss": 0.9746, "step": 1911 }, { "epoch": 1.5072920772566023, "grad_norm": 0.349609375, "learning_rate": 1.164904280415148e-05, "loss": 0.9425, "step": 1912 }, { "epoch": 1.5080804099329916, "grad_norm": 0.345703125, "learning_rate": 1.163998986749719e-05, "loss": 0.9536, "step": 1913 }, { "epoch": 1.5088687426093812, "grad_norm": 0.349609375, "learning_rate": 1.1630935549418627e-05, "loss": 0.9499, "step": 1914 }, { "epoch": 1.5096570752857708, "grad_norm": 0.34375, "learning_rate": 1.1621879857542587e-05, "loss": 0.9151, "step": 1915 }, { "epoch": 1.5104454079621599, "grad_norm": 0.341796875, "learning_rate": 1.1612822799497008e-05, "loss": 0.9407, "step": 1916 }, { "epoch": 1.5112337406385494, "grad_norm": 0.33203125, "learning_rate": 1.1603764382910989e-05, "loss": 0.889, "step": 1917 }, { "epoch": 1.512022073314939, "grad_norm": 0.333984375, "learning_rate": 1.159470461541477e-05, "loss": 0.9128, "step": 1918 }, { "epoch": 1.5128104059913283, "grad_norm": 0.34375, "learning_rate": 1.1585643504639728e-05, "loss": 0.9735, "step": 1919 }, { "epoch": 1.5135987386677177, "grad_norm": 0.349609375, "learning_rate": 1.1576581058218375e-05, "loss": 0.9693, "step": 1920 }, { "epoch": 1.5143870713441072, "grad_norm": 0.33984375, "learning_rate": 1.1567517283784344e-05, "loss": 0.9532, "step": 1921 }, { "epoch": 1.5151754040204968, "grad_norm": 0.34375, "learning_rate": 1.1558452188972386e-05, "loss": 0.911, "step": 1922 }, { "epoch": 1.515963736696886, "grad_norm": 0.3984375, "learning_rate": 1.1549385781418372e-05, "loss": 0.9492, "step": 1923 }, { "epoch": 1.5167520693732754, "grad_norm": 0.341796875, "learning_rate": 1.1540318068759268e-05, "loss": 0.9203, "step": 1924 }, { "epoch": 1.517540402049665, "grad_norm": 0.35546875, "learning_rate": 1.1531249058633147e-05, "loss": 0.9296, "step": 1925 }, { "epoch": 1.5183287347260543, "grad_norm": 0.333984375, "learning_rate": 1.1522178758679172e-05, "loss": 0.9527, "step": 1926 }, { "epoch": 1.5191170674024437, "grad_norm": 0.345703125, "learning_rate": 1.1513107176537593e-05, "loss": 0.9195, "step": 1927 }, { "epoch": 1.5199054000788332, "grad_norm": 0.361328125, "learning_rate": 1.1504034319849741e-05, "loss": 0.9519, "step": 1928 }, { "epoch": 1.5206937327552228, "grad_norm": 0.33984375, "learning_rate": 1.1494960196258016e-05, "loss": 0.9647, "step": 1929 }, { "epoch": 1.5214820654316121, "grad_norm": 0.3828125, "learning_rate": 1.1485884813405893e-05, "loss": 0.9544, "step": 1930 }, { "epoch": 1.5222703981080015, "grad_norm": 0.333984375, "learning_rate": 1.1476808178937899e-05, "loss": 0.9321, "step": 1931 }, { "epoch": 1.523058730784391, "grad_norm": 0.345703125, "learning_rate": 1.1467730300499626e-05, "loss": 0.9727, "step": 1932 }, { "epoch": 1.5238470634607806, "grad_norm": 0.34765625, "learning_rate": 1.1458651185737703e-05, "loss": 0.9466, "step": 1933 }, { "epoch": 1.52463539613717, "grad_norm": 0.341796875, "learning_rate": 1.1449570842299804e-05, "loss": 0.9372, "step": 1934 }, { "epoch": 1.5254237288135593, "grad_norm": 0.337890625, "learning_rate": 1.1440489277834645e-05, "loss": 0.9077, "step": 1935 }, { "epoch": 1.5262120614899488, "grad_norm": 0.337890625, "learning_rate": 1.1431406499991955e-05, "loss": 0.9429, "step": 1936 }, { "epoch": 1.5270003941663381, "grad_norm": 0.34375, "learning_rate": 1.1422322516422506e-05, "loss": 0.9454, "step": 1937 }, { "epoch": 1.5277887268427275, "grad_norm": 0.34375, "learning_rate": 1.1413237334778064e-05, "loss": 0.9421, "step": 1938 }, { "epoch": 1.528577059519117, "grad_norm": 0.34375, "learning_rate": 1.1404150962711419e-05, "loss": 0.9365, "step": 1939 }, { "epoch": 1.5293653921955066, "grad_norm": 0.36328125, "learning_rate": 1.1395063407876358e-05, "loss": 0.9212, "step": 1940 }, { "epoch": 1.530153724871896, "grad_norm": 0.384765625, "learning_rate": 1.1385974677927667e-05, "loss": 0.9556, "step": 1941 }, { "epoch": 1.5309420575482853, "grad_norm": 0.34375, "learning_rate": 1.1376884780521117e-05, "loss": 0.9564, "step": 1942 }, { "epoch": 1.5317303902246748, "grad_norm": 0.34765625, "learning_rate": 1.1367793723313469e-05, "loss": 0.9117, "step": 1943 }, { "epoch": 1.5325187229010644, "grad_norm": 0.33984375, "learning_rate": 1.1358701513962457e-05, "loss": 0.9465, "step": 1944 }, { "epoch": 1.5333070555774537, "grad_norm": 0.349609375, "learning_rate": 1.1349608160126784e-05, "loss": 0.9815, "step": 1945 }, { "epoch": 1.534095388253843, "grad_norm": 0.34765625, "learning_rate": 1.134051366946612e-05, "loss": 0.9583, "step": 1946 }, { "epoch": 1.5348837209302326, "grad_norm": 0.3515625, "learning_rate": 1.1331418049641091e-05, "loss": 0.9606, "step": 1947 }, { "epoch": 1.535672053606622, "grad_norm": 0.337890625, "learning_rate": 1.1322321308313278e-05, "loss": 0.9099, "step": 1948 }, { "epoch": 1.5364603862830113, "grad_norm": 0.35546875, "learning_rate": 1.1313223453145202e-05, "loss": 0.9632, "step": 1949 }, { "epoch": 1.5372487189594009, "grad_norm": 0.33984375, "learning_rate": 1.130412449180032e-05, "loss": 0.9064, "step": 1950 }, { "epoch": 1.5380370516357904, "grad_norm": 0.345703125, "learning_rate": 1.1295024431943029e-05, "loss": 0.9384, "step": 1951 }, { "epoch": 1.5388253843121797, "grad_norm": 0.345703125, "learning_rate": 1.1285923281238646e-05, "loss": 0.9823, "step": 1952 }, { "epoch": 1.539613716988569, "grad_norm": 0.35546875, "learning_rate": 1.1276821047353403e-05, "loss": 0.9558, "step": 1953 }, { "epoch": 1.5404020496649586, "grad_norm": 0.337890625, "learning_rate": 1.126771773795446e-05, "loss": 0.9166, "step": 1954 }, { "epoch": 1.5411903823413482, "grad_norm": 0.3515625, "learning_rate": 1.125861336070986e-05, "loss": 0.9653, "step": 1955 }, { "epoch": 1.5419787150177375, "grad_norm": 0.36328125, "learning_rate": 1.1249507923288563e-05, "loss": 0.9425, "step": 1956 }, { "epoch": 1.5427670476941269, "grad_norm": 0.349609375, "learning_rate": 1.1240401433360417e-05, "loss": 0.9489, "step": 1957 }, { "epoch": 1.5435553803705164, "grad_norm": 0.353515625, "learning_rate": 1.1231293898596154e-05, "loss": 0.9313, "step": 1958 }, { "epoch": 1.5443437130469058, "grad_norm": 0.345703125, "learning_rate": 1.122218532666739e-05, "loss": 0.965, "step": 1959 }, { "epoch": 1.545132045723295, "grad_norm": 0.34765625, "learning_rate": 1.1213075725246612e-05, "loss": 0.934, "step": 1960 }, { "epoch": 1.5459203783996847, "grad_norm": 0.337890625, "learning_rate": 1.1203965102007176e-05, "loss": 0.9418, "step": 1961 }, { "epoch": 1.5467087110760742, "grad_norm": 0.349609375, "learning_rate": 1.1194853464623294e-05, "loss": 0.9363, "step": 1962 }, { "epoch": 1.5474970437524636, "grad_norm": 0.337890625, "learning_rate": 1.1185740820770042e-05, "loss": 0.9133, "step": 1963 }, { "epoch": 1.5482853764288529, "grad_norm": 0.353515625, "learning_rate": 1.1176627178123332e-05, "loss": 0.9681, "step": 1964 }, { "epoch": 1.5490737091052424, "grad_norm": 0.34765625, "learning_rate": 1.1167512544359929e-05, "loss": 0.9604, "step": 1965 }, { "epoch": 1.549862041781632, "grad_norm": 0.349609375, "learning_rate": 1.115839692715742e-05, "loss": 0.9357, "step": 1966 }, { "epoch": 1.5506503744580211, "grad_norm": 0.345703125, "learning_rate": 1.1149280334194238e-05, "loss": 0.9557, "step": 1967 }, { "epoch": 1.5514387071344107, "grad_norm": 0.33984375, "learning_rate": 1.1140162773149612e-05, "loss": 0.9508, "step": 1968 }, { "epoch": 1.5522270398108002, "grad_norm": 0.34375, "learning_rate": 1.1131044251703615e-05, "loss": 0.9583, "step": 1969 }, { "epoch": 1.5530153724871896, "grad_norm": 0.359375, "learning_rate": 1.1121924777537108e-05, "loss": 0.9519, "step": 1970 }, { "epoch": 1.553803705163579, "grad_norm": 0.34375, "learning_rate": 1.1112804358331766e-05, "loss": 0.9494, "step": 1971 }, { "epoch": 1.5545920378399685, "grad_norm": 0.345703125, "learning_rate": 1.1103683001770055e-05, "loss": 0.9156, "step": 1972 }, { "epoch": 1.555380370516358, "grad_norm": 0.337890625, "learning_rate": 1.1094560715535232e-05, "loss": 0.9138, "step": 1973 }, { "epoch": 1.5561687031927474, "grad_norm": 0.345703125, "learning_rate": 1.108543750731134e-05, "loss": 0.9537, "step": 1974 }, { "epoch": 1.5569570358691367, "grad_norm": 0.3359375, "learning_rate": 1.1076313384783183e-05, "loss": 0.8876, "step": 1975 }, { "epoch": 1.5577453685455263, "grad_norm": 0.34375, "learning_rate": 1.1067188355636366e-05, "loss": 0.9757, "step": 1976 }, { "epoch": 1.5585337012219158, "grad_norm": 0.341796875, "learning_rate": 1.105806242755723e-05, "loss": 0.9239, "step": 1977 }, { "epoch": 1.559322033898305, "grad_norm": 0.353515625, "learning_rate": 1.104893560823288e-05, "loss": 0.8933, "step": 1978 }, { "epoch": 1.5601103665746945, "grad_norm": 0.34765625, "learning_rate": 1.1039807905351179e-05, "loss": 0.9656, "step": 1979 }, { "epoch": 1.560898699251084, "grad_norm": 0.333984375, "learning_rate": 1.1030679326600726e-05, "loss": 0.9234, "step": 1980 }, { "epoch": 1.5616870319274734, "grad_norm": 0.34765625, "learning_rate": 1.1021549879670865e-05, "loss": 0.966, "step": 1981 }, { "epoch": 1.5624753646038627, "grad_norm": 0.34765625, "learning_rate": 1.1012419572251665e-05, "loss": 0.9421, "step": 1982 }, { "epoch": 1.5632636972802523, "grad_norm": 0.34375, "learning_rate": 1.1003288412033923e-05, "loss": 0.9496, "step": 1983 }, { "epoch": 1.5640520299566418, "grad_norm": 0.353515625, "learning_rate": 1.0994156406709155e-05, "loss": 0.9152, "step": 1984 }, { "epoch": 1.5648403626330312, "grad_norm": 0.34765625, "learning_rate": 1.0985023563969585e-05, "loss": 0.9756, "step": 1985 }, { "epoch": 1.5656286953094205, "grad_norm": 0.333984375, "learning_rate": 1.097588989150815e-05, "loss": 0.9473, "step": 1986 }, { "epoch": 1.56641702798581, "grad_norm": 0.349609375, "learning_rate": 1.0966755397018474e-05, "loss": 0.9806, "step": 1987 }, { "epoch": 1.5672053606621994, "grad_norm": 0.341796875, "learning_rate": 1.0957620088194884e-05, "loss": 0.9166, "step": 1988 }, { "epoch": 1.5679936933385887, "grad_norm": 0.34765625, "learning_rate": 1.0948483972732395e-05, "loss": 0.9995, "step": 1989 }, { "epoch": 1.5687820260149783, "grad_norm": 0.34765625, "learning_rate": 1.0939347058326684e-05, "loss": 0.9671, "step": 1990 }, { "epoch": 1.5695703586913679, "grad_norm": 0.33984375, "learning_rate": 1.0930209352674123e-05, "loss": 0.9505, "step": 1991 }, { "epoch": 1.5703586913677572, "grad_norm": 0.33984375, "learning_rate": 1.0921070863471732e-05, "loss": 0.941, "step": 1992 }, { "epoch": 1.5711470240441465, "grad_norm": 0.353515625, "learning_rate": 1.091193159841721e-05, "loss": 0.935, "step": 1993 }, { "epoch": 1.571935356720536, "grad_norm": 0.3515625, "learning_rate": 1.0902791565208887e-05, "loss": 0.9532, "step": 1994 }, { "epoch": 1.5727236893969256, "grad_norm": 0.345703125, "learning_rate": 1.089365077154576e-05, "loss": 0.9514, "step": 1995 }, { "epoch": 1.573512022073315, "grad_norm": 0.341796875, "learning_rate": 1.0884509225127453e-05, "loss": 0.9336, "step": 1996 }, { "epoch": 1.5743003547497043, "grad_norm": 0.33984375, "learning_rate": 1.0875366933654232e-05, "loss": 0.9412, "step": 1997 }, { "epoch": 1.5750886874260939, "grad_norm": 0.359375, "learning_rate": 1.0866223904826992e-05, "loss": 0.9253, "step": 1998 }, { "epoch": 1.5758770201024832, "grad_norm": 0.35546875, "learning_rate": 1.0857080146347236e-05, "loss": 0.9514, "step": 1999 }, { "epoch": 1.5766653527788725, "grad_norm": 0.34375, "learning_rate": 1.08479356659171e-05, "loss": 0.9247, "step": 2000 }, { "epoch": 1.577453685455262, "grad_norm": 0.341796875, "learning_rate": 1.0838790471239314e-05, "loss": 0.9686, "step": 2001 }, { "epoch": 1.5782420181316517, "grad_norm": 0.341796875, "learning_rate": 1.0829644570017213e-05, "loss": 0.9474, "step": 2002 }, { "epoch": 1.579030350808041, "grad_norm": 0.3359375, "learning_rate": 1.0820497969954734e-05, "loss": 0.9448, "step": 2003 }, { "epoch": 1.5798186834844303, "grad_norm": 0.349609375, "learning_rate": 1.0811350678756392e-05, "loss": 0.9528, "step": 2004 }, { "epoch": 1.5806070161608199, "grad_norm": 0.353515625, "learning_rate": 1.0802202704127293e-05, "loss": 0.8959, "step": 2005 }, { "epoch": 1.5813953488372094, "grad_norm": 0.333984375, "learning_rate": 1.0793054053773118e-05, "loss": 0.9413, "step": 2006 }, { "epoch": 1.5821836815135988, "grad_norm": 0.33984375, "learning_rate": 1.0783904735400103e-05, "loss": 0.9516, "step": 2007 }, { "epoch": 1.5829720141899881, "grad_norm": 0.333984375, "learning_rate": 1.0774754756715074e-05, "loss": 0.9165, "step": 2008 }, { "epoch": 1.5837603468663777, "grad_norm": 0.345703125, "learning_rate": 1.0765604125425381e-05, "loss": 0.9071, "step": 2009 }, { "epoch": 1.584548679542767, "grad_norm": 0.36328125, "learning_rate": 1.0756452849238955e-05, "loss": 0.9503, "step": 2010 }, { "epoch": 1.5853370122191563, "grad_norm": 0.353515625, "learning_rate": 1.0747300935864245e-05, "loss": 0.9314, "step": 2011 }, { "epoch": 1.586125344895546, "grad_norm": 0.353515625, "learning_rate": 1.0738148393010251e-05, "loss": 0.9287, "step": 2012 }, { "epoch": 1.5869136775719355, "grad_norm": 0.353515625, "learning_rate": 1.0728995228386496e-05, "loss": 0.9158, "step": 2013 }, { "epoch": 1.5877020102483248, "grad_norm": 0.36328125, "learning_rate": 1.0719841449703035e-05, "loss": 0.9906, "step": 2014 }, { "epoch": 1.5884903429247141, "grad_norm": 0.345703125, "learning_rate": 1.071068706467043e-05, "loss": 0.9406, "step": 2015 }, { "epoch": 1.5892786756011037, "grad_norm": 0.361328125, "learning_rate": 1.0701532080999762e-05, "loss": 0.9404, "step": 2016 }, { "epoch": 1.5900670082774933, "grad_norm": 0.35546875, "learning_rate": 1.0692376506402614e-05, "loss": 1.0115, "step": 2017 }, { "epoch": 1.5908553409538826, "grad_norm": 0.349609375, "learning_rate": 1.068322034859106e-05, "loss": 0.918, "step": 2018 }, { "epoch": 1.591643673630272, "grad_norm": 0.345703125, "learning_rate": 1.0674063615277681e-05, "loss": 0.9496, "step": 2019 }, { "epoch": 1.5924320063066615, "grad_norm": 0.345703125, "learning_rate": 1.0664906314175525e-05, "loss": 0.9827, "step": 2020 }, { "epoch": 1.5932203389830508, "grad_norm": 0.34765625, "learning_rate": 1.065574845299813e-05, "loss": 0.9359, "step": 2021 }, { "epoch": 1.5940086716594402, "grad_norm": 0.3515625, "learning_rate": 1.0646590039459499e-05, "loss": 0.9501, "step": 2022 }, { "epoch": 1.5947970043358297, "grad_norm": 0.337890625, "learning_rate": 1.0637431081274108e-05, "loss": 0.9421, "step": 2023 }, { "epoch": 1.5955853370122193, "grad_norm": 0.3515625, "learning_rate": 1.062827158615688e-05, "loss": 0.9544, "step": 2024 }, { "epoch": 1.5963736696886086, "grad_norm": 0.349609375, "learning_rate": 1.0619111561823208e-05, "loss": 0.9363, "step": 2025 }, { "epoch": 1.597162002364998, "grad_norm": 0.35546875, "learning_rate": 1.0609951015988907e-05, "loss": 0.9639, "step": 2026 }, { "epoch": 1.5979503350413875, "grad_norm": 0.33984375, "learning_rate": 1.0600789956370254e-05, "loss": 0.9456, "step": 2027 }, { "epoch": 1.598738667717777, "grad_norm": 0.337890625, "learning_rate": 1.0591628390683945e-05, "loss": 0.916, "step": 2028 }, { "epoch": 1.5995270003941662, "grad_norm": 0.33984375, "learning_rate": 1.058246632664711e-05, "loss": 0.8957, "step": 2029 }, { "epoch": 1.6003153330705557, "grad_norm": 0.35546875, "learning_rate": 1.057330377197729e-05, "loss": 0.89, "step": 2030 }, { "epoch": 1.6011036657469453, "grad_norm": 0.34375, "learning_rate": 1.0564140734392445e-05, "loss": 0.926, "step": 2031 }, { "epoch": 1.6018919984233346, "grad_norm": 0.345703125, "learning_rate": 1.055497722161095e-05, "loss": 0.9496, "step": 2032 }, { "epoch": 1.602680331099724, "grad_norm": 0.345703125, "learning_rate": 1.054581324135156e-05, "loss": 0.9212, "step": 2033 }, { "epoch": 1.6034686637761135, "grad_norm": 0.33984375, "learning_rate": 1.0536648801333443e-05, "loss": 0.9338, "step": 2034 }, { "epoch": 1.604256996452503, "grad_norm": 0.341796875, "learning_rate": 1.0527483909276144e-05, "loss": 0.9721, "step": 2035 }, { "epoch": 1.6050453291288924, "grad_norm": 0.3515625, "learning_rate": 1.051831857289959e-05, "loss": 0.9262, "step": 2036 }, { "epoch": 1.6058336618052818, "grad_norm": 0.34375, "learning_rate": 1.0509152799924085e-05, "loss": 0.9095, "step": 2037 }, { "epoch": 1.6066219944816713, "grad_norm": 0.349609375, "learning_rate": 1.0499986598070302e-05, "loss": 0.8865, "step": 2038 }, { "epoch": 1.6074103271580606, "grad_norm": 0.341796875, "learning_rate": 1.0490819975059268e-05, "loss": 0.9279, "step": 2039 }, { "epoch": 1.60819865983445, "grad_norm": 0.3515625, "learning_rate": 1.0481652938612374e-05, "loss": 0.9436, "step": 2040 }, { "epoch": 1.6089869925108395, "grad_norm": 0.341796875, "learning_rate": 1.0472485496451347e-05, "loss": 0.9495, "step": 2041 }, { "epoch": 1.609775325187229, "grad_norm": 0.3359375, "learning_rate": 1.0463317656298273e-05, "loss": 0.9494, "step": 2042 }, { "epoch": 1.6105636578636184, "grad_norm": 0.34375, "learning_rate": 1.045414942587556e-05, "loss": 0.945, "step": 2043 }, { "epoch": 1.6113519905400078, "grad_norm": 0.349609375, "learning_rate": 1.0444980812905945e-05, "loss": 0.95, "step": 2044 }, { "epoch": 1.6121403232163973, "grad_norm": 0.341796875, "learning_rate": 1.0435811825112496e-05, "loss": 0.9198, "step": 2045 }, { "epoch": 1.6129286558927869, "grad_norm": 0.359375, "learning_rate": 1.0426642470218587e-05, "loss": 0.9664, "step": 2046 }, { "epoch": 1.6137169885691762, "grad_norm": 0.359375, "learning_rate": 1.0417472755947908e-05, "loss": 0.9224, "step": 2047 }, { "epoch": 1.6145053212455656, "grad_norm": 0.345703125, "learning_rate": 1.0408302690024447e-05, "loss": 0.9398, "step": 2048 }, { "epoch": 1.6152936539219551, "grad_norm": 0.345703125, "learning_rate": 1.0399132280172494e-05, "loss": 0.9533, "step": 2049 }, { "epoch": 1.6160819865983445, "grad_norm": 0.341796875, "learning_rate": 1.0389961534116622e-05, "loss": 0.9418, "step": 2050 }, { "epoch": 1.6168703192747338, "grad_norm": 0.33984375, "learning_rate": 1.0380790459581695e-05, "loss": 0.9635, "step": 2051 }, { "epoch": 1.6176586519511233, "grad_norm": 0.35546875, "learning_rate": 1.0371619064292844e-05, "loss": 0.9441, "step": 2052 }, { "epoch": 1.618446984627513, "grad_norm": 0.35546875, "learning_rate": 1.0362447355975475e-05, "loss": 0.9276, "step": 2053 }, { "epoch": 1.6192353173039022, "grad_norm": 0.34765625, "learning_rate": 1.0353275342355262e-05, "loss": 0.9425, "step": 2054 }, { "epoch": 1.6200236499802916, "grad_norm": 0.359375, "learning_rate": 1.034410303115813e-05, "loss": 0.9406, "step": 2055 }, { "epoch": 1.6208119826566811, "grad_norm": 0.361328125, "learning_rate": 1.0334930430110258e-05, "loss": 0.973, "step": 2056 }, { "epoch": 1.6216003153330707, "grad_norm": 0.3359375, "learning_rate": 1.0325757546938067e-05, "loss": 0.9387, "step": 2057 }, { "epoch": 1.62238864800946, "grad_norm": 0.341796875, "learning_rate": 1.0316584389368213e-05, "loss": 0.9309, "step": 2058 }, { "epoch": 1.6231769806858494, "grad_norm": 0.34375, "learning_rate": 1.0307410965127595e-05, "loss": 0.9222, "step": 2059 }, { "epoch": 1.623965313362239, "grad_norm": 0.3359375, "learning_rate": 1.0298237281943321e-05, "loss": 0.9499, "step": 2060 }, { "epoch": 1.6247536460386283, "grad_norm": 0.341796875, "learning_rate": 1.0289063347542727e-05, "loss": 0.9568, "step": 2061 }, { "epoch": 1.6255419787150176, "grad_norm": 0.34765625, "learning_rate": 1.027988916965336e-05, "loss": 0.9477, "step": 2062 }, { "epoch": 1.6263303113914072, "grad_norm": 0.34375, "learning_rate": 1.0270714756002967e-05, "loss": 0.9238, "step": 2063 }, { "epoch": 1.6271186440677967, "grad_norm": 0.34765625, "learning_rate": 1.02615401143195e-05, "loss": 0.9175, "step": 2064 }, { "epoch": 1.627906976744186, "grad_norm": 0.33984375, "learning_rate": 1.0252365252331094e-05, "loss": 0.9307, "step": 2065 }, { "epoch": 1.6286953094205754, "grad_norm": 0.34765625, "learning_rate": 1.0243190177766084e-05, "loss": 0.9225, "step": 2066 }, { "epoch": 1.629483642096965, "grad_norm": 0.34375, "learning_rate": 1.0234014898352966e-05, "loss": 0.9432, "step": 2067 }, { "epoch": 1.6302719747733545, "grad_norm": 0.33984375, "learning_rate": 1.0224839421820426e-05, "loss": 0.8896, "step": 2068 }, { "epoch": 1.6310603074497438, "grad_norm": 0.34765625, "learning_rate": 1.0215663755897306e-05, "loss": 0.9508, "step": 2069 }, { "epoch": 1.6318486401261332, "grad_norm": 0.35546875, "learning_rate": 1.0206487908312607e-05, "loss": 0.944, "step": 2070 }, { "epoch": 1.6326369728025227, "grad_norm": 0.3515625, "learning_rate": 1.0197311886795487e-05, "loss": 0.9291, "step": 2071 }, { "epoch": 1.633425305478912, "grad_norm": 0.33984375, "learning_rate": 1.018813569907525e-05, "loss": 0.909, "step": 2072 }, { "epoch": 1.6342136381553014, "grad_norm": 0.3359375, "learning_rate": 1.0178959352881337e-05, "loss": 0.9247, "step": 2073 }, { "epoch": 1.635001970831691, "grad_norm": 0.345703125, "learning_rate": 1.0169782855943327e-05, "loss": 0.9414, "step": 2074 }, { "epoch": 1.6357903035080805, "grad_norm": 0.34375, "learning_rate": 1.0160606215990922e-05, "loss": 0.954, "step": 2075 }, { "epoch": 1.6365786361844699, "grad_norm": 0.341796875, "learning_rate": 1.0151429440753948e-05, "loss": 0.9476, "step": 2076 }, { "epoch": 1.6373669688608592, "grad_norm": 0.36328125, "learning_rate": 1.014225253796234e-05, "loss": 0.9237, "step": 2077 }, { "epoch": 1.6381553015372488, "grad_norm": 0.3515625, "learning_rate": 1.0133075515346147e-05, "loss": 0.9678, "step": 2078 }, { "epoch": 1.6389436342136383, "grad_norm": 0.34375, "learning_rate": 1.0123898380635515e-05, "loss": 0.9289, "step": 2079 }, { "epoch": 1.6397319668900274, "grad_norm": 0.357421875, "learning_rate": 1.011472114156068e-05, "loss": 0.906, "step": 2080 }, { "epoch": 1.640520299566417, "grad_norm": 0.341796875, "learning_rate": 1.0105543805851977e-05, "loss": 0.9473, "step": 2081 }, { "epoch": 1.6413086322428065, "grad_norm": 0.337890625, "learning_rate": 1.0096366381239808e-05, "loss": 0.9334, "step": 2082 }, { "epoch": 1.6420969649191959, "grad_norm": 0.341796875, "learning_rate": 1.008718887545467e-05, "loss": 0.9541, "step": 2083 }, { "epoch": 1.6428852975955852, "grad_norm": 0.33984375, "learning_rate": 1.0078011296227104e-05, "loss": 0.928, "step": 2084 }, { "epoch": 1.6436736302719748, "grad_norm": 0.34375, "learning_rate": 1.0068833651287736e-05, "loss": 0.941, "step": 2085 }, { "epoch": 1.6444619629483643, "grad_norm": 0.359375, "learning_rate": 1.0059655948367229e-05, "loss": 0.9626, "step": 2086 }, { "epoch": 1.6452502956247537, "grad_norm": 0.34375, "learning_rate": 1.0050478195196303e-05, "loss": 0.954, "step": 2087 }, { "epoch": 1.646038628301143, "grad_norm": 0.37109375, "learning_rate": 1.0041300399505724e-05, "loss": 0.9385, "step": 2088 }, { "epoch": 1.6468269609775326, "grad_norm": 0.353515625, "learning_rate": 1.0032122569026284e-05, "loss": 0.9312, "step": 2089 }, { "epoch": 1.6476152936539221, "grad_norm": 0.341796875, "learning_rate": 1.0022944711488818e-05, "loss": 0.9645, "step": 2090 }, { "epoch": 1.6484036263303112, "grad_norm": 0.341796875, "learning_rate": 1.0013766834624168e-05, "loss": 0.9304, "step": 2091 }, { "epoch": 1.6491919590067008, "grad_norm": 0.35546875, "learning_rate": 1.0004588946163203e-05, "loss": 0.9356, "step": 2092 }, { "epoch": 1.6499802916830903, "grad_norm": 0.353515625, "learning_rate": 9.995411053836798e-06, "loss": 0.952, "step": 2093 }, { "epoch": 1.6507686243594797, "grad_norm": 0.341796875, "learning_rate": 9.986233165375837e-06, "loss": 0.8913, "step": 2094 }, { "epoch": 1.651556957035869, "grad_norm": 0.333984375, "learning_rate": 9.977055288511182e-06, "loss": 0.916, "step": 2095 }, { "epoch": 1.6523452897122586, "grad_norm": 0.345703125, "learning_rate": 9.967877430973716e-06, "loss": 0.9136, "step": 2096 }, { "epoch": 1.6531336223886481, "grad_norm": 0.365234375, "learning_rate": 9.95869960049428e-06, "loss": 0.9941, "step": 2097 }, { "epoch": 1.6539219550650375, "grad_norm": 0.341796875, "learning_rate": 9.949521804803699e-06, "loss": 0.9161, "step": 2098 }, { "epoch": 1.6547102877414268, "grad_norm": 0.337890625, "learning_rate": 9.940344051632778e-06, "loss": 0.924, "step": 2099 }, { "epoch": 1.6554986204178164, "grad_norm": 0.337890625, "learning_rate": 9.931166348712268e-06, "loss": 0.8907, "step": 2100 }, { "epoch": 1.6562869530942057, "grad_norm": 0.337890625, "learning_rate": 9.921988703772897e-06, "loss": 0.9355, "step": 2101 }, { "epoch": 1.657075285770595, "grad_norm": 0.345703125, "learning_rate": 9.912811124545334e-06, "loss": 0.9115, "step": 2102 }, { "epoch": 1.6578636184469846, "grad_norm": 0.3515625, "learning_rate": 9.903633618760195e-06, "loss": 0.918, "step": 2103 }, { "epoch": 1.6586519511233742, "grad_norm": 0.337890625, "learning_rate": 9.894456194148028e-06, "loss": 0.9251, "step": 2104 }, { "epoch": 1.6594402837997635, "grad_norm": 0.359375, "learning_rate": 9.885278858439321e-06, "loss": 0.9485, "step": 2105 }, { "epoch": 1.6602286164761528, "grad_norm": 0.341796875, "learning_rate": 9.876101619364487e-06, "loss": 0.9074, "step": 2106 }, { "epoch": 1.6610169491525424, "grad_norm": 0.3359375, "learning_rate": 9.866924484653856e-06, "loss": 0.908, "step": 2107 }, { "epoch": 1.661805281828932, "grad_norm": 0.357421875, "learning_rate": 9.857747462037663e-06, "loss": 0.9816, "step": 2108 }, { "epoch": 1.6625936145053213, "grad_norm": 0.345703125, "learning_rate": 9.848570559246055e-06, "loss": 0.9523, "step": 2109 }, { "epoch": 1.6633819471817106, "grad_norm": 0.35546875, "learning_rate": 9.839393784009078e-06, "loss": 0.962, "step": 2110 }, { "epoch": 1.6641702798581002, "grad_norm": 0.349609375, "learning_rate": 9.830217144056675e-06, "loss": 1.0056, "step": 2111 }, { "epoch": 1.6649586125344895, "grad_norm": 0.345703125, "learning_rate": 9.821040647118666e-06, "loss": 0.9735, "step": 2112 }, { "epoch": 1.6657469452108788, "grad_norm": 0.3515625, "learning_rate": 9.811864300924753e-06, "loss": 0.9542, "step": 2113 }, { "epoch": 1.6665352778872684, "grad_norm": 0.34765625, "learning_rate": 9.802688113204518e-06, "loss": 0.9367, "step": 2114 }, { "epoch": 1.667323610563658, "grad_norm": 0.345703125, "learning_rate": 9.793512091687396e-06, "loss": 0.9227, "step": 2115 }, { "epoch": 1.6681119432400473, "grad_norm": 0.3515625, "learning_rate": 9.784336244102697e-06, "loss": 0.9558, "step": 2116 }, { "epoch": 1.6689002759164366, "grad_norm": 0.341796875, "learning_rate": 9.775160578179575e-06, "loss": 0.9582, "step": 2117 }, { "epoch": 1.6696886085928262, "grad_norm": 0.345703125, "learning_rate": 9.765985101647037e-06, "loss": 0.9452, "step": 2118 }, { "epoch": 1.6704769412692158, "grad_norm": 0.349609375, "learning_rate": 9.75680982223392e-06, "loss": 0.9809, "step": 2119 }, { "epoch": 1.671265273945605, "grad_norm": 0.3515625, "learning_rate": 9.747634747668906e-06, "loss": 0.91, "step": 2120 }, { "epoch": 1.6720536066219944, "grad_norm": 0.33984375, "learning_rate": 9.738459885680502e-06, "loss": 0.9424, "step": 2121 }, { "epoch": 1.672841939298384, "grad_norm": 0.341796875, "learning_rate": 9.729285243997037e-06, "loss": 0.961, "step": 2122 }, { "epoch": 1.6736302719747733, "grad_norm": 0.34375, "learning_rate": 9.720110830346643e-06, "loss": 0.9251, "step": 2123 }, { "epoch": 1.6744186046511627, "grad_norm": 0.3515625, "learning_rate": 9.710936652457276e-06, "loss": 0.972, "step": 2124 }, { "epoch": 1.6752069373275522, "grad_norm": 0.341796875, "learning_rate": 9.70176271805668e-06, "loss": 0.901, "step": 2125 }, { "epoch": 1.6759952700039418, "grad_norm": 0.349609375, "learning_rate": 9.692589034872408e-06, "loss": 0.9607, "step": 2126 }, { "epoch": 1.676783602680331, "grad_norm": 0.341796875, "learning_rate": 9.683415610631788e-06, "loss": 0.9204, "step": 2127 }, { "epoch": 1.6775719353567204, "grad_norm": 0.34375, "learning_rate": 9.674242453061938e-06, "loss": 0.9878, "step": 2128 }, { "epoch": 1.67836026803311, "grad_norm": 0.34375, "learning_rate": 9.665069569889742e-06, "loss": 0.9846, "step": 2129 }, { "epoch": 1.6791486007094996, "grad_norm": 0.34765625, "learning_rate": 9.655896968841873e-06, "loss": 0.933, "step": 2130 }, { "epoch": 1.679936933385889, "grad_norm": 0.34765625, "learning_rate": 9.64672465764474e-06, "loss": 0.9479, "step": 2131 }, { "epoch": 1.6807252660622782, "grad_norm": 0.33984375, "learning_rate": 9.637552644024526e-06, "loss": 0.9849, "step": 2132 }, { "epoch": 1.6815135987386678, "grad_norm": 0.3515625, "learning_rate": 9.62838093570716e-06, "loss": 0.9166, "step": 2133 }, { "epoch": 1.6823019314150571, "grad_norm": 0.33984375, "learning_rate": 9.619209540418307e-06, "loss": 0.947, "step": 2134 }, { "epoch": 1.6830902640914465, "grad_norm": 0.337890625, "learning_rate": 9.610038465883378e-06, "loss": 0.9618, "step": 2135 }, { "epoch": 1.683878596767836, "grad_norm": 0.345703125, "learning_rate": 9.600867719827507e-06, "loss": 0.9557, "step": 2136 }, { "epoch": 1.6846669294442256, "grad_norm": 0.341796875, "learning_rate": 9.591697309975556e-06, "loss": 0.9571, "step": 2137 }, { "epoch": 1.685455262120615, "grad_norm": 0.341796875, "learning_rate": 9.582527244052095e-06, "loss": 0.9674, "step": 2138 }, { "epoch": 1.6862435947970043, "grad_norm": 0.34375, "learning_rate": 9.573357529781413e-06, "loss": 0.9216, "step": 2139 }, { "epoch": 1.6870319274733938, "grad_norm": 0.3515625, "learning_rate": 9.564188174887505e-06, "loss": 0.9757, "step": 2140 }, { "epoch": 1.6878202601497834, "grad_norm": 0.357421875, "learning_rate": 9.555019187094058e-06, "loss": 0.9336, "step": 2141 }, { "epoch": 1.6886085928261725, "grad_norm": 0.34375, "learning_rate": 9.545850574124444e-06, "loss": 0.9454, "step": 2142 }, { "epoch": 1.689396925502562, "grad_norm": 0.3515625, "learning_rate": 9.53668234370173e-06, "loss": 0.9301, "step": 2143 }, { "epoch": 1.6901852581789516, "grad_norm": 0.345703125, "learning_rate": 9.52751450354865e-06, "loss": 0.9541, "step": 2144 }, { "epoch": 1.690973590855341, "grad_norm": 0.345703125, "learning_rate": 9.518347061387629e-06, "loss": 0.9311, "step": 2145 }, { "epoch": 1.6917619235317303, "grad_norm": 0.33203125, "learning_rate": 9.509180024940735e-06, "loss": 0.9346, "step": 2146 }, { "epoch": 1.6925502562081198, "grad_norm": 0.34765625, "learning_rate": 9.500013401929701e-06, "loss": 0.9459, "step": 2147 }, { "epoch": 1.6933385888845094, "grad_norm": 0.34765625, "learning_rate": 9.490847200075919e-06, "loss": 0.9862, "step": 2148 }, { "epoch": 1.6941269215608987, "grad_norm": 0.3359375, "learning_rate": 9.481681427100411e-06, "loss": 0.9176, "step": 2149 }, { "epoch": 1.694915254237288, "grad_norm": 0.34765625, "learning_rate": 9.47251609072386e-06, "loss": 0.9298, "step": 2150 }, { "epoch": 1.6957035869136776, "grad_norm": 0.349609375, "learning_rate": 9.46335119866656e-06, "loss": 0.9671, "step": 2151 }, { "epoch": 1.696491919590067, "grad_norm": 0.341796875, "learning_rate": 9.454186758648444e-06, "loss": 0.9301, "step": 2152 }, { "epoch": 1.6972802522664563, "grad_norm": 0.345703125, "learning_rate": 9.445022778389057e-06, "loss": 0.9296, "step": 2153 }, { "epoch": 1.6980685849428458, "grad_norm": 0.34765625, "learning_rate": 9.435859265607555e-06, "loss": 0.916, "step": 2154 }, { "epoch": 1.6988569176192354, "grad_norm": 0.34375, "learning_rate": 9.426696228022714e-06, "loss": 0.9591, "step": 2155 }, { "epoch": 1.6996452502956247, "grad_norm": 0.361328125, "learning_rate": 9.417533673352895e-06, "loss": 0.9315, "step": 2156 }, { "epoch": 1.700433582972014, "grad_norm": 0.34765625, "learning_rate": 9.40837160931606e-06, "loss": 0.9339, "step": 2157 }, { "epoch": 1.7012219156484036, "grad_norm": 0.345703125, "learning_rate": 9.399210043629751e-06, "loss": 0.9214, "step": 2158 }, { "epoch": 1.7020102483247932, "grad_norm": 0.34375, "learning_rate": 9.390048984011095e-06, "loss": 0.8978, "step": 2159 }, { "epoch": 1.7027985810011825, "grad_norm": 0.357421875, "learning_rate": 9.380888438176797e-06, "loss": 0.9516, "step": 2160 }, { "epoch": 1.7035869136775719, "grad_norm": 0.353515625, "learning_rate": 9.371728413843122e-06, "loss": 0.9428, "step": 2161 }, { "epoch": 1.7043752463539614, "grad_norm": 0.345703125, "learning_rate": 9.362568918725895e-06, "loss": 0.9589, "step": 2162 }, { "epoch": 1.7051635790303508, "grad_norm": 0.34375, "learning_rate": 9.353409960540506e-06, "loss": 0.9355, "step": 2163 }, { "epoch": 1.70595191170674, "grad_norm": 0.34765625, "learning_rate": 9.344251547001872e-06, "loss": 0.9301, "step": 2164 }, { "epoch": 1.7067402443831297, "grad_norm": 0.3359375, "learning_rate": 9.335093685824479e-06, "loss": 0.9434, "step": 2165 }, { "epoch": 1.7075285770595192, "grad_norm": 0.34765625, "learning_rate": 9.325936384722322e-06, "loss": 0.9324, "step": 2166 }, { "epoch": 1.7083169097359086, "grad_norm": 0.34375, "learning_rate": 9.316779651408941e-06, "loss": 0.9579, "step": 2167 }, { "epoch": 1.7091052424122979, "grad_norm": 0.361328125, "learning_rate": 9.307623493597388e-06, "loss": 1.0034, "step": 2168 }, { "epoch": 1.7098935750886874, "grad_norm": 0.33203125, "learning_rate": 9.29846791900024e-06, "loss": 0.9196, "step": 2169 }, { "epoch": 1.710681907765077, "grad_norm": 0.345703125, "learning_rate": 9.289312935329573e-06, "loss": 0.9498, "step": 2170 }, { "epoch": 1.7114702404414663, "grad_norm": 0.3515625, "learning_rate": 9.28015855029697e-06, "loss": 0.9405, "step": 2171 }, { "epoch": 1.7122585731178557, "grad_norm": 0.34375, "learning_rate": 9.271004771613509e-06, "loss": 0.9646, "step": 2172 }, { "epoch": 1.7130469057942452, "grad_norm": 0.345703125, "learning_rate": 9.261851606989754e-06, "loss": 0.9686, "step": 2173 }, { "epoch": 1.7138352384706346, "grad_norm": 0.33984375, "learning_rate": 9.252699064135759e-06, "loss": 0.9233, "step": 2174 }, { "epoch": 1.714623571147024, "grad_norm": 0.34375, "learning_rate": 9.243547150761047e-06, "loss": 0.9343, "step": 2175 }, { "epoch": 1.7154119038234135, "grad_norm": 0.35546875, "learning_rate": 9.23439587457462e-06, "loss": 0.9631, "step": 2176 }, { "epoch": 1.716200236499803, "grad_norm": 0.3828125, "learning_rate": 9.225245243284931e-06, "loss": 0.9289, "step": 2177 }, { "epoch": 1.7169885691761924, "grad_norm": 0.345703125, "learning_rate": 9.216095264599895e-06, "loss": 0.959, "step": 2178 }, { "epoch": 1.7177769018525817, "grad_norm": 0.333984375, "learning_rate": 9.206945946226885e-06, "loss": 0.905, "step": 2179 }, { "epoch": 1.7185652345289713, "grad_norm": 0.34765625, "learning_rate": 9.197797295872709e-06, "loss": 0.931, "step": 2180 }, { "epoch": 1.7193535672053608, "grad_norm": 0.35546875, "learning_rate": 9.18864932124361e-06, "loss": 0.942, "step": 2181 }, { "epoch": 1.7201418998817501, "grad_norm": 0.34375, "learning_rate": 9.17950203004527e-06, "loss": 0.9133, "step": 2182 }, { "epoch": 1.7209302325581395, "grad_norm": 0.3671875, "learning_rate": 9.170355429982787e-06, "loss": 0.9489, "step": 2183 }, { "epoch": 1.721718565234529, "grad_norm": 0.37109375, "learning_rate": 9.161209528760691e-06, "loss": 0.936, "step": 2184 }, { "epoch": 1.7225068979109184, "grad_norm": 0.35546875, "learning_rate": 9.152064334082905e-06, "loss": 0.9517, "step": 2185 }, { "epoch": 1.7232952305873077, "grad_norm": 0.345703125, "learning_rate": 9.142919853652766e-06, "loss": 0.9328, "step": 2186 }, { "epoch": 1.7240835632636973, "grad_norm": 0.341796875, "learning_rate": 9.133776095173015e-06, "loss": 0.9221, "step": 2187 }, { "epoch": 1.7248718959400868, "grad_norm": 0.35546875, "learning_rate": 9.12463306634577e-06, "loss": 0.9383, "step": 2188 }, { "epoch": 1.7256602286164762, "grad_norm": 0.349609375, "learning_rate": 9.115490774872549e-06, "loss": 0.9508, "step": 2189 }, { "epoch": 1.7264485612928655, "grad_norm": 0.33984375, "learning_rate": 9.106349228454242e-06, "loss": 0.9604, "step": 2190 }, { "epoch": 1.727236893969255, "grad_norm": 0.349609375, "learning_rate": 9.097208434791118e-06, "loss": 0.9281, "step": 2191 }, { "epoch": 1.7280252266456446, "grad_norm": 0.33984375, "learning_rate": 9.088068401582795e-06, "loss": 0.9251, "step": 2192 }, { "epoch": 1.7288135593220337, "grad_norm": 0.33203125, "learning_rate": 9.078929136528267e-06, "loss": 0.9399, "step": 2193 }, { "epoch": 1.7296018919984233, "grad_norm": 0.34765625, "learning_rate": 9.069790647325879e-06, "loss": 0.8771, "step": 2194 }, { "epoch": 1.7303902246748128, "grad_norm": 0.349609375, "learning_rate": 9.060652941673317e-06, "loss": 0.9738, "step": 2195 }, { "epoch": 1.7311785573512022, "grad_norm": 0.35546875, "learning_rate": 9.05151602726761e-06, "loss": 0.9622, "step": 2196 }, { "epoch": 1.7319668900275915, "grad_norm": 0.349609375, "learning_rate": 9.042379911805117e-06, "loss": 0.9296, "step": 2197 }, { "epoch": 1.732755222703981, "grad_norm": 0.3515625, "learning_rate": 9.033244602981527e-06, "loss": 0.9209, "step": 2198 }, { "epoch": 1.7335435553803706, "grad_norm": 0.341796875, "learning_rate": 9.024110108491855e-06, "loss": 0.9308, "step": 2199 }, { "epoch": 1.73433188805676, "grad_norm": 0.34375, "learning_rate": 9.014976436030417e-06, "loss": 0.9321, "step": 2200 }, { "epoch": 1.7351202207331493, "grad_norm": 0.359375, "learning_rate": 9.005843593290849e-06, "loss": 0.9256, "step": 2201 }, { "epoch": 1.7359085534095389, "grad_norm": 0.3515625, "learning_rate": 8.996711587966079e-06, "loss": 0.9307, "step": 2202 }, { "epoch": 1.7366968860859284, "grad_norm": 0.3359375, "learning_rate": 8.987580427748335e-06, "loss": 0.9147, "step": 2203 }, { "epoch": 1.7374852187623175, "grad_norm": 0.365234375, "learning_rate": 8.978450120329138e-06, "loss": 0.9259, "step": 2204 }, { "epoch": 1.738273551438707, "grad_norm": 0.380859375, "learning_rate": 8.969320673399276e-06, "loss": 0.9337, "step": 2205 }, { "epoch": 1.7390618841150967, "grad_norm": 0.359375, "learning_rate": 8.960192094648828e-06, "loss": 0.9509, "step": 2206 }, { "epoch": 1.739850216791486, "grad_norm": 0.345703125, "learning_rate": 8.95106439176712e-06, "loss": 0.9484, "step": 2207 }, { "epoch": 1.7406385494678753, "grad_norm": 0.357421875, "learning_rate": 8.941937572442773e-06, "loss": 0.8991, "step": 2208 }, { "epoch": 1.7414268821442649, "grad_norm": 0.34375, "learning_rate": 8.932811644363635e-06, "loss": 0.9565, "step": 2209 }, { "epoch": 1.7422152148206544, "grad_norm": 0.380859375, "learning_rate": 8.923686615216818e-06, "loss": 0.9306, "step": 2210 }, { "epoch": 1.7430035474970438, "grad_norm": 0.345703125, "learning_rate": 8.914562492688667e-06, "loss": 0.9369, "step": 2211 }, { "epoch": 1.7437918801734331, "grad_norm": 0.34765625, "learning_rate": 8.90543928446477e-06, "loss": 0.9766, "step": 2212 }, { "epoch": 1.7445802128498227, "grad_norm": 0.341796875, "learning_rate": 8.896316998229946e-06, "loss": 0.9494, "step": 2213 }, { "epoch": 1.745368545526212, "grad_norm": 0.345703125, "learning_rate": 8.887195641668235e-06, "loss": 0.9407, "step": 2214 }, { "epoch": 1.7461568782026013, "grad_norm": 0.3515625, "learning_rate": 8.878075222462896e-06, "loss": 0.9321, "step": 2215 }, { "epoch": 1.746945210878991, "grad_norm": 0.35546875, "learning_rate": 8.868955748296391e-06, "loss": 0.9576, "step": 2216 }, { "epoch": 1.7477335435553805, "grad_norm": 0.345703125, "learning_rate": 8.85983722685039e-06, "loss": 0.9561, "step": 2217 }, { "epoch": 1.7485218762317698, "grad_norm": 0.3515625, "learning_rate": 8.850719665805768e-06, "loss": 0.9461, "step": 2218 }, { "epoch": 1.7493102089081591, "grad_norm": 0.35546875, "learning_rate": 8.841603072842582e-06, "loss": 0.9848, "step": 2219 }, { "epoch": 1.7500985415845487, "grad_norm": 0.34375, "learning_rate": 8.832487455640074e-06, "loss": 0.9366, "step": 2220 }, { "epoch": 1.7508868742609383, "grad_norm": 0.34765625, "learning_rate": 8.823372821876673e-06, "loss": 0.9757, "step": 2221 }, { "epoch": 1.7516752069373276, "grad_norm": 0.34375, "learning_rate": 8.81425917922996e-06, "loss": 0.928, "step": 2222 }, { "epoch": 1.752463539613717, "grad_norm": 0.349609375, "learning_rate": 8.805146535376709e-06, "loss": 0.9561, "step": 2223 }, { "epoch": 1.7532518722901065, "grad_norm": 0.35546875, "learning_rate": 8.79603489799283e-06, "loss": 0.916, "step": 2224 }, { "epoch": 1.7540402049664958, "grad_norm": 0.337890625, "learning_rate": 8.786924274753391e-06, "loss": 0.9101, "step": 2225 }, { "epoch": 1.7548285376428852, "grad_norm": 0.345703125, "learning_rate": 8.777814673332615e-06, "loss": 0.9435, "step": 2226 }, { "epoch": 1.7556168703192747, "grad_norm": 0.34765625, "learning_rate": 8.76870610140385e-06, "loss": 0.9647, "step": 2227 }, { "epoch": 1.7564052029956643, "grad_norm": 0.33984375, "learning_rate": 8.759598566639586e-06, "loss": 0.9356, "step": 2228 }, { "epoch": 1.7571935356720536, "grad_norm": 0.337890625, "learning_rate": 8.750492076711439e-06, "loss": 0.9089, "step": 2229 }, { "epoch": 1.757981868348443, "grad_norm": 0.34375, "learning_rate": 8.741386639290145e-06, "loss": 0.9443, "step": 2230 }, { "epoch": 1.7587702010248325, "grad_norm": 0.349609375, "learning_rate": 8.732282262045546e-06, "loss": 0.9389, "step": 2231 }, { "epoch": 1.759558533701222, "grad_norm": 0.349609375, "learning_rate": 8.723178952646597e-06, "loss": 0.9452, "step": 2232 }, { "epoch": 1.7603468663776114, "grad_norm": 0.349609375, "learning_rate": 8.714076718761357e-06, "loss": 0.9151, "step": 2233 }, { "epoch": 1.7611351990540007, "grad_norm": 0.34375, "learning_rate": 8.704975568056975e-06, "loss": 0.9563, "step": 2234 }, { "epoch": 1.7619235317303903, "grad_norm": 0.337890625, "learning_rate": 8.695875508199683e-06, "loss": 0.9871, "step": 2235 }, { "epoch": 1.7627118644067796, "grad_norm": 0.359375, "learning_rate": 8.6867765468548e-06, "loss": 0.9004, "step": 2236 }, { "epoch": 1.763500197083169, "grad_norm": 0.337890625, "learning_rate": 8.677678691686722e-06, "loss": 0.9396, "step": 2237 }, { "epoch": 1.7642885297595585, "grad_norm": 0.341796875, "learning_rate": 8.66858195035891e-06, "loss": 0.9356, "step": 2238 }, { "epoch": 1.765076862435948, "grad_norm": 0.337890625, "learning_rate": 8.659486330533883e-06, "loss": 0.9424, "step": 2239 }, { "epoch": 1.7658651951123374, "grad_norm": 0.341796875, "learning_rate": 8.65039183987322e-06, "loss": 0.9239, "step": 2240 }, { "epoch": 1.7666535277887268, "grad_norm": 0.349609375, "learning_rate": 8.641298486037543e-06, "loss": 0.9836, "step": 2241 }, { "epoch": 1.7674418604651163, "grad_norm": 0.337890625, "learning_rate": 8.632206276686533e-06, "loss": 0.917, "step": 2242 }, { "epoch": 1.7682301931415059, "grad_norm": 0.3515625, "learning_rate": 8.623115219478884e-06, "loss": 0.9517, "step": 2243 }, { "epoch": 1.7690185258178952, "grad_norm": 0.345703125, "learning_rate": 8.614025322072338e-06, "loss": 0.9394, "step": 2244 }, { "epoch": 1.7698068584942845, "grad_norm": 0.33984375, "learning_rate": 8.604936592123647e-06, "loss": 0.9188, "step": 2245 }, { "epoch": 1.770595191170674, "grad_norm": 0.34375, "learning_rate": 8.595849037288581e-06, "loss": 0.9361, "step": 2246 }, { "epoch": 1.7713835238470634, "grad_norm": 0.357421875, "learning_rate": 8.58676266522194e-06, "loss": 0.9407, "step": 2247 }, { "epoch": 1.7721718565234528, "grad_norm": 0.357421875, "learning_rate": 8.577677483577498e-06, "loss": 0.9562, "step": 2248 }, { "epoch": 1.7729601891998423, "grad_norm": 0.349609375, "learning_rate": 8.568593500008047e-06, "loss": 0.9517, "step": 2249 }, { "epoch": 1.7737485218762319, "grad_norm": 0.337890625, "learning_rate": 8.55951072216536e-06, "loss": 0.9622, "step": 2250 }, { "epoch": 1.7745368545526212, "grad_norm": 0.345703125, "learning_rate": 8.550429157700196e-06, "loss": 0.9403, "step": 2251 }, { "epoch": 1.7753251872290106, "grad_norm": 0.357421875, "learning_rate": 8.541348814262298e-06, "loss": 0.9472, "step": 2252 }, { "epoch": 1.7761135199054001, "grad_norm": 0.341796875, "learning_rate": 8.532269699500377e-06, "loss": 0.9007, "step": 2253 }, { "epoch": 1.7769018525817897, "grad_norm": 0.33984375, "learning_rate": 8.523191821062103e-06, "loss": 0.8878, "step": 2254 }, { "epoch": 1.7776901852581788, "grad_norm": 0.349609375, "learning_rate": 8.51411518659411e-06, "loss": 0.9672, "step": 2255 }, { "epoch": 1.7784785179345683, "grad_norm": 0.3515625, "learning_rate": 8.505039803741985e-06, "loss": 0.9473, "step": 2256 }, { "epoch": 1.779266850610958, "grad_norm": 0.341796875, "learning_rate": 8.49596568015026e-06, "loss": 0.9356, "step": 2257 }, { "epoch": 1.7800551832873472, "grad_norm": 0.345703125, "learning_rate": 8.48689282346241e-06, "loss": 0.9552, "step": 2258 }, { "epoch": 1.7808435159637366, "grad_norm": 0.34765625, "learning_rate": 8.477821241320831e-06, "loss": 0.9335, "step": 2259 }, { "epoch": 1.7816318486401261, "grad_norm": 0.349609375, "learning_rate": 8.468750941366858e-06, "loss": 0.9382, "step": 2260 }, { "epoch": 1.7824201813165157, "grad_norm": 0.349609375, "learning_rate": 8.459681931240734e-06, "loss": 0.9804, "step": 2261 }, { "epoch": 1.783208513992905, "grad_norm": 0.34375, "learning_rate": 8.450614218581631e-06, "loss": 0.9178, "step": 2262 }, { "epoch": 1.7839968466692944, "grad_norm": 0.3359375, "learning_rate": 8.441547811027615e-06, "loss": 0.9165, "step": 2263 }, { "epoch": 1.784785179345684, "grad_norm": 0.341796875, "learning_rate": 8.432482716215663e-06, "loss": 0.9213, "step": 2264 }, { "epoch": 1.7855735120220733, "grad_norm": 0.34375, "learning_rate": 8.42341894178163e-06, "loss": 0.9196, "step": 2265 }, { "epoch": 1.7863618446984626, "grad_norm": 0.33984375, "learning_rate": 8.414356495360273e-06, "loss": 0.9167, "step": 2266 }, { "epoch": 1.7871501773748522, "grad_norm": 0.34765625, "learning_rate": 8.405295384585232e-06, "loss": 0.9492, "step": 2267 }, { "epoch": 1.7879385100512417, "grad_norm": 0.357421875, "learning_rate": 8.396235617089013e-06, "loss": 0.9291, "step": 2268 }, { "epoch": 1.788726842727631, "grad_norm": 0.333984375, "learning_rate": 8.387177200502996e-06, "loss": 0.9227, "step": 2269 }, { "epoch": 1.7895151754040204, "grad_norm": 0.3359375, "learning_rate": 8.378120142457415e-06, "loss": 0.9465, "step": 2270 }, { "epoch": 1.79030350808041, "grad_norm": 0.353515625, "learning_rate": 8.369064450581374e-06, "loss": 0.9078, "step": 2271 }, { "epoch": 1.7910918407567995, "grad_norm": 0.341796875, "learning_rate": 8.360010132502811e-06, "loss": 0.9575, "step": 2272 }, { "epoch": 1.7918801734331888, "grad_norm": 0.3515625, "learning_rate": 8.350957195848521e-06, "loss": 0.9787, "step": 2273 }, { "epoch": 1.7926685061095782, "grad_norm": 0.333984375, "learning_rate": 8.341905648244122e-06, "loss": 0.9434, "step": 2274 }, { "epoch": 1.7934568387859677, "grad_norm": 0.3515625, "learning_rate": 8.332855497314068e-06, "loss": 0.98, "step": 2275 }, { "epoch": 1.794245171462357, "grad_norm": 0.34765625, "learning_rate": 8.323806750681641e-06, "loss": 0.9152, "step": 2276 }, { "epoch": 1.7950335041387464, "grad_norm": 0.3359375, "learning_rate": 8.314759415968936e-06, "loss": 0.9495, "step": 2277 }, { "epoch": 1.795821836815136, "grad_norm": 0.34375, "learning_rate": 8.305713500796852e-06, "loss": 0.9493, "step": 2278 }, { "epoch": 1.7966101694915255, "grad_norm": 0.33984375, "learning_rate": 8.296669012785105e-06, "loss": 0.9362, "step": 2279 }, { "epoch": 1.7973985021679149, "grad_norm": 0.34375, "learning_rate": 8.2876259595522e-06, "loss": 0.9438, "step": 2280 }, { "epoch": 1.7981868348443042, "grad_norm": 0.33984375, "learning_rate": 8.278584348715436e-06, "loss": 0.9183, "step": 2281 }, { "epoch": 1.7989751675206938, "grad_norm": 0.341796875, "learning_rate": 8.2695441878909e-06, "loss": 0.9401, "step": 2282 }, { "epoch": 1.7997635001970833, "grad_norm": 0.333984375, "learning_rate": 8.260505484693449e-06, "loss": 0.9107, "step": 2283 }, { "epoch": 1.8005518328734726, "grad_norm": 0.341796875, "learning_rate": 8.251468246736725e-06, "loss": 0.9302, "step": 2284 }, { "epoch": 1.801340165549862, "grad_norm": 0.3515625, "learning_rate": 8.242432481633119e-06, "loss": 0.9555, "step": 2285 }, { "epoch": 1.8021284982262515, "grad_norm": 0.345703125, "learning_rate": 8.2333981969938e-06, "loss": 0.9513, "step": 2286 }, { "epoch": 1.8029168309026409, "grad_norm": 0.345703125, "learning_rate": 8.224365400428676e-06, "loss": 0.9089, "step": 2287 }, { "epoch": 1.8037051635790302, "grad_norm": 0.345703125, "learning_rate": 8.215334099546411e-06, "loss": 0.9234, "step": 2288 }, { "epoch": 1.8044934962554198, "grad_norm": 0.349609375, "learning_rate": 8.206304301954397e-06, "loss": 0.9159, "step": 2289 }, { "epoch": 1.8052818289318093, "grad_norm": 0.361328125, "learning_rate": 8.197276015258773e-06, "loss": 0.9512, "step": 2290 }, { "epoch": 1.8060701616081987, "grad_norm": 0.34765625, "learning_rate": 8.188249247064398e-06, "loss": 0.9133, "step": 2291 }, { "epoch": 1.806858494284588, "grad_norm": 0.34375, "learning_rate": 8.179224004974857e-06, "loss": 0.9645, "step": 2292 }, { "epoch": 1.8076468269609776, "grad_norm": 0.34765625, "learning_rate": 8.17020029659244e-06, "loss": 0.9525, "step": 2293 }, { "epoch": 1.8084351596373671, "grad_norm": 0.34765625, "learning_rate": 8.161178129518155e-06, "loss": 0.918, "step": 2294 }, { "epoch": 1.8092234923137565, "grad_norm": 0.34375, "learning_rate": 8.152157511351704e-06, "loss": 0.9247, "step": 2295 }, { "epoch": 1.8100118249901458, "grad_norm": 0.341796875, "learning_rate": 8.143138449691495e-06, "loss": 0.9572, "step": 2296 }, { "epoch": 1.8108001576665353, "grad_norm": 0.353515625, "learning_rate": 8.134120952134613e-06, "loss": 0.9076, "step": 2297 }, { "epoch": 1.8115884903429247, "grad_norm": 0.3515625, "learning_rate": 8.125105026276832e-06, "loss": 0.9757, "step": 2298 }, { "epoch": 1.812376823019314, "grad_norm": 0.349609375, "learning_rate": 8.116090679712601e-06, "loss": 0.9604, "step": 2299 }, { "epoch": 1.8131651556957036, "grad_norm": 0.341796875, "learning_rate": 8.107077920035032e-06, "loss": 0.9572, "step": 2300 }, { "epoch": 1.8139534883720931, "grad_norm": 0.33984375, "learning_rate": 8.098066754835916e-06, "loss": 0.9658, "step": 2301 }, { "epoch": 1.8147418210484825, "grad_norm": 0.34375, "learning_rate": 8.089057191705686e-06, "loss": 0.9462, "step": 2302 }, { "epoch": 1.8155301537248718, "grad_norm": 0.34765625, "learning_rate": 8.080049238233439e-06, "loss": 0.9557, "step": 2303 }, { "epoch": 1.8163184864012614, "grad_norm": 0.3515625, "learning_rate": 8.071042902006896e-06, "loss": 0.9244, "step": 2304 }, { "epoch": 1.817106819077651, "grad_norm": 0.34765625, "learning_rate": 8.062038190612431e-06, "loss": 0.9549, "step": 2305 }, { "epoch": 1.81789515175404, "grad_norm": 0.349609375, "learning_rate": 8.053035111635054e-06, "loss": 0.9257, "step": 2306 }, { "epoch": 1.8186834844304296, "grad_norm": 0.3515625, "learning_rate": 8.044033672658387e-06, "loss": 0.9569, "step": 2307 }, { "epoch": 1.8194718171068192, "grad_norm": 0.349609375, "learning_rate": 8.035033881264676e-06, "loss": 0.922, "step": 2308 }, { "epoch": 1.8202601497832085, "grad_norm": 0.36328125, "learning_rate": 8.026035745034774e-06, "loss": 0.9816, "step": 2309 }, { "epoch": 1.8210484824595978, "grad_norm": 0.34375, "learning_rate": 8.017039271548154e-06, "loss": 0.9479, "step": 2310 }, { "epoch": 1.8218368151359874, "grad_norm": 0.34765625, "learning_rate": 8.008044468382878e-06, "loss": 0.9619, "step": 2311 }, { "epoch": 1.822625147812377, "grad_norm": 0.359375, "learning_rate": 7.999051343115595e-06, "loss": 0.9464, "step": 2312 }, { "epoch": 1.8234134804887663, "grad_norm": 0.33984375, "learning_rate": 7.990059903321554e-06, "loss": 0.9431, "step": 2313 }, { "epoch": 1.8242018131651556, "grad_norm": 0.3359375, "learning_rate": 7.981070156574572e-06, "loss": 0.9469, "step": 2314 }, { "epoch": 1.8249901458415452, "grad_norm": 0.337890625, "learning_rate": 7.972082110447052e-06, "loss": 0.9494, "step": 2315 }, { "epoch": 1.8257784785179347, "grad_norm": 0.345703125, "learning_rate": 7.96309577250996e-06, "loss": 0.9516, "step": 2316 }, { "epoch": 1.8265668111943238, "grad_norm": 0.341796875, "learning_rate": 7.954111150332814e-06, "loss": 0.9632, "step": 2317 }, { "epoch": 1.8273551438707134, "grad_norm": 0.35546875, "learning_rate": 7.945128251483704e-06, "loss": 0.9221, "step": 2318 }, { "epoch": 1.828143476547103, "grad_norm": 0.349609375, "learning_rate": 7.936147083529245e-06, "loss": 0.9354, "step": 2319 }, { "epoch": 1.8289318092234923, "grad_norm": 0.341796875, "learning_rate": 7.927167654034622e-06, "loss": 0.9131, "step": 2320 }, { "epoch": 1.8297201418998816, "grad_norm": 0.349609375, "learning_rate": 7.918189970563534e-06, "loss": 0.9183, "step": 2321 }, { "epoch": 1.8305084745762712, "grad_norm": 0.341796875, "learning_rate": 7.90921404067822e-06, "loss": 0.9484, "step": 2322 }, { "epoch": 1.8312968072526608, "grad_norm": 0.390625, "learning_rate": 7.900239871939435e-06, "loss": 0.9504, "step": 2323 }, { "epoch": 1.83208513992905, "grad_norm": 0.34375, "learning_rate": 7.891267471906453e-06, "loss": 0.953, "step": 2324 }, { "epoch": 1.8328734726054394, "grad_norm": 0.34765625, "learning_rate": 7.882296848137063e-06, "loss": 0.9292, "step": 2325 }, { "epoch": 1.833661805281829, "grad_norm": 0.345703125, "learning_rate": 7.873328008187554e-06, "loss": 0.9623, "step": 2326 }, { "epoch": 1.8344501379582183, "grad_norm": 0.341796875, "learning_rate": 7.864360959612714e-06, "loss": 0.9275, "step": 2327 }, { "epoch": 1.8352384706346077, "grad_norm": 0.33984375, "learning_rate": 7.855395709965814e-06, "loss": 0.9542, "step": 2328 }, { "epoch": 1.8360268033109972, "grad_norm": 0.345703125, "learning_rate": 7.846432266798618e-06, "loss": 0.9324, "step": 2329 }, { "epoch": 1.8368151359873868, "grad_norm": 0.33984375, "learning_rate": 7.83747063766137e-06, "loss": 0.9322, "step": 2330 }, { "epoch": 1.837603468663776, "grad_norm": 0.345703125, "learning_rate": 7.828510830102785e-06, "loss": 0.9689, "step": 2331 }, { "epoch": 1.8383918013401654, "grad_norm": 0.353515625, "learning_rate": 7.819552851670033e-06, "loss": 0.9308, "step": 2332 }, { "epoch": 1.839180134016555, "grad_norm": 0.337890625, "learning_rate": 7.810596709908759e-06, "loss": 0.9237, "step": 2333 }, { "epoch": 1.8399684666929446, "grad_norm": 0.349609375, "learning_rate": 7.801642412363042e-06, "loss": 0.9395, "step": 2334 }, { "epoch": 1.840756799369334, "grad_norm": 0.341796875, "learning_rate": 7.792689966575433e-06, "loss": 0.9446, "step": 2335 }, { "epoch": 1.8415451320457232, "grad_norm": 0.33984375, "learning_rate": 7.7837393800869e-06, "loss": 0.9627, "step": 2336 }, { "epoch": 1.8423334647221128, "grad_norm": 0.349609375, "learning_rate": 7.774790660436857e-06, "loss": 0.9724, "step": 2337 }, { "epoch": 1.8431217973985021, "grad_norm": 0.361328125, "learning_rate": 7.765843815163143e-06, "loss": 0.9491, "step": 2338 }, { "epoch": 1.8439101300748915, "grad_norm": 0.341796875, "learning_rate": 7.756898851802014e-06, "loss": 0.9494, "step": 2339 }, { "epoch": 1.844698462751281, "grad_norm": 0.3671875, "learning_rate": 7.747955777888145e-06, "loss": 0.8972, "step": 2340 }, { "epoch": 1.8454867954276706, "grad_norm": 0.345703125, "learning_rate": 7.739014600954623e-06, "loss": 0.911, "step": 2341 }, { "epoch": 1.84627512810406, "grad_norm": 0.3359375, "learning_rate": 7.73007532853293e-06, "loss": 0.9526, "step": 2342 }, { "epoch": 1.8470634607804493, "grad_norm": 0.3359375, "learning_rate": 7.721137968152944e-06, "loss": 0.9167, "step": 2343 }, { "epoch": 1.8478517934568388, "grad_norm": 0.34375, "learning_rate": 7.712202527342937e-06, "loss": 0.9168, "step": 2344 }, { "epoch": 1.8486401261332284, "grad_norm": 0.345703125, "learning_rate": 7.703269013629565e-06, "loss": 0.9131, "step": 2345 }, { "epoch": 1.8494284588096177, "grad_norm": 0.341796875, "learning_rate": 7.694337434537856e-06, "loss": 0.9308, "step": 2346 }, { "epoch": 1.850216791486007, "grad_norm": 0.349609375, "learning_rate": 7.685407797591207e-06, "loss": 0.9304, "step": 2347 }, { "epoch": 1.8510051241623966, "grad_norm": 0.33984375, "learning_rate": 7.676480110311385e-06, "loss": 0.9329, "step": 2348 }, { "epoch": 1.851793456838786, "grad_norm": 0.345703125, "learning_rate": 7.667554380218513e-06, "loss": 0.9476, "step": 2349 }, { "epoch": 1.8525817895151753, "grad_norm": 0.34765625, "learning_rate": 7.658630614831066e-06, "loss": 0.9478, "step": 2350 }, { "epoch": 1.8533701221915648, "grad_norm": 0.34765625, "learning_rate": 7.649708821665856e-06, "loss": 0.9159, "step": 2351 }, { "epoch": 1.8541584548679544, "grad_norm": 0.3515625, "learning_rate": 7.640789008238044e-06, "loss": 0.8851, "step": 2352 }, { "epoch": 1.8549467875443437, "grad_norm": 0.349609375, "learning_rate": 7.631871182061117e-06, "loss": 0.9299, "step": 2353 }, { "epoch": 1.855735120220733, "grad_norm": 0.345703125, "learning_rate": 7.622955350646899e-06, "loss": 0.9233, "step": 2354 }, { "epoch": 1.8565234528971226, "grad_norm": 0.34375, "learning_rate": 7.614041521505517e-06, "loss": 0.9418, "step": 2355 }, { "epoch": 1.8573117855735122, "grad_norm": 0.345703125, "learning_rate": 7.605129702145422e-06, "loss": 0.943, "step": 2356 }, { "epoch": 1.8581001182499015, "grad_norm": 0.33984375, "learning_rate": 7.596219900073372e-06, "loss": 0.9406, "step": 2357 }, { "epoch": 1.8588884509262908, "grad_norm": 0.34375, "learning_rate": 7.587312122794414e-06, "loss": 0.9534, "step": 2358 }, { "epoch": 1.8596767836026804, "grad_norm": 0.345703125, "learning_rate": 7.578406377811914e-06, "loss": 0.9661, "step": 2359 }, { "epoch": 1.8604651162790697, "grad_norm": 0.34375, "learning_rate": 7.569502672627502e-06, "loss": 0.9684, "step": 2360 }, { "epoch": 1.861253448955459, "grad_norm": 0.353515625, "learning_rate": 7.560601014741103e-06, "loss": 0.9668, "step": 2361 }, { "epoch": 1.8620417816318486, "grad_norm": 0.349609375, "learning_rate": 7.5517014116509094e-06, "loss": 0.9829, "step": 2362 }, { "epoch": 1.8628301143082382, "grad_norm": 0.361328125, "learning_rate": 7.5428038708533856e-06, "loss": 0.9407, "step": 2363 }, { "epoch": 1.8636184469846275, "grad_norm": 0.369140625, "learning_rate": 7.533908399843266e-06, "loss": 0.9718, "step": 2364 }, { "epoch": 1.8644067796610169, "grad_norm": 0.34375, "learning_rate": 7.525015006113537e-06, "loss": 0.9365, "step": 2365 }, { "epoch": 1.8651951123374064, "grad_norm": 0.345703125, "learning_rate": 7.516123697155424e-06, "loss": 0.9267, "step": 2366 }, { "epoch": 1.865983445013796, "grad_norm": 0.345703125, "learning_rate": 7.507234480458414e-06, "loss": 0.9286, "step": 2367 }, { "epoch": 1.866771777690185, "grad_norm": 0.345703125, "learning_rate": 7.49834736351022e-06, "loss": 0.9821, "step": 2368 }, { "epoch": 1.8675601103665747, "grad_norm": 0.34375, "learning_rate": 7.489462353796792e-06, "loss": 0.9306, "step": 2369 }, { "epoch": 1.8683484430429642, "grad_norm": 0.345703125, "learning_rate": 7.4805794588023086e-06, "loss": 0.9392, "step": 2370 }, { "epoch": 1.8691367757193535, "grad_norm": 0.341796875, "learning_rate": 7.47169868600915e-06, "loss": 0.948, "step": 2371 }, { "epoch": 1.8699251083957429, "grad_norm": 0.341796875, "learning_rate": 7.462820042897932e-06, "loss": 0.9271, "step": 2372 }, { "epoch": 1.8707134410721324, "grad_norm": 0.337890625, "learning_rate": 7.45394353694745e-06, "loss": 0.922, "step": 2373 }, { "epoch": 1.871501773748522, "grad_norm": 0.341796875, "learning_rate": 7.4450691756347315e-06, "loss": 0.9121, "step": 2374 }, { "epoch": 1.8722901064249113, "grad_norm": 0.337890625, "learning_rate": 7.436196966434968e-06, "loss": 0.9315, "step": 2375 }, { "epoch": 1.8730784391013007, "grad_norm": 0.34765625, "learning_rate": 7.427326916821557e-06, "loss": 0.9509, "step": 2376 }, { "epoch": 1.8738667717776902, "grad_norm": 0.34375, "learning_rate": 7.418459034266061e-06, "loss": 0.9564, "step": 2377 }, { "epoch": 1.8746551044540796, "grad_norm": 0.341796875, "learning_rate": 7.409593326238239e-06, "loss": 0.9299, "step": 2378 }, { "epoch": 1.875443437130469, "grad_norm": 0.337890625, "learning_rate": 7.4007298002059965e-06, "loss": 0.9452, "step": 2379 }, { "epoch": 1.8762317698068585, "grad_norm": 0.349609375, "learning_rate": 7.391868463635414e-06, "loss": 0.9208, "step": 2380 }, { "epoch": 1.877020102483248, "grad_norm": 0.34375, "learning_rate": 7.383009323990723e-06, "loss": 0.9393, "step": 2381 }, { "epoch": 1.8778084351596374, "grad_norm": 0.34765625, "learning_rate": 7.3741523887343015e-06, "loss": 0.9611, "step": 2382 }, { "epoch": 1.8785967678360267, "grad_norm": 0.34765625, "learning_rate": 7.3652976653266785e-06, "loss": 0.9763, "step": 2383 }, { "epoch": 1.8793851005124163, "grad_norm": 0.34375, "learning_rate": 7.356445161226516e-06, "loss": 0.9205, "step": 2384 }, { "epoch": 1.8801734331888058, "grad_norm": 0.34375, "learning_rate": 7.347594883890608e-06, "loss": 0.9115, "step": 2385 }, { "epoch": 1.8809617658651951, "grad_norm": 0.345703125, "learning_rate": 7.338746840773866e-06, "loss": 0.9255, "step": 2386 }, { "epoch": 1.8817500985415845, "grad_norm": 0.333984375, "learning_rate": 7.3299010393293255e-06, "loss": 0.9302, "step": 2387 }, { "epoch": 1.882538431217974, "grad_norm": 0.345703125, "learning_rate": 7.321057487008136e-06, "loss": 0.9487, "step": 2388 }, { "epoch": 1.8833267638943634, "grad_norm": 0.337890625, "learning_rate": 7.312216191259552e-06, "loss": 0.8924, "step": 2389 }, { "epoch": 1.8841150965707527, "grad_norm": 0.345703125, "learning_rate": 7.303377159530919e-06, "loss": 0.9558, "step": 2390 }, { "epoch": 1.8849034292471423, "grad_norm": 0.337890625, "learning_rate": 7.294540399267682e-06, "loss": 0.9722, "step": 2391 }, { "epoch": 1.8856917619235318, "grad_norm": 0.341796875, "learning_rate": 7.285705917913372e-06, "loss": 0.9511, "step": 2392 }, { "epoch": 1.8864800945999212, "grad_norm": 0.3515625, "learning_rate": 7.276873722909606e-06, "loss": 0.9367, "step": 2393 }, { "epoch": 1.8872684272763105, "grad_norm": 0.353515625, "learning_rate": 7.268043821696062e-06, "loss": 0.9044, "step": 2394 }, { "epoch": 1.8880567599527, "grad_norm": 0.341796875, "learning_rate": 7.259216221710496e-06, "loss": 0.9523, "step": 2395 }, { "epoch": 1.8888450926290896, "grad_norm": 0.333984375, "learning_rate": 7.250390930388725e-06, "loss": 0.9149, "step": 2396 }, { "epoch": 1.889633425305479, "grad_norm": 0.359375, "learning_rate": 7.24156795516461e-06, "loss": 0.9351, "step": 2397 }, { "epoch": 1.8904217579818683, "grad_norm": 0.37109375, "learning_rate": 7.232747303470082e-06, "loss": 0.9813, "step": 2398 }, { "epoch": 1.8912100906582578, "grad_norm": 0.349609375, "learning_rate": 7.223928982735096e-06, "loss": 0.9254, "step": 2399 }, { "epoch": 1.8919984233346472, "grad_norm": 0.341796875, "learning_rate": 7.215113000387654e-06, "loss": 0.9146, "step": 2400 }, { "epoch": 1.8927867560110365, "grad_norm": 0.3359375, "learning_rate": 7.2062993638537815e-06, "loss": 0.9242, "step": 2401 }, { "epoch": 1.893575088687426, "grad_norm": 0.34375, "learning_rate": 7.197488080557531e-06, "loss": 0.9473, "step": 2402 }, { "epoch": 1.8943634213638156, "grad_norm": 0.34765625, "learning_rate": 7.188679157920977e-06, "loss": 0.9433, "step": 2403 }, { "epoch": 1.895151754040205, "grad_norm": 0.3515625, "learning_rate": 7.1798726033642e-06, "loss": 0.9697, "step": 2404 }, { "epoch": 1.8959400867165943, "grad_norm": 0.341796875, "learning_rate": 7.171068424305286e-06, "loss": 0.9639, "step": 2405 }, { "epoch": 1.8967284193929839, "grad_norm": 0.341796875, "learning_rate": 7.1622666281603235e-06, "loss": 0.9322, "step": 2406 }, { "epoch": 1.8975167520693734, "grad_norm": 0.33984375, "learning_rate": 7.153467222343386e-06, "loss": 0.959, "step": 2407 }, { "epoch": 1.8983050847457628, "grad_norm": 0.359375, "learning_rate": 7.144670214266551e-06, "loss": 0.9579, "step": 2408 }, { "epoch": 1.899093417422152, "grad_norm": 0.349609375, "learning_rate": 7.1358756113398545e-06, "loss": 0.9549, "step": 2409 }, { "epoch": 1.8998817500985417, "grad_norm": 0.345703125, "learning_rate": 7.127083420971319e-06, "loss": 0.9558, "step": 2410 }, { "epoch": 1.900670082774931, "grad_norm": 0.34765625, "learning_rate": 7.118293650566931e-06, "loss": 0.9275, "step": 2411 }, { "epoch": 1.9014584154513203, "grad_norm": 0.34765625, "learning_rate": 7.109506307530646e-06, "loss": 0.9323, "step": 2412 }, { "epoch": 1.9022467481277099, "grad_norm": 0.34375, "learning_rate": 7.100721399264363e-06, "loss": 0.9339, "step": 2413 }, { "epoch": 1.9030350808040994, "grad_norm": 0.341796875, "learning_rate": 7.0919389331679365e-06, "loss": 0.9016, "step": 2414 }, { "epoch": 1.9038234134804888, "grad_norm": 0.33984375, "learning_rate": 7.083158916639169e-06, "loss": 0.9162, "step": 2415 }, { "epoch": 1.9046117461568781, "grad_norm": 0.34375, "learning_rate": 7.074381357073782e-06, "loss": 0.9441, "step": 2416 }, { "epoch": 1.9054000788332677, "grad_norm": 0.341796875, "learning_rate": 7.065606261865453e-06, "loss": 0.9597, "step": 2417 }, { "epoch": 1.9061884115096572, "grad_norm": 0.345703125, "learning_rate": 7.056833638405762e-06, "loss": 0.8788, "step": 2418 }, { "epoch": 1.9069767441860463, "grad_norm": 0.345703125, "learning_rate": 7.048063494084218e-06, "loss": 0.9501, "step": 2419 }, { "epoch": 1.907765076862436, "grad_norm": 0.3515625, "learning_rate": 7.039295836288238e-06, "loss": 0.9325, "step": 2420 }, { "epoch": 1.9085534095388255, "grad_norm": 0.341796875, "learning_rate": 7.0305306724031396e-06, "loss": 0.9419, "step": 2421 }, { "epoch": 1.9093417422152148, "grad_norm": 0.34375, "learning_rate": 7.021768009812155e-06, "loss": 0.9344, "step": 2422 }, { "epoch": 1.9101300748916041, "grad_norm": 0.361328125, "learning_rate": 7.013007855896396e-06, "loss": 0.9783, "step": 2423 }, { "epoch": 1.9109184075679937, "grad_norm": 0.361328125, "learning_rate": 7.0042502180348635e-06, "loss": 0.9577, "step": 2424 }, { "epoch": 1.9117067402443833, "grad_norm": 0.34375, "learning_rate": 6.995495103604442e-06, "loss": 0.9469, "step": 2425 }, { "epoch": 1.9124950729207726, "grad_norm": 0.345703125, "learning_rate": 6.9867425199798834e-06, "loss": 0.9787, "step": 2426 }, { "epoch": 1.913283405597162, "grad_norm": 0.34765625, "learning_rate": 6.977992474533823e-06, "loss": 0.9591, "step": 2427 }, { "epoch": 1.9140717382735515, "grad_norm": 0.33984375, "learning_rate": 6.969244974636745e-06, "loss": 0.9552, "step": 2428 }, { "epoch": 1.914860070949941, "grad_norm": 0.33984375, "learning_rate": 6.96050002765699e-06, "loss": 0.9195, "step": 2429 }, { "epoch": 1.9156484036263302, "grad_norm": 0.3359375, "learning_rate": 6.9517576409607545e-06, "loss": 0.9454, "step": 2430 }, { "epoch": 1.9164367363027197, "grad_norm": 0.35546875, "learning_rate": 6.943017821912068e-06, "loss": 0.946, "step": 2431 }, { "epoch": 1.9172250689791093, "grad_norm": 0.34375, "learning_rate": 6.934280577872814e-06, "loss": 0.9342, "step": 2432 }, { "epoch": 1.9180134016554986, "grad_norm": 0.337890625, "learning_rate": 6.925545916202692e-06, "loss": 0.9598, "step": 2433 }, { "epoch": 1.918801734331888, "grad_norm": 0.341796875, "learning_rate": 6.916813844259234e-06, "loss": 0.9001, "step": 2434 }, { "epoch": 1.9195900670082775, "grad_norm": 0.345703125, "learning_rate": 6.908084369397783e-06, "loss": 0.96, "step": 2435 }, { "epoch": 1.920378399684667, "grad_norm": 0.35546875, "learning_rate": 6.8993574989714995e-06, "loss": 0.9653, "step": 2436 }, { "epoch": 1.9211667323610564, "grad_norm": 0.33984375, "learning_rate": 6.890633240331355e-06, "loss": 0.9111, "step": 2437 }, { "epoch": 1.9219550650374457, "grad_norm": 0.33984375, "learning_rate": 6.8819116008261145e-06, "loss": 0.9288, "step": 2438 }, { "epoch": 1.9227433977138353, "grad_norm": 0.3515625, "learning_rate": 6.87319258780234e-06, "loss": 0.923, "step": 2439 }, { "epoch": 1.9235317303902246, "grad_norm": 0.5078125, "learning_rate": 6.8644762086043734e-06, "loss": 0.8853, "step": 2440 }, { "epoch": 1.924320063066614, "grad_norm": 0.34765625, "learning_rate": 6.855762470574345e-06, "loss": 0.9278, "step": 2441 }, { "epoch": 1.9251083957430035, "grad_norm": 0.34375, "learning_rate": 6.847051381052165e-06, "loss": 0.9349, "step": 2442 }, { "epoch": 1.925896728419393, "grad_norm": 0.349609375, "learning_rate": 6.838342947375507e-06, "loss": 0.9668, "step": 2443 }, { "epoch": 1.9266850610957824, "grad_norm": 0.34765625, "learning_rate": 6.829637176879802e-06, "loss": 0.9413, "step": 2444 }, { "epoch": 1.9274733937721718, "grad_norm": 0.337890625, "learning_rate": 6.820934076898247e-06, "loss": 0.9065, "step": 2445 }, { "epoch": 1.9282617264485613, "grad_norm": 0.384765625, "learning_rate": 6.812233654761779e-06, "loss": 0.9338, "step": 2446 }, { "epoch": 1.9290500591249509, "grad_norm": 0.35546875, "learning_rate": 6.8035359177990976e-06, "loss": 0.9321, "step": 2447 }, { "epoch": 1.9298383918013402, "grad_norm": 0.34765625, "learning_rate": 6.794840873336622e-06, "loss": 0.9582, "step": 2448 }, { "epoch": 1.9306267244777295, "grad_norm": 0.33984375, "learning_rate": 6.786148528698512e-06, "loss": 0.8855, "step": 2449 }, { "epoch": 1.931415057154119, "grad_norm": 0.357421875, "learning_rate": 6.777458891206649e-06, "loss": 0.9257, "step": 2450 }, { "epoch": 1.9322033898305084, "grad_norm": 0.34375, "learning_rate": 6.768771968180643e-06, "loss": 0.9285, "step": 2451 }, { "epoch": 1.9329917225068978, "grad_norm": 0.353515625, "learning_rate": 6.760087766937806e-06, "loss": 0.9278, "step": 2452 }, { "epoch": 1.9337800551832873, "grad_norm": 0.3515625, "learning_rate": 6.7514062947931655e-06, "loss": 0.9275, "step": 2453 }, { "epoch": 1.9345683878596769, "grad_norm": 0.3515625, "learning_rate": 6.742727559059448e-06, "loss": 0.9443, "step": 2454 }, { "epoch": 1.9353567205360662, "grad_norm": 0.341796875, "learning_rate": 6.734051567047068e-06, "loss": 0.9643, "step": 2455 }, { "epoch": 1.9361450532124556, "grad_norm": 0.345703125, "learning_rate": 6.725378326064141e-06, "loss": 0.95, "step": 2456 }, { "epoch": 1.9369333858888451, "grad_norm": 0.34765625, "learning_rate": 6.71670784341646e-06, "loss": 0.9507, "step": 2457 }, { "epoch": 1.9377217185652347, "grad_norm": 0.345703125, "learning_rate": 6.708040126407493e-06, "loss": 0.9768, "step": 2458 }, { "epoch": 1.938510051241624, "grad_norm": 0.34375, "learning_rate": 6.699375182338379e-06, "loss": 0.9591, "step": 2459 }, { "epoch": 1.9392983839180133, "grad_norm": 0.357421875, "learning_rate": 6.690713018507917e-06, "loss": 0.9221, "step": 2460 }, { "epoch": 1.940086716594403, "grad_norm": 0.353515625, "learning_rate": 6.682053642212576e-06, "loss": 1.0042, "step": 2461 }, { "epoch": 1.9408750492707922, "grad_norm": 0.353515625, "learning_rate": 6.673397060746469e-06, "loss": 0.9632, "step": 2462 }, { "epoch": 1.9416633819471816, "grad_norm": 0.33203125, "learning_rate": 6.664743281401351e-06, "loss": 0.8897, "step": 2463 }, { "epoch": 1.9424517146235711, "grad_norm": 0.33984375, "learning_rate": 6.656092311466624e-06, "loss": 0.9208, "step": 2464 }, { "epoch": 1.9432400472999607, "grad_norm": 0.34375, "learning_rate": 6.647444158229319e-06, "loss": 0.9201, "step": 2465 }, { "epoch": 1.94402837997635, "grad_norm": 0.34375, "learning_rate": 6.6387988289741e-06, "loss": 0.9368, "step": 2466 }, { "epoch": 1.9448167126527394, "grad_norm": 0.33984375, "learning_rate": 6.630156330983244e-06, "loss": 0.9547, "step": 2467 }, { "epoch": 1.945605045329129, "grad_norm": 0.3515625, "learning_rate": 6.62151667153665e-06, "loss": 0.9615, "step": 2468 }, { "epoch": 1.9463933780055185, "grad_norm": 0.341796875, "learning_rate": 6.612879857911825e-06, "loss": 0.9128, "step": 2469 }, { "epoch": 1.9471817106819078, "grad_norm": 0.44140625, "learning_rate": 6.6042458973838696e-06, "loss": 0.9297, "step": 2470 }, { "epoch": 1.9479700433582972, "grad_norm": 0.345703125, "learning_rate": 6.595614797225497e-06, "loss": 0.9274, "step": 2471 }, { "epoch": 1.9487583760346867, "grad_norm": 0.34765625, "learning_rate": 6.5869865647069995e-06, "loss": 0.9591, "step": 2472 }, { "epoch": 1.949546708711076, "grad_norm": 0.361328125, "learning_rate": 6.578361207096261e-06, "loss": 0.9476, "step": 2473 }, { "epoch": 1.9503350413874654, "grad_norm": 0.349609375, "learning_rate": 6.569738731658735e-06, "loss": 0.9126, "step": 2474 }, { "epoch": 1.951123374063855, "grad_norm": 0.345703125, "learning_rate": 6.561119145657451e-06, "loss": 0.9236, "step": 2475 }, { "epoch": 1.9519117067402445, "grad_norm": 0.34375, "learning_rate": 6.552502456353011e-06, "loss": 0.9429, "step": 2476 }, { "epoch": 1.9527000394166338, "grad_norm": 0.349609375, "learning_rate": 6.543888671003573e-06, "loss": 0.9448, "step": 2477 }, { "epoch": 1.9534883720930232, "grad_norm": 0.33984375, "learning_rate": 6.535277796864842e-06, "loss": 0.9487, "step": 2478 }, { "epoch": 1.9542767047694127, "grad_norm": 0.353515625, "learning_rate": 6.526669841190078e-06, "loss": 0.9384, "step": 2479 }, { "epoch": 1.9550650374458023, "grad_norm": 0.341796875, "learning_rate": 6.518064811230083e-06, "loss": 0.9161, "step": 2480 }, { "epoch": 1.9558533701221914, "grad_norm": 0.341796875, "learning_rate": 6.509462714233194e-06, "loss": 0.9148, "step": 2481 }, { "epoch": 1.956641702798581, "grad_norm": 0.33984375, "learning_rate": 6.500863557445274e-06, "loss": 0.9214, "step": 2482 }, { "epoch": 1.9574300354749705, "grad_norm": 0.345703125, "learning_rate": 6.492267348109711e-06, "loss": 0.9233, "step": 2483 }, { "epoch": 1.9582183681513599, "grad_norm": 0.341796875, "learning_rate": 6.483674093467409e-06, "loss": 0.939, "step": 2484 }, { "epoch": 1.9590067008277492, "grad_norm": 0.341796875, "learning_rate": 6.4750838007567915e-06, "loss": 0.9253, "step": 2485 }, { "epoch": 1.9597950335041388, "grad_norm": 0.34375, "learning_rate": 6.466496477213777e-06, "loss": 0.9134, "step": 2486 }, { "epoch": 1.9605833661805283, "grad_norm": 0.34765625, "learning_rate": 6.457912130071786e-06, "loss": 0.9626, "step": 2487 }, { "epoch": 1.9613716988569176, "grad_norm": 0.33984375, "learning_rate": 6.449330766561735e-06, "loss": 0.9374, "step": 2488 }, { "epoch": 1.962160031533307, "grad_norm": 0.345703125, "learning_rate": 6.4407523939120154e-06, "loss": 0.9555, "step": 2489 }, { "epoch": 1.9629483642096965, "grad_norm": 0.341796875, "learning_rate": 6.432177019348521e-06, "loss": 0.9263, "step": 2490 }, { "epoch": 1.9637366968860859, "grad_norm": 0.341796875, "learning_rate": 6.423604650094601e-06, "loss": 0.9503, "step": 2491 }, { "epoch": 1.9645250295624752, "grad_norm": 0.341796875, "learning_rate": 6.415035293371081e-06, "loss": 0.9433, "step": 2492 }, { "epoch": 1.9653133622388648, "grad_norm": 0.341796875, "learning_rate": 6.4064689563962505e-06, "loss": 0.9271, "step": 2493 }, { "epoch": 1.9661016949152543, "grad_norm": 0.34765625, "learning_rate": 6.397905646385844e-06, "loss": 0.9735, "step": 2494 }, { "epoch": 1.9668900275916437, "grad_norm": 0.341796875, "learning_rate": 6.389345370553065e-06, "loss": 0.9234, "step": 2495 }, { "epoch": 1.967678360268033, "grad_norm": 0.359375, "learning_rate": 6.3807881361085465e-06, "loss": 0.9447, "step": 2496 }, { "epoch": 1.9684666929444226, "grad_norm": 0.345703125, "learning_rate": 6.372233950260368e-06, "loss": 0.9401, "step": 2497 }, { "epoch": 1.9692550256208121, "grad_norm": 0.34375, "learning_rate": 6.363682820214032e-06, "loss": 0.9199, "step": 2498 }, { "epoch": 1.9700433582972015, "grad_norm": 0.345703125, "learning_rate": 6.355134753172474e-06, "loss": 0.9411, "step": 2499 }, { "epoch": 1.9708316909735908, "grad_norm": 0.34375, "learning_rate": 6.34658975633605e-06, "loss": 0.964, "step": 2500 }, { "epoch": 1.9716200236499803, "grad_norm": 0.3359375, "learning_rate": 6.338047836902528e-06, "loss": 0.9186, "step": 2501 }, { "epoch": 1.9724083563263697, "grad_norm": 0.35546875, "learning_rate": 6.32950900206708e-06, "loss": 0.9769, "step": 2502 }, { "epoch": 1.973196689002759, "grad_norm": 0.3515625, "learning_rate": 6.320973259022286e-06, "loss": 0.9374, "step": 2503 }, { "epoch": 1.9739850216791486, "grad_norm": 0.3515625, "learning_rate": 6.3124406149581154e-06, "loss": 1.0066, "step": 2504 }, { "epoch": 1.9747733543555381, "grad_norm": 0.33984375, "learning_rate": 6.303911077061937e-06, "loss": 0.9305, "step": 2505 }, { "epoch": 1.9755616870319275, "grad_norm": 0.345703125, "learning_rate": 6.295384652518491e-06, "loss": 0.942, "step": 2506 }, { "epoch": 1.9763500197083168, "grad_norm": 0.3359375, "learning_rate": 6.286861348509903e-06, "loss": 0.8971, "step": 2507 }, { "epoch": 1.9771383523847064, "grad_norm": 0.34375, "learning_rate": 6.278341172215669e-06, "loss": 0.968, "step": 2508 }, { "epoch": 1.977926685061096, "grad_norm": 0.345703125, "learning_rate": 6.269824130812645e-06, "loss": 0.9436, "step": 2509 }, { "epoch": 1.9787150177374853, "grad_norm": 0.34375, "learning_rate": 6.261310231475055e-06, "loss": 0.9599, "step": 2510 }, { "epoch": 1.9795033504138746, "grad_norm": 0.349609375, "learning_rate": 6.252799481374472e-06, "loss": 0.9681, "step": 2511 }, { "epoch": 1.9802916830902642, "grad_norm": 0.349609375, "learning_rate": 6.244291887679819e-06, "loss": 0.9476, "step": 2512 }, { "epoch": 1.9810800157666535, "grad_norm": 0.341796875, "learning_rate": 6.23578745755735e-06, "loss": 0.9405, "step": 2513 }, { "epoch": 1.9818683484430428, "grad_norm": 0.353515625, "learning_rate": 6.227286198170663e-06, "loss": 0.9435, "step": 2514 }, { "epoch": 1.9826566811194324, "grad_norm": 0.337890625, "learning_rate": 6.218788116680689e-06, "loss": 0.9323, "step": 2515 }, { "epoch": 1.983445013795822, "grad_norm": 0.365234375, "learning_rate": 6.210293220245678e-06, "loss": 0.9339, "step": 2516 }, { "epoch": 1.9842333464722113, "grad_norm": 0.33984375, "learning_rate": 6.20180151602119e-06, "loss": 0.948, "step": 2517 }, { "epoch": 1.9850216791486006, "grad_norm": 0.345703125, "learning_rate": 6.193313011160104e-06, "loss": 0.9594, "step": 2518 }, { "epoch": 1.9858100118249902, "grad_norm": 0.388671875, "learning_rate": 6.184827712812605e-06, "loss": 0.9686, "step": 2519 }, { "epoch": 1.9865983445013797, "grad_norm": 0.345703125, "learning_rate": 6.176345628126176e-06, "loss": 0.9397, "step": 2520 }, { "epoch": 1.987386677177769, "grad_norm": 0.337890625, "learning_rate": 6.167866764245586e-06, "loss": 0.9361, "step": 2521 }, { "epoch": 1.9881750098541584, "grad_norm": 0.34375, "learning_rate": 6.159391128312899e-06, "loss": 0.967, "step": 2522 }, { "epoch": 1.988963342530548, "grad_norm": 0.48046875, "learning_rate": 6.150918727467455e-06, "loss": 0.9423, "step": 2523 }, { "epoch": 1.9897516752069373, "grad_norm": 0.34375, "learning_rate": 6.1424495688458785e-06, "loss": 0.9708, "step": 2524 }, { "epoch": 1.9905400078833266, "grad_norm": 0.33984375, "learning_rate": 6.133983659582048e-06, "loss": 0.9077, "step": 2525 }, { "epoch": 1.9913283405597162, "grad_norm": 0.341796875, "learning_rate": 6.125521006807116e-06, "loss": 0.9352, "step": 2526 }, { "epoch": 1.9921166732361058, "grad_norm": 0.345703125, "learning_rate": 6.1170616176494916e-06, "loss": 0.9209, "step": 2527 }, { "epoch": 1.992905005912495, "grad_norm": 0.373046875, "learning_rate": 6.108605499234821e-06, "loss": 0.9442, "step": 2528 }, { "epoch": 1.9936933385888844, "grad_norm": 0.34375, "learning_rate": 6.10015265868602e-06, "loss": 0.9621, "step": 2529 }, { "epoch": 1.994481671265274, "grad_norm": 0.341796875, "learning_rate": 6.091703103123223e-06, "loss": 0.9213, "step": 2530 }, { "epoch": 1.9952700039416635, "grad_norm": 0.341796875, "learning_rate": 6.083256839663807e-06, "loss": 0.9612, "step": 2531 }, { "epoch": 1.9960583366180527, "grad_norm": 0.333984375, "learning_rate": 6.0748138754223665e-06, "loss": 0.9342, "step": 2532 }, { "epoch": 1.9968466692944422, "grad_norm": 0.353515625, "learning_rate": 6.066374217510725e-06, "loss": 0.9446, "step": 2533 }, { "epoch": 1.9976350019708318, "grad_norm": 0.33984375, "learning_rate": 6.057937873037925e-06, "loss": 0.9314, "step": 2534 }, { "epoch": 1.998423334647221, "grad_norm": 0.345703125, "learning_rate": 6.04950484911021e-06, "loss": 0.9263, "step": 2535 }, { "epoch": 1.9992116673236104, "grad_norm": 0.34765625, "learning_rate": 6.041075152831025e-06, "loss": 0.9513, "step": 2536 }, { "epoch": 2.0, "grad_norm": 0.349609375, "learning_rate": 6.032648791301019e-06, "loss": 0.9438, "step": 2537 }, { "epoch": 2.0, "eval_loss": 0.9453941583633423, "eval_runtime": 615.3851, "eval_samples_per_second": 26.705, "eval_steps_per_second": 1.67, "step": 2537 }, { "epoch": 2.0007883326763896, "grad_norm": 0.337890625, "learning_rate": 6.024225771618024e-06, "loss": 0.9302, "step": 2538 }, { "epoch": 2.0015766653527787, "grad_norm": 0.34375, "learning_rate": 6.015806100877069e-06, "loss": 0.9629, "step": 2539 }, { "epoch": 2.0023649980291682, "grad_norm": 0.34765625, "learning_rate": 6.007389786170355e-06, "loss": 0.9266, "step": 2540 }, { "epoch": 2.003153330705558, "grad_norm": 0.341796875, "learning_rate": 5.998976834587246e-06, "loss": 0.9086, "step": 2541 }, { "epoch": 2.0039416633819473, "grad_norm": 0.345703125, "learning_rate": 5.9905672532142955e-06, "loss": 0.9351, "step": 2542 }, { "epoch": 2.0047299960583365, "grad_norm": 0.365234375, "learning_rate": 5.982161049135191e-06, "loss": 0.951, "step": 2543 }, { "epoch": 2.005518328734726, "grad_norm": 0.353515625, "learning_rate": 5.973758229430806e-06, "loss": 0.9732, "step": 2544 }, { "epoch": 2.0063066614111156, "grad_norm": 0.3515625, "learning_rate": 5.965358801179138e-06, "loss": 0.9294, "step": 2545 }, { "epoch": 2.007094994087505, "grad_norm": 0.34765625, "learning_rate": 5.956962771455338e-06, "loss": 0.9654, "step": 2546 }, { "epoch": 2.0078833267638942, "grad_norm": 0.34375, "learning_rate": 5.9485701473316925e-06, "loss": 0.9189, "step": 2547 }, { "epoch": 2.008671659440284, "grad_norm": 0.345703125, "learning_rate": 5.94018093587762e-06, "loss": 0.9805, "step": 2548 }, { "epoch": 2.0094599921166734, "grad_norm": 0.35546875, "learning_rate": 5.9317951441596656e-06, "loss": 0.9719, "step": 2549 }, { "epoch": 2.0102483247930625, "grad_norm": 0.3984375, "learning_rate": 5.923412779241493e-06, "loss": 0.9101, "step": 2550 }, { "epoch": 2.011036657469452, "grad_norm": 0.3515625, "learning_rate": 5.91503384818388e-06, "loss": 0.9454, "step": 2551 }, { "epoch": 2.0118249901458416, "grad_norm": 0.341796875, "learning_rate": 5.906658358044704e-06, "loss": 0.9226, "step": 2552 }, { "epoch": 2.012613322822231, "grad_norm": 0.337890625, "learning_rate": 5.8982863158789605e-06, "loss": 0.938, "step": 2553 }, { "epoch": 2.0134016554986203, "grad_norm": 0.341796875, "learning_rate": 5.889917728738725e-06, "loss": 0.9524, "step": 2554 }, { "epoch": 2.01418998817501, "grad_norm": 0.34765625, "learning_rate": 5.881552603673171e-06, "loss": 0.9144, "step": 2555 }, { "epoch": 2.0149783208513994, "grad_norm": 0.34765625, "learning_rate": 5.873190947728552e-06, "loss": 0.9408, "step": 2556 }, { "epoch": 2.015766653527789, "grad_norm": 0.349609375, "learning_rate": 5.8648327679481984e-06, "loss": 0.9108, "step": 2557 }, { "epoch": 2.016554986204178, "grad_norm": 0.375, "learning_rate": 5.856478071372521e-06, "loss": 0.9237, "step": 2558 }, { "epoch": 2.0173433188805676, "grad_norm": 0.349609375, "learning_rate": 5.84812686503899e-06, "loss": 0.9842, "step": 2559 }, { "epoch": 2.018131651556957, "grad_norm": 0.33984375, "learning_rate": 5.839779155982131e-06, "loss": 0.917, "step": 2560 }, { "epoch": 2.0189199842333463, "grad_norm": 0.34375, "learning_rate": 5.83143495123353e-06, "loss": 0.9454, "step": 2561 }, { "epoch": 2.019708316909736, "grad_norm": 0.33203125, "learning_rate": 5.823094257821822e-06, "loss": 0.9221, "step": 2562 }, { "epoch": 2.0204966495861254, "grad_norm": 0.3359375, "learning_rate": 5.814757082772683e-06, "loss": 0.937, "step": 2563 }, { "epoch": 2.021284982262515, "grad_norm": 0.365234375, "learning_rate": 5.806423433108822e-06, "loss": 0.9664, "step": 2564 }, { "epoch": 2.022073314938904, "grad_norm": 0.34375, "learning_rate": 5.798093315849984e-06, "loss": 0.9673, "step": 2565 }, { "epoch": 2.0228616476152936, "grad_norm": 0.33984375, "learning_rate": 5.789766738012932e-06, "loss": 0.9222, "step": 2566 }, { "epoch": 2.023649980291683, "grad_norm": 0.34375, "learning_rate": 5.781443706611455e-06, "loss": 0.8949, "step": 2567 }, { "epoch": 2.0244383129680723, "grad_norm": 0.33984375, "learning_rate": 5.773124228656348e-06, "loss": 0.9103, "step": 2568 }, { "epoch": 2.025226645644462, "grad_norm": 0.341796875, "learning_rate": 5.764808311155419e-06, "loss": 0.9454, "step": 2569 }, { "epoch": 2.0260149783208514, "grad_norm": 0.3984375, "learning_rate": 5.7564959611134685e-06, "loss": 0.9402, "step": 2570 }, { "epoch": 2.026803310997241, "grad_norm": 0.369140625, "learning_rate": 5.748187185532306e-06, "loss": 0.9283, "step": 2571 }, { "epoch": 2.02759164367363, "grad_norm": 0.341796875, "learning_rate": 5.739881991410707e-06, "loss": 0.9283, "step": 2572 }, { "epoch": 2.0283799763500197, "grad_norm": 0.34375, "learning_rate": 5.731580385744457e-06, "loss": 0.9371, "step": 2573 }, { "epoch": 2.029168309026409, "grad_norm": 0.341796875, "learning_rate": 5.723282375526302e-06, "loss": 0.9478, "step": 2574 }, { "epoch": 2.0299566417027988, "grad_norm": 0.34765625, "learning_rate": 5.714987967745969e-06, "loss": 0.9304, "step": 2575 }, { "epoch": 2.030744974379188, "grad_norm": 0.34375, "learning_rate": 5.706697169390134e-06, "loss": 0.9222, "step": 2576 }, { "epoch": 2.0315333070555774, "grad_norm": 0.34375, "learning_rate": 5.698409987442448e-06, "loss": 0.9318, "step": 2577 }, { "epoch": 2.032321639731967, "grad_norm": 0.353515625, "learning_rate": 5.690126428883516e-06, "loss": 0.9536, "step": 2578 }, { "epoch": 2.033109972408356, "grad_norm": 0.33984375, "learning_rate": 5.681846500690884e-06, "loss": 0.9309, "step": 2579 }, { "epoch": 2.0338983050847457, "grad_norm": 0.341796875, "learning_rate": 5.6735702098390454e-06, "loss": 0.8859, "step": 2580 }, { "epoch": 2.0346866377611352, "grad_norm": 0.34375, "learning_rate": 5.6652975632994214e-06, "loss": 0.9171, "step": 2581 }, { "epoch": 2.035474970437525, "grad_norm": 0.353515625, "learning_rate": 5.657028568040366e-06, "loss": 0.9127, "step": 2582 }, { "epoch": 2.036263303113914, "grad_norm": 0.33984375, "learning_rate": 5.648763231027171e-06, "loss": 0.9597, "step": 2583 }, { "epoch": 2.0370516357903035, "grad_norm": 0.33984375, "learning_rate": 5.640501559222034e-06, "loss": 0.9247, "step": 2584 }, { "epoch": 2.037839968466693, "grad_norm": 0.337890625, "learning_rate": 5.632243559584061e-06, "loss": 0.9174, "step": 2585 }, { "epoch": 2.0386283011430826, "grad_norm": 0.337890625, "learning_rate": 5.623989239069275e-06, "loss": 0.9211, "step": 2586 }, { "epoch": 2.0394166338194717, "grad_norm": 0.34375, "learning_rate": 5.615738604630592e-06, "loss": 0.9519, "step": 2587 }, { "epoch": 2.0402049664958612, "grad_norm": 0.34765625, "learning_rate": 5.607491663217839e-06, "loss": 0.96, "step": 2588 }, { "epoch": 2.040993299172251, "grad_norm": 0.33984375, "learning_rate": 5.5992484217777074e-06, "loss": 0.9332, "step": 2589 }, { "epoch": 2.04178163184864, "grad_norm": 0.341796875, "learning_rate": 5.591008887253792e-06, "loss": 0.9383, "step": 2590 }, { "epoch": 2.0425699645250295, "grad_norm": 0.34375, "learning_rate": 5.582773066586553e-06, "loss": 0.9405, "step": 2591 }, { "epoch": 2.043358297201419, "grad_norm": 0.35546875, "learning_rate": 5.574540966713338e-06, "loss": 0.9287, "step": 2592 }, { "epoch": 2.0441466298778086, "grad_norm": 0.349609375, "learning_rate": 5.56631259456834e-06, "loss": 0.9685, "step": 2593 }, { "epoch": 2.0449349625541977, "grad_norm": 0.34375, "learning_rate": 5.558087957082624e-06, "loss": 0.9134, "step": 2594 }, { "epoch": 2.0457232952305873, "grad_norm": 0.341796875, "learning_rate": 5.549867061184108e-06, "loss": 0.9383, "step": 2595 }, { "epoch": 2.046511627906977, "grad_norm": 0.34765625, "learning_rate": 5.541649913797559e-06, "loss": 0.9313, "step": 2596 }, { "epoch": 2.0472999605833664, "grad_norm": 0.345703125, "learning_rate": 5.533436521844582e-06, "loss": 0.9179, "step": 2597 }, { "epoch": 2.0480882932597555, "grad_norm": 0.35546875, "learning_rate": 5.525226892243623e-06, "loss": 0.9612, "step": 2598 }, { "epoch": 2.048876625936145, "grad_norm": 0.35546875, "learning_rate": 5.5170210319099595e-06, "loss": 0.9258, "step": 2599 }, { "epoch": 2.0496649586125346, "grad_norm": 0.341796875, "learning_rate": 5.508818947755687e-06, "loss": 0.9477, "step": 2600 }, { "epoch": 2.0504532912889237, "grad_norm": 0.34375, "learning_rate": 5.50062064668973e-06, "loss": 0.9545, "step": 2601 }, { "epoch": 2.0512416239653133, "grad_norm": 0.333984375, "learning_rate": 5.492426135617816e-06, "loss": 0.8776, "step": 2602 }, { "epoch": 2.052029956641703, "grad_norm": 0.349609375, "learning_rate": 5.484235421442492e-06, "loss": 0.9341, "step": 2603 }, { "epoch": 2.0528182893180924, "grad_norm": 0.328125, "learning_rate": 5.4760485110630956e-06, "loss": 0.926, "step": 2604 }, { "epoch": 2.0536066219944815, "grad_norm": 0.34375, "learning_rate": 5.467865411375766e-06, "loss": 0.9489, "step": 2605 }, { "epoch": 2.054394954670871, "grad_norm": 0.337890625, "learning_rate": 5.459686129273433e-06, "loss": 0.9293, "step": 2606 }, { "epoch": 2.0551832873472606, "grad_norm": 0.3359375, "learning_rate": 5.451510671645806e-06, "loss": 0.8994, "step": 2607 }, { "epoch": 2.05597162002365, "grad_norm": 0.349609375, "learning_rate": 5.44333904537938e-06, "loss": 0.9497, "step": 2608 }, { "epoch": 2.0567599527000393, "grad_norm": 0.34375, "learning_rate": 5.435171257357417e-06, "loss": 0.9482, "step": 2609 }, { "epoch": 2.057548285376429, "grad_norm": 0.333984375, "learning_rate": 5.427007314459949e-06, "loss": 0.9154, "step": 2610 }, { "epoch": 2.0583366180528184, "grad_norm": 0.34375, "learning_rate": 5.418847223563761e-06, "loss": 0.9456, "step": 2611 }, { "epoch": 2.0591249507292075, "grad_norm": 0.33984375, "learning_rate": 5.4106909915424075e-06, "loss": 0.9444, "step": 2612 }, { "epoch": 2.059913283405597, "grad_norm": 0.33984375, "learning_rate": 5.402538625266184e-06, "loss": 0.9472, "step": 2613 }, { "epoch": 2.0607016160819867, "grad_norm": 0.349609375, "learning_rate": 5.394390131602133e-06, "loss": 0.9285, "step": 2614 }, { "epoch": 2.061489948758376, "grad_norm": 0.33984375, "learning_rate": 5.386245517414026e-06, "loss": 0.9209, "step": 2615 }, { "epoch": 2.0622782814347653, "grad_norm": 0.34375, "learning_rate": 5.378104789562373e-06, "loss": 0.9215, "step": 2616 }, { "epoch": 2.063066614111155, "grad_norm": 0.34375, "learning_rate": 5.36996795490442e-06, "loss": 0.9195, "step": 2617 }, { "epoch": 2.0638549467875444, "grad_norm": 0.33984375, "learning_rate": 5.3618350202941225e-06, "loss": 0.9502, "step": 2618 }, { "epoch": 2.064643279463934, "grad_norm": 0.345703125, "learning_rate": 5.353705992582147e-06, "loss": 0.9609, "step": 2619 }, { "epoch": 2.065431612140323, "grad_norm": 0.330078125, "learning_rate": 5.345580878615877e-06, "loss": 0.8972, "step": 2620 }, { "epoch": 2.0662199448167127, "grad_norm": 0.349609375, "learning_rate": 5.337459685239395e-06, "loss": 0.9319, "step": 2621 }, { "epoch": 2.0670082774931022, "grad_norm": 0.345703125, "learning_rate": 5.329342419293488e-06, "loss": 0.9144, "step": 2622 }, { "epoch": 2.0677966101694913, "grad_norm": 0.3359375, "learning_rate": 5.321229087615635e-06, "loss": 0.933, "step": 2623 }, { "epoch": 2.068584942845881, "grad_norm": 0.35546875, "learning_rate": 5.313119697039985e-06, "loss": 0.9665, "step": 2624 }, { "epoch": 2.0693732755222705, "grad_norm": 0.345703125, "learning_rate": 5.305014254397378e-06, "loss": 0.9566, "step": 2625 }, { "epoch": 2.07016160819866, "grad_norm": 0.361328125, "learning_rate": 5.296912766515338e-06, "loss": 0.9378, "step": 2626 }, { "epoch": 2.070949940875049, "grad_norm": 0.3671875, "learning_rate": 5.288815240218048e-06, "loss": 0.8965, "step": 2627 }, { "epoch": 2.0717382735514387, "grad_norm": 0.333984375, "learning_rate": 5.280721682326349e-06, "loss": 0.9057, "step": 2628 }, { "epoch": 2.0725266062278282, "grad_norm": 0.341796875, "learning_rate": 5.272632099657744e-06, "loss": 0.9623, "step": 2629 }, { "epoch": 2.0733149389042174, "grad_norm": 0.33984375, "learning_rate": 5.264546499026388e-06, "loss": 0.9082, "step": 2630 }, { "epoch": 2.074103271580607, "grad_norm": 0.34765625, "learning_rate": 5.256464887243095e-06, "loss": 0.981, "step": 2631 }, { "epoch": 2.0748916042569965, "grad_norm": 0.341796875, "learning_rate": 5.248387271115292e-06, "loss": 0.9069, "step": 2632 }, { "epoch": 2.075679936933386, "grad_norm": 0.34765625, "learning_rate": 5.240313657447058e-06, "loss": 0.9188, "step": 2633 }, { "epoch": 2.076468269609775, "grad_norm": 0.349609375, "learning_rate": 5.232244053039099e-06, "loss": 0.9857, "step": 2634 }, { "epoch": 2.0772566022861647, "grad_norm": 0.349609375, "learning_rate": 5.224178464688742e-06, "loss": 0.908, "step": 2635 }, { "epoch": 2.0780449349625543, "grad_norm": 0.34375, "learning_rate": 5.216116899189929e-06, "loss": 0.9507, "step": 2636 }, { "epoch": 2.078833267638944, "grad_norm": 0.357421875, "learning_rate": 5.208059363333218e-06, "loss": 0.9241, "step": 2637 }, { "epoch": 2.079621600315333, "grad_norm": 0.341796875, "learning_rate": 5.200005863905768e-06, "loss": 0.917, "step": 2638 }, { "epoch": 2.0804099329917225, "grad_norm": 0.341796875, "learning_rate": 5.191956407691343e-06, "loss": 0.9371, "step": 2639 }, { "epoch": 2.081198265668112, "grad_norm": 0.34765625, "learning_rate": 5.183911001470296e-06, "loss": 0.9163, "step": 2640 }, { "epoch": 2.081986598344501, "grad_norm": 0.33984375, "learning_rate": 5.175869652019571e-06, "loss": 0.9195, "step": 2641 }, { "epoch": 2.0827749310208907, "grad_norm": 0.34375, "learning_rate": 5.167832366112695e-06, "loss": 0.9182, "step": 2642 }, { "epoch": 2.0835632636972803, "grad_norm": 0.33984375, "learning_rate": 5.159799150519773e-06, "loss": 0.9671, "step": 2643 }, { "epoch": 2.08435159637367, "grad_norm": 0.34375, "learning_rate": 5.15177001200748e-06, "loss": 0.9187, "step": 2644 }, { "epoch": 2.085139929050059, "grad_norm": 0.33984375, "learning_rate": 5.143744957339056e-06, "loss": 0.9702, "step": 2645 }, { "epoch": 2.0859282617264485, "grad_norm": 0.345703125, "learning_rate": 5.135723993274304e-06, "loss": 0.9254, "step": 2646 }, { "epoch": 2.086716594402838, "grad_norm": 0.34765625, "learning_rate": 5.127707126569577e-06, "loss": 0.9408, "step": 2647 }, { "epoch": 2.0875049270792276, "grad_norm": 0.34375, "learning_rate": 5.11969436397778e-06, "loss": 0.9026, "step": 2648 }, { "epoch": 2.0882932597556167, "grad_norm": 0.33984375, "learning_rate": 5.111685712248364e-06, "loss": 0.9399, "step": 2649 }, { "epoch": 2.0890815924320063, "grad_norm": 0.345703125, "learning_rate": 5.103681178127303e-06, "loss": 0.9338, "step": 2650 }, { "epoch": 2.089869925108396, "grad_norm": 0.34375, "learning_rate": 5.095680768357123e-06, "loss": 0.9272, "step": 2651 }, { "epoch": 2.090658257784785, "grad_norm": 0.34765625, "learning_rate": 5.087684489676862e-06, "loss": 0.9291, "step": 2652 }, { "epoch": 2.0914465904611745, "grad_norm": 0.341796875, "learning_rate": 5.079692348822085e-06, "loss": 0.9433, "step": 2653 }, { "epoch": 2.092234923137564, "grad_norm": 0.345703125, "learning_rate": 5.071704352524863e-06, "loss": 0.9703, "step": 2654 }, { "epoch": 2.0930232558139537, "grad_norm": 0.345703125, "learning_rate": 5.063720507513781e-06, "loss": 0.9522, "step": 2655 }, { "epoch": 2.0938115884903428, "grad_norm": 0.349609375, "learning_rate": 5.055740820513932e-06, "loss": 0.9479, "step": 2656 }, { "epoch": 2.0945999211667323, "grad_norm": 0.345703125, "learning_rate": 5.047765298246907e-06, "loss": 0.94, "step": 2657 }, { "epoch": 2.095388253843122, "grad_norm": 0.359375, "learning_rate": 5.039793947430774e-06, "loss": 0.9486, "step": 2658 }, { "epoch": 2.0961765865195114, "grad_norm": 0.35546875, "learning_rate": 5.031826774780098e-06, "loss": 0.9656, "step": 2659 }, { "epoch": 2.0969649191959006, "grad_norm": 0.349609375, "learning_rate": 5.0238637870059296e-06, "loss": 0.9296, "step": 2660 }, { "epoch": 2.09775325187229, "grad_norm": 0.349609375, "learning_rate": 5.015904990815792e-06, "loss": 0.919, "step": 2661 }, { "epoch": 2.0985415845486797, "grad_norm": 0.33984375, "learning_rate": 5.007950392913663e-06, "loss": 0.9319, "step": 2662 }, { "epoch": 2.099329917225069, "grad_norm": 0.33984375, "learning_rate": 5.000000000000003e-06, "loss": 0.9438, "step": 2663 }, { "epoch": 2.1001182499014583, "grad_norm": 0.341796875, "learning_rate": 4.992053818771715e-06, "loss": 0.9804, "step": 2664 }, { "epoch": 2.100906582577848, "grad_norm": 0.33984375, "learning_rate": 4.984111855922177e-06, "loss": 0.9383, "step": 2665 }, { "epoch": 2.1016949152542375, "grad_norm": 0.357421875, "learning_rate": 4.9761741181411845e-06, "loss": 0.9403, "step": 2666 }, { "epoch": 2.1024832479306266, "grad_norm": 0.34765625, "learning_rate": 4.968240612114995e-06, "loss": 0.918, "step": 2667 }, { "epoch": 2.103271580607016, "grad_norm": 0.341796875, "learning_rate": 4.9603113445262915e-06, "loss": 0.9556, "step": 2668 }, { "epoch": 2.1040599132834057, "grad_norm": 0.34375, "learning_rate": 4.952386322054189e-06, "loss": 0.9335, "step": 2669 }, { "epoch": 2.1048482459597952, "grad_norm": 0.337890625, "learning_rate": 4.944465551374238e-06, "loss": 0.9435, "step": 2670 }, { "epoch": 2.1056365786361844, "grad_norm": 0.357421875, "learning_rate": 4.936549039158386e-06, "loss": 0.9272, "step": 2671 }, { "epoch": 2.106424911312574, "grad_norm": 0.337890625, "learning_rate": 4.9286367920750075e-06, "loss": 0.9278, "step": 2672 }, { "epoch": 2.1072132439889635, "grad_norm": 0.34765625, "learning_rate": 4.920728816788885e-06, "loss": 0.9252, "step": 2673 }, { "epoch": 2.1080015766653526, "grad_norm": 0.341796875, "learning_rate": 4.912825119961194e-06, "loss": 0.906, "step": 2674 }, { "epoch": 2.108789909341742, "grad_norm": 0.341796875, "learning_rate": 4.904925708249516e-06, "loss": 0.9311, "step": 2675 }, { "epoch": 2.1095782420181317, "grad_norm": 0.44140625, "learning_rate": 4.897030588307816e-06, "loss": 0.8884, "step": 2676 }, { "epoch": 2.1103665746945213, "grad_norm": 0.34375, "learning_rate": 4.889139766786447e-06, "loss": 0.8833, "step": 2677 }, { "epoch": 2.1111549073709104, "grad_norm": 0.34375, "learning_rate": 4.881253250332141e-06, "loss": 0.8966, "step": 2678 }, { "epoch": 2.1119432400473, "grad_norm": 0.337890625, "learning_rate": 4.873371045588002e-06, "loss": 0.9267, "step": 2679 }, { "epoch": 2.1127315727236895, "grad_norm": 0.33984375, "learning_rate": 4.865493159193504e-06, "loss": 0.9394, "step": 2680 }, { "epoch": 2.1135199054000786, "grad_norm": 0.349609375, "learning_rate": 4.8576195977844835e-06, "loss": 0.9072, "step": 2681 }, { "epoch": 2.114308238076468, "grad_norm": 0.34375, "learning_rate": 4.84975036799313e-06, "loss": 0.9348, "step": 2682 }, { "epoch": 2.1150965707528577, "grad_norm": 0.3359375, "learning_rate": 4.841885476447996e-06, "loss": 0.9281, "step": 2683 }, { "epoch": 2.1158849034292473, "grad_norm": 0.349609375, "learning_rate": 4.834024929773956e-06, "loss": 0.9626, "step": 2684 }, { "epoch": 2.1166732361056364, "grad_norm": 0.34765625, "learning_rate": 4.826168734592254e-06, "loss": 0.9596, "step": 2685 }, { "epoch": 2.117461568782026, "grad_norm": 0.3359375, "learning_rate": 4.81831689752045e-06, "loss": 0.891, "step": 2686 }, { "epoch": 2.1182499014584155, "grad_norm": 0.37109375, "learning_rate": 4.810469425172439e-06, "loss": 0.9429, "step": 2687 }, { "epoch": 2.119038234134805, "grad_norm": 0.349609375, "learning_rate": 4.802626324158432e-06, "loss": 0.9566, "step": 2688 }, { "epoch": 2.119826566811194, "grad_norm": 0.37109375, "learning_rate": 4.7947876010849655e-06, "loss": 0.9478, "step": 2689 }, { "epoch": 2.1206148994875837, "grad_norm": 0.357421875, "learning_rate": 4.786953262554892e-06, "loss": 0.9669, "step": 2690 }, { "epoch": 2.1214032321639733, "grad_norm": 0.345703125, "learning_rate": 4.779123315167362e-06, "loss": 0.9348, "step": 2691 }, { "epoch": 2.1221915648403624, "grad_norm": 0.33984375, "learning_rate": 4.771297765517834e-06, "loss": 0.9455, "step": 2692 }, { "epoch": 2.122979897516752, "grad_norm": 0.337890625, "learning_rate": 4.763476620198048e-06, "loss": 0.9672, "step": 2693 }, { "epoch": 2.1237682301931415, "grad_norm": 0.34765625, "learning_rate": 4.755659885796054e-06, "loss": 0.9196, "step": 2694 }, { "epoch": 2.124556562869531, "grad_norm": 0.349609375, "learning_rate": 4.747847568896178e-06, "loss": 0.9319, "step": 2695 }, { "epoch": 2.12534489554592, "grad_norm": 0.357421875, "learning_rate": 4.740039676079022e-06, "loss": 0.9048, "step": 2696 }, { "epoch": 2.1261332282223098, "grad_norm": 0.345703125, "learning_rate": 4.73223621392146e-06, "loss": 0.9465, "step": 2697 }, { "epoch": 2.1269215608986993, "grad_norm": 0.337890625, "learning_rate": 4.7244371889966374e-06, "loss": 0.9467, "step": 2698 }, { "epoch": 2.127709893575089, "grad_norm": 0.341796875, "learning_rate": 4.716642607873968e-06, "loss": 0.9403, "step": 2699 }, { "epoch": 2.128498226251478, "grad_norm": 0.34765625, "learning_rate": 4.708852477119117e-06, "loss": 0.946, "step": 2700 }, { "epoch": 2.1292865589278676, "grad_norm": 0.34765625, "learning_rate": 4.701066803293993e-06, "loss": 0.9117, "step": 2701 }, { "epoch": 2.130074891604257, "grad_norm": 0.349609375, "learning_rate": 4.693285592956761e-06, "loss": 0.9406, "step": 2702 }, { "epoch": 2.1308632242806462, "grad_norm": 0.337890625, "learning_rate": 4.6855088526618205e-06, "loss": 0.9053, "step": 2703 }, { "epoch": 2.131651556957036, "grad_norm": 0.337890625, "learning_rate": 4.677736588959818e-06, "loss": 0.9162, "step": 2704 }, { "epoch": 2.1324398896334253, "grad_norm": 0.349609375, "learning_rate": 4.669968808397609e-06, "loss": 0.9442, "step": 2705 }, { "epoch": 2.133228222309815, "grad_norm": 0.337890625, "learning_rate": 4.662205517518286e-06, "loss": 0.9209, "step": 2706 }, { "epoch": 2.134016554986204, "grad_norm": 0.337890625, "learning_rate": 4.654446722861159e-06, "loss": 0.9246, "step": 2707 }, { "epoch": 2.1348048876625936, "grad_norm": 0.34765625, "learning_rate": 4.646692430961745e-06, "loss": 0.9177, "step": 2708 }, { "epoch": 2.135593220338983, "grad_norm": 0.361328125, "learning_rate": 4.638942648351774e-06, "loss": 0.9183, "step": 2709 }, { "epoch": 2.1363815530153727, "grad_norm": 0.35546875, "learning_rate": 4.631197381559173e-06, "loss": 0.9535, "step": 2710 }, { "epoch": 2.137169885691762, "grad_norm": 0.345703125, "learning_rate": 4.62345663710807e-06, "loss": 0.9121, "step": 2711 }, { "epoch": 2.1379582183681514, "grad_norm": 0.34765625, "learning_rate": 4.61572042151878e-06, "loss": 0.953, "step": 2712 }, { "epoch": 2.138746551044541, "grad_norm": 0.345703125, "learning_rate": 4.607988741307804e-06, "loss": 0.9295, "step": 2713 }, { "epoch": 2.13953488372093, "grad_norm": 0.3359375, "learning_rate": 4.6002616029878235e-06, "loss": 0.9421, "step": 2714 }, { "epoch": 2.1403232163973196, "grad_norm": 0.341796875, "learning_rate": 4.592539013067692e-06, "loss": 0.94, "step": 2715 }, { "epoch": 2.141111549073709, "grad_norm": 0.345703125, "learning_rate": 4.584820978052434e-06, "loss": 0.9288, "step": 2716 }, { "epoch": 2.1418998817500987, "grad_norm": 0.337890625, "learning_rate": 4.577107504443239e-06, "loss": 0.9226, "step": 2717 }, { "epoch": 2.142688214426488, "grad_norm": 0.341796875, "learning_rate": 4.569398598737448e-06, "loss": 0.9201, "step": 2718 }, { "epoch": 2.1434765471028774, "grad_norm": 0.33984375, "learning_rate": 4.56169426742856e-06, "loss": 0.9532, "step": 2719 }, { "epoch": 2.144264879779267, "grad_norm": 0.353515625, "learning_rate": 4.553994517006219e-06, "loss": 0.9355, "step": 2720 }, { "epoch": 2.1450532124556565, "grad_norm": 0.337890625, "learning_rate": 4.546299353956211e-06, "loss": 0.9165, "step": 2721 }, { "epoch": 2.1458415451320456, "grad_norm": 0.33984375, "learning_rate": 4.538608784760459e-06, "loss": 0.9573, "step": 2722 }, { "epoch": 2.146629877808435, "grad_norm": 0.34375, "learning_rate": 4.530922815897003e-06, "loss": 0.9668, "step": 2723 }, { "epoch": 2.1474182104848247, "grad_norm": 0.3515625, "learning_rate": 4.5232414538400336e-06, "loss": 0.955, "step": 2724 }, { "epoch": 2.148206543161214, "grad_norm": 0.33984375, "learning_rate": 4.515564705059841e-06, "loss": 0.9057, "step": 2725 }, { "epoch": 2.1489948758376034, "grad_norm": 0.337890625, "learning_rate": 4.507892576022838e-06, "loss": 0.9309, "step": 2726 }, { "epoch": 2.149783208513993, "grad_norm": 0.34375, "learning_rate": 4.50022507319154e-06, "loss": 0.9198, "step": 2727 }, { "epoch": 2.1505715411903825, "grad_norm": 0.353515625, "learning_rate": 4.492562203024565e-06, "loss": 0.9295, "step": 2728 }, { "epoch": 2.1513598738667716, "grad_norm": 0.341796875, "learning_rate": 4.484903971976642e-06, "loss": 0.9844, "step": 2729 }, { "epoch": 2.152148206543161, "grad_norm": 0.337890625, "learning_rate": 4.477250386498582e-06, "loss": 0.9284, "step": 2730 }, { "epoch": 2.1529365392195507, "grad_norm": 0.33984375, "learning_rate": 4.469601453037277e-06, "loss": 0.9334, "step": 2731 }, { "epoch": 2.15372487189594, "grad_norm": 0.337890625, "learning_rate": 4.461957178035705e-06, "loss": 0.926, "step": 2732 }, { "epoch": 2.1545132045723294, "grad_norm": 0.341796875, "learning_rate": 4.4543175679329345e-06, "loss": 0.926, "step": 2733 }, { "epoch": 2.155301537248719, "grad_norm": 0.341796875, "learning_rate": 4.446682629164088e-06, "loss": 0.8903, "step": 2734 }, { "epoch": 2.1560898699251085, "grad_norm": 0.3515625, "learning_rate": 4.439052368160351e-06, "loss": 0.9216, "step": 2735 }, { "epoch": 2.1568782026014977, "grad_norm": 0.353515625, "learning_rate": 4.431426791348981e-06, "loss": 0.9476, "step": 2736 }, { "epoch": 2.157666535277887, "grad_norm": 0.333984375, "learning_rate": 4.423805905153278e-06, "loss": 0.9325, "step": 2737 }, { "epoch": 2.1584548679542768, "grad_norm": 0.34375, "learning_rate": 4.416189715992605e-06, "loss": 0.9446, "step": 2738 }, { "epoch": 2.1592432006306663, "grad_norm": 0.3515625, "learning_rate": 4.408578230282361e-06, "loss": 0.9408, "step": 2739 }, { "epoch": 2.1600315333070554, "grad_norm": 0.33203125, "learning_rate": 4.4009714544339755e-06, "loss": 0.8808, "step": 2740 }, { "epoch": 2.160819865983445, "grad_norm": 0.34375, "learning_rate": 4.39336939485492e-06, "loss": 0.9275, "step": 2741 }, { "epoch": 2.1616081986598346, "grad_norm": 0.34765625, "learning_rate": 4.38577205794869e-06, "loss": 0.9646, "step": 2742 }, { "epoch": 2.162396531336224, "grad_norm": 0.345703125, "learning_rate": 4.3781794501148116e-06, "loss": 0.9292, "step": 2743 }, { "epoch": 2.1631848640126132, "grad_norm": 0.341796875, "learning_rate": 4.370591577748811e-06, "loss": 0.9307, "step": 2744 }, { "epoch": 2.163973196689003, "grad_norm": 0.345703125, "learning_rate": 4.36300844724224e-06, "loss": 0.9505, "step": 2745 }, { "epoch": 2.1647615293653923, "grad_norm": 0.337890625, "learning_rate": 4.355430064982647e-06, "loss": 0.9452, "step": 2746 }, { "epoch": 2.1655498620417815, "grad_norm": 0.341796875, "learning_rate": 4.347856437353584e-06, "loss": 0.9164, "step": 2747 }, { "epoch": 2.166338194718171, "grad_norm": 0.341796875, "learning_rate": 4.340287570734604e-06, "loss": 0.9362, "step": 2748 }, { "epoch": 2.1671265273945606, "grad_norm": 0.34765625, "learning_rate": 4.332723471501238e-06, "loss": 0.9214, "step": 2749 }, { "epoch": 2.16791486007095, "grad_norm": 0.3359375, "learning_rate": 4.325164146025009e-06, "loss": 0.8981, "step": 2750 }, { "epoch": 2.1687031927473392, "grad_norm": 0.34765625, "learning_rate": 4.317609600673418e-06, "loss": 0.9264, "step": 2751 }, { "epoch": 2.169491525423729, "grad_norm": 0.337890625, "learning_rate": 4.310059841809938e-06, "loss": 0.8971, "step": 2752 }, { "epoch": 2.1702798581001184, "grad_norm": 0.337890625, "learning_rate": 4.30251487579401e-06, "loss": 0.9416, "step": 2753 }, { "epoch": 2.1710681907765075, "grad_norm": 0.33984375, "learning_rate": 4.294974708981041e-06, "loss": 0.9082, "step": 2754 }, { "epoch": 2.171856523452897, "grad_norm": 0.333984375, "learning_rate": 4.2874393477223915e-06, "loss": 0.8916, "step": 2755 }, { "epoch": 2.1726448561292866, "grad_norm": 0.34375, "learning_rate": 4.279908798365379e-06, "loss": 0.9165, "step": 2756 }, { "epoch": 2.173433188805676, "grad_norm": 0.34765625, "learning_rate": 4.272383067253254e-06, "loss": 0.9686, "step": 2757 }, { "epoch": 2.1742215214820653, "grad_norm": 0.353515625, "learning_rate": 4.264862160725229e-06, "loss": 0.915, "step": 2758 }, { "epoch": 2.175009854158455, "grad_norm": 0.341796875, "learning_rate": 4.257346085116441e-06, "loss": 0.9101, "step": 2759 }, { "epoch": 2.1757981868348444, "grad_norm": 0.33984375, "learning_rate": 4.2498348467579555e-06, "loss": 0.9534, "step": 2760 }, { "epoch": 2.176586519511234, "grad_norm": 0.3515625, "learning_rate": 4.242328451976774e-06, "loss": 0.9168, "step": 2761 }, { "epoch": 2.177374852187623, "grad_norm": 0.33984375, "learning_rate": 4.2348269070957986e-06, "loss": 0.922, "step": 2762 }, { "epoch": 2.1781631848640126, "grad_norm": 0.337890625, "learning_rate": 4.22733021843387e-06, "loss": 0.9208, "step": 2763 }, { "epoch": 2.178951517540402, "grad_norm": 0.341796875, "learning_rate": 4.219838392305723e-06, "loss": 0.9079, "step": 2764 }, { "epoch": 2.1797398502167913, "grad_norm": 0.34765625, "learning_rate": 4.212351435022005e-06, "loss": 0.9003, "step": 2765 }, { "epoch": 2.180528182893181, "grad_norm": 0.341796875, "learning_rate": 4.204869352889246e-06, "loss": 0.9046, "step": 2766 }, { "epoch": 2.1813165155695704, "grad_norm": 0.3515625, "learning_rate": 4.197392152209892e-06, "loss": 0.9922, "step": 2767 }, { "epoch": 2.18210484824596, "grad_norm": 0.345703125, "learning_rate": 4.189919839282265e-06, "loss": 0.9397, "step": 2768 }, { "epoch": 2.182893180922349, "grad_norm": 0.345703125, "learning_rate": 4.182452420400571e-06, "loss": 0.9267, "step": 2769 }, { "epoch": 2.1836815135987386, "grad_norm": 0.3359375, "learning_rate": 4.174989901854889e-06, "loss": 0.9009, "step": 2770 }, { "epoch": 2.184469846275128, "grad_norm": 0.345703125, "learning_rate": 4.167532289931175e-06, "loss": 0.9556, "step": 2771 }, { "epoch": 2.1852581789515177, "grad_norm": 0.345703125, "learning_rate": 4.160079590911257e-06, "loss": 0.9159, "step": 2772 }, { "epoch": 2.186046511627907, "grad_norm": 0.345703125, "learning_rate": 4.152631811072822e-06, "loss": 0.9344, "step": 2773 }, { "epoch": 2.1868348443042964, "grad_norm": 0.337890625, "learning_rate": 4.145188956689405e-06, "loss": 0.9143, "step": 2774 }, { "epoch": 2.187623176980686, "grad_norm": 0.345703125, "learning_rate": 4.1377510340304e-06, "loss": 0.9704, "step": 2775 }, { "epoch": 2.188411509657075, "grad_norm": 0.341796875, "learning_rate": 4.130318049361039e-06, "loss": 0.9427, "step": 2776 }, { "epoch": 2.1891998423334647, "grad_norm": 0.34765625, "learning_rate": 4.122890008942417e-06, "loss": 0.9451, "step": 2777 }, { "epoch": 2.189988175009854, "grad_norm": 0.345703125, "learning_rate": 4.1154669190314315e-06, "loss": 0.9241, "step": 2778 }, { "epoch": 2.1907765076862438, "grad_norm": 0.337890625, "learning_rate": 4.1080487858808335e-06, "loss": 0.9174, "step": 2779 }, { "epoch": 2.191564840362633, "grad_norm": 0.341796875, "learning_rate": 4.10063561573919e-06, "loss": 0.9111, "step": 2780 }, { "epoch": 2.1923531730390224, "grad_norm": 0.345703125, "learning_rate": 4.093227414850887e-06, "loss": 0.9453, "step": 2781 }, { "epoch": 2.193141505715412, "grad_norm": 0.33984375, "learning_rate": 4.085824189456136e-06, "loss": 0.9211, "step": 2782 }, { "epoch": 2.193929838391801, "grad_norm": 0.34765625, "learning_rate": 4.078425945790937e-06, "loss": 0.948, "step": 2783 }, { "epoch": 2.1947181710681907, "grad_norm": 0.34375, "learning_rate": 4.071032690087111e-06, "loss": 0.8982, "step": 2784 }, { "epoch": 2.1955065037445802, "grad_norm": 0.341796875, "learning_rate": 4.063644428572268e-06, "loss": 0.9558, "step": 2785 }, { "epoch": 2.19629483642097, "grad_norm": 0.34375, "learning_rate": 4.0562611674698186e-06, "loss": 0.9152, "step": 2786 }, { "epoch": 2.197083169097359, "grad_norm": 0.33984375, "learning_rate": 4.0488829129989536e-06, "loss": 0.9539, "step": 2787 }, { "epoch": 2.1978715017737485, "grad_norm": 0.33984375, "learning_rate": 4.041509671374653e-06, "loss": 0.9535, "step": 2788 }, { "epoch": 2.198659834450138, "grad_norm": 0.3359375, "learning_rate": 4.03414144880767e-06, "loss": 0.9381, "step": 2789 }, { "epoch": 2.1994481671265276, "grad_norm": 0.333984375, "learning_rate": 4.026778251504533e-06, "loss": 0.8971, "step": 2790 }, { "epoch": 2.2002364998029167, "grad_norm": 0.353515625, "learning_rate": 4.019420085667534e-06, "loss": 0.9076, "step": 2791 }, { "epoch": 2.2010248324793062, "grad_norm": 0.34765625, "learning_rate": 4.01206695749473e-06, "loss": 0.9435, "step": 2792 }, { "epoch": 2.201813165155696, "grad_norm": 0.34765625, "learning_rate": 4.0047188731799345e-06, "loss": 0.9517, "step": 2793 }, { "epoch": 2.2026014978320854, "grad_norm": 0.375, "learning_rate": 3.99737583891271e-06, "loss": 0.9376, "step": 2794 }, { "epoch": 2.2033898305084745, "grad_norm": 0.349609375, "learning_rate": 3.990037860878371e-06, "loss": 0.9373, "step": 2795 }, { "epoch": 2.204178163184864, "grad_norm": 0.34765625, "learning_rate": 3.982704945257957e-06, "loss": 0.9541, "step": 2796 }, { "epoch": 2.2049664958612536, "grad_norm": 0.34765625, "learning_rate": 3.975377098228266e-06, "loss": 0.9009, "step": 2797 }, { "epoch": 2.2057548285376427, "grad_norm": 0.34375, "learning_rate": 3.9680543259618105e-06, "loss": 0.898, "step": 2798 }, { "epoch": 2.2065431612140323, "grad_norm": 0.341796875, "learning_rate": 3.960736634626838e-06, "loss": 0.9161, "step": 2799 }, { "epoch": 2.207331493890422, "grad_norm": 0.33984375, "learning_rate": 3.953424030387301e-06, "loss": 0.9087, "step": 2800 }, { "epoch": 2.2081198265668114, "grad_norm": 0.345703125, "learning_rate": 3.946116519402886e-06, "loss": 0.9426, "step": 2801 }, { "epoch": 2.2089081592432005, "grad_norm": 0.34375, "learning_rate": 3.9388141078289775e-06, "loss": 0.9442, "step": 2802 }, { "epoch": 2.20969649191959, "grad_norm": 0.34375, "learning_rate": 3.931516801816668e-06, "loss": 0.9081, "step": 2803 }, { "epoch": 2.2104848245959796, "grad_norm": 0.3359375, "learning_rate": 3.9242246075127536e-06, "loss": 0.9322, "step": 2804 }, { "epoch": 2.2112731572723687, "grad_norm": 0.337890625, "learning_rate": 3.916937531059706e-06, "loss": 0.9293, "step": 2805 }, { "epoch": 2.2120614899487583, "grad_norm": 0.341796875, "learning_rate": 3.909655578595714e-06, "loss": 0.8869, "step": 2806 }, { "epoch": 2.212849822625148, "grad_norm": 0.34765625, "learning_rate": 3.902378756254629e-06, "loss": 0.941, "step": 2807 }, { "epoch": 2.2136381553015374, "grad_norm": 0.33984375, "learning_rate": 3.895107070165995e-06, "loss": 0.9117, "step": 2808 }, { "epoch": 2.2144264879779265, "grad_norm": 0.349609375, "learning_rate": 3.887840526455014e-06, "loss": 0.9167, "step": 2809 }, { "epoch": 2.215214820654316, "grad_norm": 0.34765625, "learning_rate": 3.880579131242567e-06, "loss": 0.9298, "step": 2810 }, { "epoch": 2.2160031533307056, "grad_norm": 0.341796875, "learning_rate": 3.873322890645202e-06, "loss": 0.9124, "step": 2811 }, { "epoch": 2.216791486007095, "grad_norm": 0.33984375, "learning_rate": 3.866071810775118e-06, "loss": 0.9213, "step": 2812 }, { "epoch": 2.2175798186834843, "grad_norm": 0.341796875, "learning_rate": 3.858825897740164e-06, "loss": 0.9502, "step": 2813 }, { "epoch": 2.218368151359874, "grad_norm": 0.337890625, "learning_rate": 3.851585157643845e-06, "loss": 0.9199, "step": 2814 }, { "epoch": 2.2191564840362634, "grad_norm": 0.3515625, "learning_rate": 3.844349596585299e-06, "loss": 0.9758, "step": 2815 }, { "epoch": 2.2199448167126525, "grad_norm": 0.337890625, "learning_rate": 3.837119220659318e-06, "loss": 0.9099, "step": 2816 }, { "epoch": 2.220733149389042, "grad_norm": 0.34375, "learning_rate": 3.829894035956306e-06, "loss": 0.9394, "step": 2817 }, { "epoch": 2.2215214820654317, "grad_norm": 0.34765625, "learning_rate": 3.822674048562309e-06, "loss": 0.9366, "step": 2818 }, { "epoch": 2.222309814741821, "grad_norm": 0.337890625, "learning_rate": 3.815459264558988e-06, "loss": 0.9478, "step": 2819 }, { "epoch": 2.2230981474182103, "grad_norm": 0.345703125, "learning_rate": 3.8082496900236244e-06, "loss": 0.9046, "step": 2820 }, { "epoch": 2.2238864800946, "grad_norm": 0.341796875, "learning_rate": 3.8010453310291086e-06, "loss": 0.9205, "step": 2821 }, { "epoch": 2.2246748127709894, "grad_norm": 0.34375, "learning_rate": 3.793846193643941e-06, "loss": 0.9611, "step": 2822 }, { "epoch": 2.225463145447379, "grad_norm": 0.337890625, "learning_rate": 3.7866522839322207e-06, "loss": 0.9057, "step": 2823 }, { "epoch": 2.226251478123768, "grad_norm": 0.34375, "learning_rate": 3.779463607953644e-06, "loss": 0.9232, "step": 2824 }, { "epoch": 2.2270398108001577, "grad_norm": 0.34765625, "learning_rate": 3.7722801717635016e-06, "loss": 0.9537, "step": 2825 }, { "epoch": 2.2278281434765472, "grad_norm": 0.35546875, "learning_rate": 3.7651019814126656e-06, "loss": 0.9763, "step": 2826 }, { "epoch": 2.2286164761529363, "grad_norm": 0.34765625, "learning_rate": 3.7579290429475933e-06, "loss": 0.9558, "step": 2827 }, { "epoch": 2.229404808829326, "grad_norm": 0.345703125, "learning_rate": 3.7507613624103167e-06, "loss": 0.9, "step": 2828 }, { "epoch": 2.2301931415057155, "grad_norm": 0.353515625, "learning_rate": 3.743598945838438e-06, "loss": 0.9454, "step": 2829 }, { "epoch": 2.230981474182105, "grad_norm": 0.3515625, "learning_rate": 3.736441799265127e-06, "loss": 0.8959, "step": 2830 }, { "epoch": 2.231769806858494, "grad_norm": 0.3515625, "learning_rate": 3.729289928719113e-06, "loss": 0.9258, "step": 2831 }, { "epoch": 2.2325581395348837, "grad_norm": 0.34765625, "learning_rate": 3.722143340224682e-06, "loss": 0.9788, "step": 2832 }, { "epoch": 2.2333464722112732, "grad_norm": 0.33984375, "learning_rate": 3.7150020398016717e-06, "loss": 0.9497, "step": 2833 }, { "epoch": 2.2341348048876624, "grad_norm": 0.353515625, "learning_rate": 3.7078660334654616e-06, "loss": 0.9041, "step": 2834 }, { "epoch": 2.234923137564052, "grad_norm": 0.341796875, "learning_rate": 3.7007353272269764e-06, "loss": 0.9105, "step": 2835 }, { "epoch": 2.2357114702404415, "grad_norm": 0.337890625, "learning_rate": 3.693609927092674e-06, "loss": 0.924, "step": 2836 }, { "epoch": 2.236499802916831, "grad_norm": 0.33984375, "learning_rate": 3.6864898390645434e-06, "loss": 0.9221, "step": 2837 }, { "epoch": 2.23728813559322, "grad_norm": 0.353515625, "learning_rate": 3.6793750691400996e-06, "loss": 0.9116, "step": 2838 }, { "epoch": 2.2380764682696097, "grad_norm": 0.349609375, "learning_rate": 3.672265623312371e-06, "loss": 0.9874, "step": 2839 }, { "epoch": 2.2388648009459993, "grad_norm": 0.333984375, "learning_rate": 3.665161507569914e-06, "loss": 0.9351, "step": 2840 }, { "epoch": 2.239653133622389, "grad_norm": 0.341796875, "learning_rate": 3.6580627278967883e-06, "loss": 0.9147, "step": 2841 }, { "epoch": 2.240441466298778, "grad_norm": 0.337890625, "learning_rate": 3.65096929027256e-06, "loss": 0.8864, "step": 2842 }, { "epoch": 2.2412297989751675, "grad_norm": 0.349609375, "learning_rate": 3.643881200672289e-06, "loss": 0.9336, "step": 2843 }, { "epoch": 2.242018131651557, "grad_norm": 0.341796875, "learning_rate": 3.636798465066537e-06, "loss": 0.9442, "step": 2844 }, { "epoch": 2.2428064643279466, "grad_norm": 0.341796875, "learning_rate": 3.62972108942136e-06, "loss": 0.905, "step": 2845 }, { "epoch": 2.2435947970043357, "grad_norm": 0.34375, "learning_rate": 3.622649079698293e-06, "loss": 0.9191, "step": 2846 }, { "epoch": 2.2443831296807253, "grad_norm": 0.341796875, "learning_rate": 3.6155824418543482e-06, "loss": 0.9343, "step": 2847 }, { "epoch": 2.245171462357115, "grad_norm": 0.33984375, "learning_rate": 3.6085211818420176e-06, "loss": 0.917, "step": 2848 }, { "epoch": 2.245959795033504, "grad_norm": 0.33984375, "learning_rate": 3.6014653056092598e-06, "loss": 0.9203, "step": 2849 }, { "epoch": 2.2467481277098935, "grad_norm": 0.341796875, "learning_rate": 3.5944148190995077e-06, "loss": 0.9225, "step": 2850 }, { "epoch": 2.247536460386283, "grad_norm": 0.34375, "learning_rate": 3.587369728251647e-06, "loss": 0.9387, "step": 2851 }, { "epoch": 2.2483247930626726, "grad_norm": 0.34375, "learning_rate": 3.580330039000014e-06, "loss": 0.9304, "step": 2852 }, { "epoch": 2.2491131257390617, "grad_norm": 0.359375, "learning_rate": 3.573295757274401e-06, "loss": 0.9583, "step": 2853 }, { "epoch": 2.2499014584154513, "grad_norm": 0.34375, "learning_rate": 3.5662668890000416e-06, "loss": 0.909, "step": 2854 }, { "epoch": 2.250689791091841, "grad_norm": 0.33984375, "learning_rate": 3.559243440097623e-06, "loss": 0.9578, "step": 2855 }, { "epoch": 2.25147812376823, "grad_norm": 0.3359375, "learning_rate": 3.5522254164832458e-06, "loss": 0.9353, "step": 2856 }, { "epoch": 2.2522664564446195, "grad_norm": 0.349609375, "learning_rate": 3.545212824068456e-06, "loss": 0.8952, "step": 2857 }, { "epoch": 2.253054789121009, "grad_norm": 0.34375, "learning_rate": 3.5382056687602185e-06, "loss": 0.945, "step": 2858 }, { "epoch": 2.2538431217973987, "grad_norm": 0.349609375, "learning_rate": 3.5312039564609203e-06, "loss": 0.9426, "step": 2859 }, { "epoch": 2.2546314544737878, "grad_norm": 0.341796875, "learning_rate": 3.5242076930683644e-06, "loss": 0.928, "step": 2860 }, { "epoch": 2.2554197871501773, "grad_norm": 0.34375, "learning_rate": 3.5172168844757625e-06, "loss": 0.9422, "step": 2861 }, { "epoch": 2.256208119826567, "grad_norm": 0.3515625, "learning_rate": 3.510231536571731e-06, "loss": 0.9615, "step": 2862 }, { "epoch": 2.2569964525029564, "grad_norm": 0.3515625, "learning_rate": 3.5032516552402885e-06, "loss": 0.9019, "step": 2863 }, { "epoch": 2.2577847851793456, "grad_norm": 0.345703125, "learning_rate": 3.4962772463608463e-06, "loss": 0.9238, "step": 2864 }, { "epoch": 2.258573117855735, "grad_norm": 0.345703125, "learning_rate": 3.4893083158082096e-06, "loss": 0.9212, "step": 2865 }, { "epoch": 2.2593614505321247, "grad_norm": 0.33984375, "learning_rate": 3.482344869452565e-06, "loss": 0.9201, "step": 2866 }, { "epoch": 2.2601497832085142, "grad_norm": 0.337890625, "learning_rate": 3.4753869131594832e-06, "loss": 0.9437, "step": 2867 }, { "epoch": 2.2609381158849033, "grad_norm": 0.3359375, "learning_rate": 3.4684344527899117e-06, "loss": 0.9069, "step": 2868 }, { "epoch": 2.261726448561293, "grad_norm": 0.34375, "learning_rate": 3.4614874942001543e-06, "loss": 0.9626, "step": 2869 }, { "epoch": 2.2625147812376825, "grad_norm": 0.345703125, "learning_rate": 3.454546043241904e-06, "loss": 0.9532, "step": 2870 }, { "epoch": 2.2633031139140716, "grad_norm": 0.33984375, "learning_rate": 3.447610105762197e-06, "loss": 0.9544, "step": 2871 }, { "epoch": 2.264091446590461, "grad_norm": 0.33984375, "learning_rate": 3.4406796876034323e-06, "loss": 0.9332, "step": 2872 }, { "epoch": 2.2648797792668507, "grad_norm": 0.396484375, "learning_rate": 3.4337547946033557e-06, "loss": 0.9409, "step": 2873 }, { "epoch": 2.2656681119432402, "grad_norm": 0.345703125, "learning_rate": 3.4268354325950637e-06, "loss": 0.9484, "step": 2874 }, { "epoch": 2.2664564446196294, "grad_norm": 0.33984375, "learning_rate": 3.4199216074069906e-06, "loss": 0.9368, "step": 2875 }, { "epoch": 2.267244777296019, "grad_norm": 0.33984375, "learning_rate": 3.413013324862907e-06, "loss": 0.9061, "step": 2876 }, { "epoch": 2.2680331099724085, "grad_norm": 0.341796875, "learning_rate": 3.4061105907819202e-06, "loss": 0.9381, "step": 2877 }, { "epoch": 2.2688214426487976, "grad_norm": 0.345703125, "learning_rate": 3.399213410978447e-06, "loss": 0.9463, "step": 2878 }, { "epoch": 2.269609775325187, "grad_norm": 0.341796875, "learning_rate": 3.3923217912622495e-06, "loss": 0.9314, "step": 2879 }, { "epoch": 2.2703981080015767, "grad_norm": 0.34375, "learning_rate": 3.3854357374383905e-06, "loss": 0.9476, "step": 2880 }, { "epoch": 2.2711864406779663, "grad_norm": 0.34765625, "learning_rate": 3.378555255307252e-06, "loss": 0.9451, "step": 2881 }, { "epoch": 2.2719747733543554, "grad_norm": 0.34765625, "learning_rate": 3.3716803506645125e-06, "loss": 0.9312, "step": 2882 }, { "epoch": 2.272763106030745, "grad_norm": 0.359375, "learning_rate": 3.3648110293011592e-06, "loss": 0.9466, "step": 2883 }, { "epoch": 2.2735514387071345, "grad_norm": 0.345703125, "learning_rate": 3.357947297003482e-06, "loss": 0.9504, "step": 2884 }, { "epoch": 2.2743397713835236, "grad_norm": 0.3515625, "learning_rate": 3.351089159553057e-06, "loss": 0.9455, "step": 2885 }, { "epoch": 2.275128104059913, "grad_norm": 0.33984375, "learning_rate": 3.344236622726743e-06, "loss": 0.9149, "step": 2886 }, { "epoch": 2.2759164367363027, "grad_norm": 0.33984375, "learning_rate": 3.3373896922966863e-06, "loss": 0.915, "step": 2887 }, { "epoch": 2.2767047694126923, "grad_norm": 0.3515625, "learning_rate": 3.330548374030309e-06, "loss": 0.9508, "step": 2888 }, { "epoch": 2.2774931020890814, "grad_norm": 0.34765625, "learning_rate": 3.3237126736903168e-06, "loss": 0.9559, "step": 2889 }, { "epoch": 2.278281434765471, "grad_norm": 0.34765625, "learning_rate": 3.316882597034663e-06, "loss": 0.9106, "step": 2890 }, { "epoch": 2.2790697674418605, "grad_norm": 0.337890625, "learning_rate": 3.3100581498165783e-06, "loss": 0.9112, "step": 2891 }, { "epoch": 2.27985810011825, "grad_norm": 0.33984375, "learning_rate": 3.303239337784547e-06, "loss": 0.9478, "step": 2892 }, { "epoch": 2.280646432794639, "grad_norm": 0.33984375, "learning_rate": 3.296426166682304e-06, "loss": 0.9416, "step": 2893 }, { "epoch": 2.2814347654710287, "grad_norm": 0.345703125, "learning_rate": 3.2896186422488463e-06, "loss": 0.9424, "step": 2894 }, { "epoch": 2.2822230981474183, "grad_norm": 0.34375, "learning_rate": 3.2828167702183945e-06, "loss": 0.9334, "step": 2895 }, { "epoch": 2.283011430823808, "grad_norm": 0.34375, "learning_rate": 3.2760205563204195e-06, "loss": 0.9199, "step": 2896 }, { "epoch": 2.283799763500197, "grad_norm": 0.33984375, "learning_rate": 3.2692300062796257e-06, "loss": 0.9331, "step": 2897 }, { "epoch": 2.2845880961765865, "grad_norm": 0.33984375, "learning_rate": 3.262445125815945e-06, "loss": 0.95, "step": 2898 }, { "epoch": 2.285376428852976, "grad_norm": 0.33203125, "learning_rate": 3.255665920644533e-06, "loss": 0.8997, "step": 2899 }, { "epoch": 2.286164761529365, "grad_norm": 0.345703125, "learning_rate": 3.2488923964757656e-06, "loss": 0.9323, "step": 2900 }, { "epoch": 2.2869530942057548, "grad_norm": 0.345703125, "learning_rate": 3.242124559015234e-06, "loss": 0.9479, "step": 2901 }, { "epoch": 2.2877414268821443, "grad_norm": 0.345703125, "learning_rate": 3.2353624139637383e-06, "loss": 0.9002, "step": 2902 }, { "epoch": 2.288529759558534, "grad_norm": 0.33984375, "learning_rate": 3.228605967017284e-06, "loss": 0.862, "step": 2903 }, { "epoch": 2.289318092234923, "grad_norm": 0.337890625, "learning_rate": 3.221855223867076e-06, "loss": 0.9202, "step": 2904 }, { "epoch": 2.2901064249113126, "grad_norm": 0.33984375, "learning_rate": 3.2151101901995184e-06, "loss": 0.964, "step": 2905 }, { "epoch": 2.290894757587702, "grad_norm": 0.337890625, "learning_rate": 3.208370871696199e-06, "loss": 0.9047, "step": 2906 }, { "epoch": 2.2916830902640912, "grad_norm": 0.337890625, "learning_rate": 3.2016372740339e-06, "loss": 0.9141, "step": 2907 }, { "epoch": 2.292471422940481, "grad_norm": 0.35546875, "learning_rate": 3.194909402884576e-06, "loss": 0.9269, "step": 2908 }, { "epoch": 2.2932597556168703, "grad_norm": 0.33984375, "learning_rate": 3.1881872639153655e-06, "loss": 0.9307, "step": 2909 }, { "epoch": 2.29404808829326, "grad_norm": 0.34375, "learning_rate": 3.1814708627885736e-06, "loss": 0.9478, "step": 2910 }, { "epoch": 2.294836420969649, "grad_norm": 0.3515625, "learning_rate": 3.1747602051616787e-06, "loss": 0.9022, "step": 2911 }, { "epoch": 2.2956247536460386, "grad_norm": 0.341796875, "learning_rate": 3.1680552966873057e-06, "loss": 0.9287, "step": 2912 }, { "epoch": 2.296413086322428, "grad_norm": 0.376953125, "learning_rate": 3.161356143013258e-06, "loss": 0.9053, "step": 2913 }, { "epoch": 2.2972014189988177, "grad_norm": 0.34765625, "learning_rate": 3.1546627497824767e-06, "loss": 0.9639, "step": 2914 }, { "epoch": 2.297989751675207, "grad_norm": 0.341796875, "learning_rate": 3.1479751226330567e-06, "loss": 0.9288, "step": 2915 }, { "epoch": 2.2987780843515964, "grad_norm": 0.345703125, "learning_rate": 3.1412932671982368e-06, "loss": 0.9446, "step": 2916 }, { "epoch": 2.299566417027986, "grad_norm": 0.34375, "learning_rate": 3.13461718910638e-06, "loss": 0.9338, "step": 2917 }, { "epoch": 2.3003547497043755, "grad_norm": 0.34375, "learning_rate": 3.127946893981009e-06, "loss": 0.9417, "step": 2918 }, { "epoch": 2.3011430823807646, "grad_norm": 0.349609375, "learning_rate": 3.1212823874407517e-06, "loss": 0.9447, "step": 2919 }, { "epoch": 2.301931415057154, "grad_norm": 0.34375, "learning_rate": 3.1146236750993763e-06, "loss": 0.9387, "step": 2920 }, { "epoch": 2.3027197477335437, "grad_norm": 0.357421875, "learning_rate": 3.107970762565755e-06, "loss": 0.953, "step": 2921 }, { "epoch": 2.303508080409933, "grad_norm": 0.33984375, "learning_rate": 3.101323655443882e-06, "loss": 0.9499, "step": 2922 }, { "epoch": 2.3042964130863224, "grad_norm": 0.34375, "learning_rate": 3.094682359332871e-06, "loss": 0.9232, "step": 2923 }, { "epoch": 2.305084745762712, "grad_norm": 0.34375, "learning_rate": 3.0880468798269293e-06, "loss": 0.9262, "step": 2924 }, { "epoch": 2.3058730784391015, "grad_norm": 0.33984375, "learning_rate": 3.0814172225153626e-06, "loss": 0.9326, "step": 2925 }, { "epoch": 2.3066614111154906, "grad_norm": 0.341796875, "learning_rate": 3.074793392982579e-06, "loss": 0.922, "step": 2926 }, { "epoch": 2.30744974379188, "grad_norm": 0.34765625, "learning_rate": 3.068175396808074e-06, "loss": 0.9639, "step": 2927 }, { "epoch": 2.3082380764682697, "grad_norm": 0.337890625, "learning_rate": 3.0615632395664395e-06, "loss": 0.8824, "step": 2928 }, { "epoch": 2.309026409144659, "grad_norm": 0.345703125, "learning_rate": 3.0549569268273316e-06, "loss": 0.9334, "step": 2929 }, { "epoch": 2.3098147418210484, "grad_norm": 0.3359375, "learning_rate": 3.0483564641554953e-06, "loss": 0.9126, "step": 2930 }, { "epoch": 2.310603074497438, "grad_norm": 0.3515625, "learning_rate": 3.0417618571107443e-06, "loss": 0.9578, "step": 2931 }, { "epoch": 2.3113914071738275, "grad_norm": 0.34375, "learning_rate": 3.035173111247963e-06, "loss": 0.9586, "step": 2932 }, { "epoch": 2.3121797398502166, "grad_norm": 0.33984375, "learning_rate": 3.0285902321170945e-06, "loss": 0.9143, "step": 2933 }, { "epoch": 2.312968072526606, "grad_norm": 0.34375, "learning_rate": 3.022013225263142e-06, "loss": 0.9594, "step": 2934 }, { "epoch": 2.3137564052029957, "grad_norm": 0.361328125, "learning_rate": 3.015442096226163e-06, "loss": 0.9593, "step": 2935 }, { "epoch": 2.314544737879385, "grad_norm": 0.33984375, "learning_rate": 3.0088768505412623e-06, "loss": 0.9234, "step": 2936 }, { "epoch": 2.3153330705557744, "grad_norm": 0.34375, "learning_rate": 3.00231749373859e-06, "loss": 0.9344, "step": 2937 }, { "epoch": 2.316121403232164, "grad_norm": 0.33984375, "learning_rate": 2.9957640313433366e-06, "loss": 0.9468, "step": 2938 }, { "epoch": 2.3169097359085535, "grad_norm": 0.353515625, "learning_rate": 2.989216468875725e-06, "loss": 0.9587, "step": 2939 }, { "epoch": 2.317698068584943, "grad_norm": 0.345703125, "learning_rate": 2.9826748118510107e-06, "loss": 0.9403, "step": 2940 }, { "epoch": 2.318486401261332, "grad_norm": 0.34765625, "learning_rate": 2.976139065779473e-06, "loss": 0.9459, "step": 2941 }, { "epoch": 2.3192747339377218, "grad_norm": 0.341796875, "learning_rate": 2.969609236166413e-06, "loss": 0.9572, "step": 2942 }, { "epoch": 2.3200630666141113, "grad_norm": 0.341796875, "learning_rate": 2.9630853285121506e-06, "loss": 0.9067, "step": 2943 }, { "epoch": 2.3208513992905004, "grad_norm": 0.345703125, "learning_rate": 2.9565673483120126e-06, "loss": 0.936, "step": 2944 }, { "epoch": 2.32163973196689, "grad_norm": 0.3515625, "learning_rate": 2.950055301056336e-06, "loss": 0.9698, "step": 2945 }, { "epoch": 2.3224280646432796, "grad_norm": 0.33984375, "learning_rate": 2.9435491922304603e-06, "loss": 0.9298, "step": 2946 }, { "epoch": 2.323216397319669, "grad_norm": 0.345703125, "learning_rate": 2.9370490273147224e-06, "loss": 0.9762, "step": 2947 }, { "epoch": 2.3240047299960582, "grad_norm": 0.33984375, "learning_rate": 2.930554811784451e-06, "loss": 0.9442, "step": 2948 }, { "epoch": 2.324793062672448, "grad_norm": 0.34765625, "learning_rate": 2.9240665511099643e-06, "loss": 0.9412, "step": 2949 }, { "epoch": 2.3255813953488373, "grad_norm": 0.34375, "learning_rate": 2.91758425075657e-06, "loss": 0.9298, "step": 2950 }, { "epoch": 2.3263697280252265, "grad_norm": 0.3515625, "learning_rate": 2.911107916184539e-06, "loss": 0.9576, "step": 2951 }, { "epoch": 2.327158060701616, "grad_norm": 0.349609375, "learning_rate": 2.9046375528491378e-06, "loss": 0.9384, "step": 2952 }, { "epoch": 2.3279463933780056, "grad_norm": 0.34765625, "learning_rate": 2.898173166200591e-06, "loss": 0.9808, "step": 2953 }, { "epoch": 2.328734726054395, "grad_norm": 0.353515625, "learning_rate": 2.8917147616840933e-06, "loss": 0.9668, "step": 2954 }, { "epoch": 2.3295230587307842, "grad_norm": 0.35546875, "learning_rate": 2.885262344739792e-06, "loss": 0.9191, "step": 2955 }, { "epoch": 2.330311391407174, "grad_norm": 0.34375, "learning_rate": 2.8788159208027975e-06, "loss": 0.9378, "step": 2956 }, { "epoch": 2.3310997240835634, "grad_norm": 0.3359375, "learning_rate": 2.872375495303178e-06, "loss": 0.9396, "step": 2957 }, { "epoch": 2.3318880567599525, "grad_norm": 0.365234375, "learning_rate": 2.865941073665942e-06, "loss": 0.9082, "step": 2958 }, { "epoch": 2.332676389436342, "grad_norm": 0.3515625, "learning_rate": 2.859512661311037e-06, "loss": 0.9671, "step": 2959 }, { "epoch": 2.3334647221127316, "grad_norm": 0.345703125, "learning_rate": 2.853090263653354e-06, "loss": 0.937, "step": 2960 }, { "epoch": 2.334253054789121, "grad_norm": 0.337890625, "learning_rate": 2.8466738861027143e-06, "loss": 0.9212, "step": 2961 }, { "epoch": 2.3350413874655103, "grad_norm": 0.34375, "learning_rate": 2.8402635340638775e-06, "loss": 0.9282, "step": 2962 }, { "epoch": 2.3358297201419, "grad_norm": 0.3515625, "learning_rate": 2.8338592129365194e-06, "loss": 0.9493, "step": 2963 }, { "epoch": 2.3366180528182894, "grad_norm": 0.34765625, "learning_rate": 2.8274609281152322e-06, "loss": 0.9274, "step": 2964 }, { "epoch": 2.337406385494679, "grad_norm": 0.34375, "learning_rate": 2.821068684989531e-06, "loss": 0.9564, "step": 2965 }, { "epoch": 2.338194718171068, "grad_norm": 0.33984375, "learning_rate": 2.814682488943836e-06, "loss": 0.941, "step": 2966 }, { "epoch": 2.3389830508474576, "grad_norm": 0.353515625, "learning_rate": 2.8083023453574867e-06, "loss": 0.9533, "step": 2967 }, { "epoch": 2.339771383523847, "grad_norm": 0.341796875, "learning_rate": 2.801928259604705e-06, "loss": 0.9453, "step": 2968 }, { "epoch": 2.3405597162002367, "grad_norm": 0.34375, "learning_rate": 2.795560237054623e-06, "loss": 0.9333, "step": 2969 }, { "epoch": 2.341348048876626, "grad_norm": 0.353515625, "learning_rate": 2.7891982830712614e-06, "loss": 0.9592, "step": 2970 }, { "epoch": 2.3421363815530154, "grad_norm": 0.345703125, "learning_rate": 2.7828424030135305e-06, "loss": 0.9144, "step": 2971 }, { "epoch": 2.342924714229405, "grad_norm": 0.337890625, "learning_rate": 2.7764926022352232e-06, "loss": 0.9228, "step": 2972 }, { "epoch": 2.343713046905794, "grad_norm": 0.345703125, "learning_rate": 2.7701488860850134e-06, "loss": 0.9371, "step": 2973 }, { "epoch": 2.3445013795821836, "grad_norm": 0.3515625, "learning_rate": 2.763811259906447e-06, "loss": 0.924, "step": 2974 }, { "epoch": 2.345289712258573, "grad_norm": 0.349609375, "learning_rate": 2.757479729037942e-06, "loss": 0.9714, "step": 2975 }, { "epoch": 2.3460780449349627, "grad_norm": 0.34375, "learning_rate": 2.7511542988127815e-06, "loss": 0.9299, "step": 2976 }, { "epoch": 2.346866377611352, "grad_norm": 0.33984375, "learning_rate": 2.7448349745591108e-06, "loss": 0.9668, "step": 2977 }, { "epoch": 2.3476547102877414, "grad_norm": 0.34765625, "learning_rate": 2.7385217615999303e-06, "loss": 0.9247, "step": 2978 }, { "epoch": 2.348443042964131, "grad_norm": 0.337890625, "learning_rate": 2.732214665253092e-06, "loss": 0.9507, "step": 2979 }, { "epoch": 2.34923137564052, "grad_norm": 0.3359375, "learning_rate": 2.7259136908313e-06, "loss": 0.9051, "step": 2980 }, { "epoch": 2.3500197083169097, "grad_norm": 0.34375, "learning_rate": 2.7196188436420955e-06, "loss": 0.9254, "step": 2981 }, { "epoch": 2.350808040993299, "grad_norm": 0.341796875, "learning_rate": 2.7133301289878644e-06, "loss": 0.9196, "step": 2982 }, { "epoch": 2.3515963736696888, "grad_norm": 0.337890625, "learning_rate": 2.7070475521658226e-06, "loss": 0.9345, "step": 2983 }, { "epoch": 2.352384706346078, "grad_norm": 0.3359375, "learning_rate": 2.7007711184680176e-06, "loss": 0.9395, "step": 2984 }, { "epoch": 2.3531730390224674, "grad_norm": 0.34765625, "learning_rate": 2.694500833181323e-06, "loss": 0.9166, "step": 2985 }, { "epoch": 2.353961371698857, "grad_norm": 0.337890625, "learning_rate": 2.6882367015874313e-06, "loss": 0.9187, "step": 2986 }, { "epoch": 2.354749704375246, "grad_norm": 0.34375, "learning_rate": 2.681978728962853e-06, "loss": 0.9243, "step": 2987 }, { "epoch": 2.3555380370516357, "grad_norm": 0.37109375, "learning_rate": 2.6757269205789118e-06, "loss": 0.942, "step": 2988 }, { "epoch": 2.3563263697280252, "grad_norm": 0.341796875, "learning_rate": 2.669481281701739e-06, "loss": 0.9221, "step": 2989 }, { "epoch": 2.357114702404415, "grad_norm": 0.33984375, "learning_rate": 2.6632418175922613e-06, "loss": 0.9184, "step": 2990 }, { "epoch": 2.3579030350808043, "grad_norm": 0.341796875, "learning_rate": 2.6570085335062166e-06, "loss": 0.9486, "step": 2991 }, { "epoch": 2.3586913677571935, "grad_norm": 0.345703125, "learning_rate": 2.65078143469413e-06, "loss": 0.9507, "step": 2992 }, { "epoch": 2.359479700433583, "grad_norm": 0.33984375, "learning_rate": 2.6445605264013206e-06, "loss": 0.9032, "step": 2993 }, { "epoch": 2.3602680331099726, "grad_norm": 0.3515625, "learning_rate": 2.638345813867883e-06, "loss": 0.9476, "step": 2994 }, { "epoch": 2.3610563657863617, "grad_norm": 0.349609375, "learning_rate": 2.632137302328701e-06, "loss": 0.9576, "step": 2995 }, { "epoch": 2.3618446984627512, "grad_norm": 0.341796875, "learning_rate": 2.6259349970134406e-06, "loss": 0.8704, "step": 2996 }, { "epoch": 2.362633031139141, "grad_norm": 0.33203125, "learning_rate": 2.6197389031465328e-06, "loss": 0.8844, "step": 2997 }, { "epoch": 2.3634213638155304, "grad_norm": 0.33984375, "learning_rate": 2.6135490259471695e-06, "loss": 0.9051, "step": 2998 }, { "epoch": 2.3642096964919195, "grad_norm": 0.341796875, "learning_rate": 2.6073653706293202e-06, "loss": 0.9317, "step": 2999 }, { "epoch": 2.364998029168309, "grad_norm": 0.34765625, "learning_rate": 2.6011879424017006e-06, "loss": 0.92, "step": 3000 }, { "epoch": 2.3657863618446986, "grad_norm": 0.341796875, "learning_rate": 2.595016746467799e-06, "loss": 0.9334, "step": 3001 }, { "epoch": 2.3665746945210877, "grad_norm": 0.33984375, "learning_rate": 2.5888517880258323e-06, "loss": 0.9511, "step": 3002 }, { "epoch": 2.3673630271974773, "grad_norm": 0.34375, "learning_rate": 2.582693072268778e-06, "loss": 0.919, "step": 3003 }, { "epoch": 2.368151359873867, "grad_norm": 0.33984375, "learning_rate": 2.576540604384349e-06, "loss": 0.9131, "step": 3004 }, { "epoch": 2.3689396925502564, "grad_norm": 0.3359375, "learning_rate": 2.5703943895549974e-06, "loss": 0.9368, "step": 3005 }, { "epoch": 2.3697280252266455, "grad_norm": 0.359375, "learning_rate": 2.5642544329579088e-06, "loss": 0.9431, "step": 3006 }, { "epoch": 2.370516357903035, "grad_norm": 0.349609375, "learning_rate": 2.5581207397649953e-06, "loss": 0.9023, "step": 3007 }, { "epoch": 2.3713046905794246, "grad_norm": 0.34375, "learning_rate": 2.5519933151428943e-06, "loss": 0.9589, "step": 3008 }, { "epoch": 2.3720930232558137, "grad_norm": 0.3359375, "learning_rate": 2.5458721642529637e-06, "loss": 0.9467, "step": 3009 }, { "epoch": 2.3728813559322033, "grad_norm": 0.357421875, "learning_rate": 2.5397572922512735e-06, "loss": 0.929, "step": 3010 }, { "epoch": 2.373669688608593, "grad_norm": 0.3515625, "learning_rate": 2.5336487042886106e-06, "loss": 0.9126, "step": 3011 }, { "epoch": 2.3744580212849824, "grad_norm": 0.34375, "learning_rate": 2.5275464055104615e-06, "loss": 0.9187, "step": 3012 }, { "epoch": 2.3752463539613715, "grad_norm": 0.349609375, "learning_rate": 2.5214504010570217e-06, "loss": 0.9136, "step": 3013 }, { "epoch": 2.376034686637761, "grad_norm": 0.361328125, "learning_rate": 2.515360696063179e-06, "loss": 0.9189, "step": 3014 }, { "epoch": 2.3768230193141506, "grad_norm": 0.341796875, "learning_rate": 2.509277295658521e-06, "loss": 0.9605, "step": 3015 }, { "epoch": 2.37761135199054, "grad_norm": 0.345703125, "learning_rate": 2.5032002049673175e-06, "loss": 0.9599, "step": 3016 }, { "epoch": 2.3783996846669293, "grad_norm": 0.33984375, "learning_rate": 2.4971294291085313e-06, "loss": 0.9493, "step": 3017 }, { "epoch": 2.379188017343319, "grad_norm": 0.34765625, "learning_rate": 2.491064973195798e-06, "loss": 0.9416, "step": 3018 }, { "epoch": 2.3799763500197084, "grad_norm": 0.341796875, "learning_rate": 2.4850068423374376e-06, "loss": 0.9329, "step": 3019 }, { "epoch": 2.380764682696098, "grad_norm": 0.345703125, "learning_rate": 2.478955041636435e-06, "loss": 0.9168, "step": 3020 }, { "epoch": 2.381553015372487, "grad_norm": 0.34375, "learning_rate": 2.4729095761904487e-06, "loss": 0.9537, "step": 3021 }, { "epoch": 2.3823413480488767, "grad_norm": 0.349609375, "learning_rate": 2.466870451091796e-06, "loss": 0.9198, "step": 3022 }, { "epoch": 2.383129680725266, "grad_norm": 0.341796875, "learning_rate": 2.4608376714274617e-06, "loss": 0.9407, "step": 3023 }, { "epoch": 2.3839180134016553, "grad_norm": 0.345703125, "learning_rate": 2.4548112422790695e-06, "loss": 0.9114, "step": 3024 }, { "epoch": 2.384706346078045, "grad_norm": 0.34375, "learning_rate": 2.4487911687229116e-06, "loss": 0.9315, "step": 3025 }, { "epoch": 2.3854946787544344, "grad_norm": 0.33984375, "learning_rate": 2.442777455829919e-06, "loss": 0.9146, "step": 3026 }, { "epoch": 2.386283011430824, "grad_norm": 0.341796875, "learning_rate": 2.4367701086656625e-06, "loss": 0.913, "step": 3027 }, { "epoch": 2.387071344107213, "grad_norm": 0.3359375, "learning_rate": 2.430769132290357e-06, "loss": 0.9286, "step": 3028 }, { "epoch": 2.3878596767836027, "grad_norm": 0.34375, "learning_rate": 2.42477453175884e-06, "loss": 0.9167, "step": 3029 }, { "epoch": 2.3886480094599922, "grad_norm": 0.3359375, "learning_rate": 2.4187863121205933e-06, "loss": 0.9033, "step": 3030 }, { "epoch": 2.3894363421363813, "grad_norm": 0.341796875, "learning_rate": 2.4128044784197124e-06, "loss": 0.9436, "step": 3031 }, { "epoch": 2.390224674812771, "grad_norm": 0.359375, "learning_rate": 2.406829035694923e-06, "loss": 0.971, "step": 3032 }, { "epoch": 2.3910130074891605, "grad_norm": 0.353515625, "learning_rate": 2.400859988979555e-06, "loss": 0.9274, "step": 3033 }, { "epoch": 2.39180134016555, "grad_norm": 0.34375, "learning_rate": 2.3948973433015564e-06, "loss": 0.9074, "step": 3034 }, { "epoch": 2.392589672841939, "grad_norm": 0.337890625, "learning_rate": 2.388941103683493e-06, "loss": 0.925, "step": 3035 }, { "epoch": 2.3933780055183287, "grad_norm": 0.34765625, "learning_rate": 2.3829912751425244e-06, "loss": 0.939, "step": 3036 }, { "epoch": 2.3941663381947182, "grad_norm": 0.365234375, "learning_rate": 2.377047862690407e-06, "loss": 0.9248, "step": 3037 }, { "epoch": 2.3949546708711074, "grad_norm": 0.34375, "learning_rate": 2.3711108713334995e-06, "loss": 0.9386, "step": 3038 }, { "epoch": 2.395743003547497, "grad_norm": 0.349609375, "learning_rate": 2.3651803060727484e-06, "loss": 0.9272, "step": 3039 }, { "epoch": 2.3965313362238865, "grad_norm": 0.345703125, "learning_rate": 2.3592561719036956e-06, "loss": 0.9233, "step": 3040 }, { "epoch": 2.397319668900276, "grad_norm": 0.337890625, "learning_rate": 2.353338473816451e-06, "loss": 0.9201, "step": 3041 }, { "epoch": 2.3981080015766656, "grad_norm": 0.33984375, "learning_rate": 2.3474272167957144e-06, "loss": 0.9122, "step": 3042 }, { "epoch": 2.3988963342530547, "grad_norm": 0.341796875, "learning_rate": 2.3415224058207565e-06, "loss": 0.9491, "step": 3043 }, { "epoch": 2.3996846669294443, "grad_norm": 0.35546875, "learning_rate": 2.335624045865419e-06, "loss": 0.9245, "step": 3044 }, { "epoch": 2.400472999605834, "grad_norm": 0.349609375, "learning_rate": 2.3297321418981077e-06, "loss": 0.9554, "step": 3045 }, { "epoch": 2.401261332282223, "grad_norm": 0.341796875, "learning_rate": 2.3238466988817934e-06, "loss": 0.9014, "step": 3046 }, { "epoch": 2.4020496649586125, "grad_norm": 0.33984375, "learning_rate": 2.3179677217740015e-06, "loss": 0.9143, "step": 3047 }, { "epoch": 2.402837997635002, "grad_norm": 0.3515625, "learning_rate": 2.312095215526814e-06, "loss": 0.909, "step": 3048 }, { "epoch": 2.4036263303113916, "grad_norm": 0.34765625, "learning_rate": 2.306229185086859e-06, "loss": 0.9115, "step": 3049 }, { "epoch": 2.4044146629877807, "grad_norm": 0.3828125, "learning_rate": 2.300369635395312e-06, "loss": 0.9457, "step": 3050 }, { "epoch": 2.4052029956641703, "grad_norm": 0.3359375, "learning_rate": 2.29451657138789e-06, "loss": 0.9111, "step": 3051 }, { "epoch": 2.40599132834056, "grad_norm": 0.34375, "learning_rate": 2.2886699979948445e-06, "loss": 0.9052, "step": 3052 }, { "epoch": 2.406779661016949, "grad_norm": 0.353515625, "learning_rate": 2.282829920140962e-06, "loss": 0.8943, "step": 3053 }, { "epoch": 2.4075679936933385, "grad_norm": 0.33984375, "learning_rate": 2.2769963427455555e-06, "loss": 0.9475, "step": 3054 }, { "epoch": 2.408356326369728, "grad_norm": 0.33984375, "learning_rate": 2.271169270722464e-06, "loss": 0.9229, "step": 3055 }, { "epoch": 2.4091446590461176, "grad_norm": 0.34375, "learning_rate": 2.265348708980046e-06, "loss": 0.9212, "step": 3056 }, { "epoch": 2.4099329917225067, "grad_norm": 0.341796875, "learning_rate": 2.259534662421179e-06, "loss": 0.9157, "step": 3057 }, { "epoch": 2.4107213243988963, "grad_norm": 0.33984375, "learning_rate": 2.2537271359432457e-06, "loss": 0.9432, "step": 3058 }, { "epoch": 2.411509657075286, "grad_norm": 0.34375, "learning_rate": 2.247926134438144e-06, "loss": 0.9481, "step": 3059 }, { "epoch": 2.412297989751675, "grad_norm": 0.345703125, "learning_rate": 2.242131662792272e-06, "loss": 0.9326, "step": 3060 }, { "epoch": 2.4130863224280645, "grad_norm": 0.33984375, "learning_rate": 2.2363437258865273e-06, "loss": 0.9078, "step": 3061 }, { "epoch": 2.413874655104454, "grad_norm": 0.34375, "learning_rate": 2.230562328596306e-06, "loss": 0.914, "step": 3062 }, { "epoch": 2.4146629877808437, "grad_norm": 0.453125, "learning_rate": 2.2247874757914865e-06, "loss": 0.9083, "step": 3063 }, { "epoch": 2.4154513204572328, "grad_norm": 0.341796875, "learning_rate": 2.2190191723364495e-06, "loss": 0.9284, "step": 3064 }, { "epoch": 2.4162396531336223, "grad_norm": 0.33203125, "learning_rate": 2.2132574230900484e-06, "loss": 0.9255, "step": 3065 }, { "epoch": 2.417027985810012, "grad_norm": 0.349609375, "learning_rate": 2.2075022329056193e-06, "loss": 0.9225, "step": 3066 }, { "epoch": 2.4178163184864014, "grad_norm": 0.34375, "learning_rate": 2.2017536066309687e-06, "loss": 0.9203, "step": 3067 }, { "epoch": 2.4186046511627906, "grad_norm": 0.3515625, "learning_rate": 2.1960115491083754e-06, "loss": 0.9709, "step": 3068 }, { "epoch": 2.41939298383918, "grad_norm": 0.365234375, "learning_rate": 2.190276065174596e-06, "loss": 0.9293, "step": 3069 }, { "epoch": 2.4201813165155697, "grad_norm": 0.3515625, "learning_rate": 2.1845471596608382e-06, "loss": 0.9087, "step": 3070 }, { "epoch": 2.4209696491919592, "grad_norm": 0.3359375, "learning_rate": 2.178824837392768e-06, "loss": 0.9122, "step": 3071 }, { "epoch": 2.4217579818683483, "grad_norm": 0.33984375, "learning_rate": 2.1731091031905118e-06, "loss": 0.8918, "step": 3072 }, { "epoch": 2.422546314544738, "grad_norm": 0.341796875, "learning_rate": 2.1673999618686403e-06, "loss": 0.9381, "step": 3073 }, { "epoch": 2.4233346472211275, "grad_norm": 0.341796875, "learning_rate": 2.1616974182361826e-06, "loss": 0.9447, "step": 3074 }, { "epoch": 2.4241229798975166, "grad_norm": 0.353515625, "learning_rate": 2.156001477096601e-06, "loss": 0.9745, "step": 3075 }, { "epoch": 2.424911312573906, "grad_norm": 0.353515625, "learning_rate": 2.1503121432477936e-06, "loss": 0.9537, "step": 3076 }, { "epoch": 2.4256996452502957, "grad_norm": 0.33984375, "learning_rate": 2.1446294214820995e-06, "loss": 0.9573, "step": 3077 }, { "epoch": 2.4264879779266852, "grad_norm": 0.349609375, "learning_rate": 2.138953316586283e-06, "loss": 0.9323, "step": 3078 }, { "epoch": 2.4272763106030744, "grad_norm": 0.33984375, "learning_rate": 2.133283833341545e-06, "loss": 0.9374, "step": 3079 }, { "epoch": 2.428064643279464, "grad_norm": 0.33984375, "learning_rate": 2.1276209765234956e-06, "loss": 0.9369, "step": 3080 }, { "epoch": 2.4288529759558535, "grad_norm": 0.341796875, "learning_rate": 2.12196475090217e-06, "loss": 0.919, "step": 3081 }, { "epoch": 2.4296413086322426, "grad_norm": 0.34375, "learning_rate": 2.1163151612420153e-06, "loss": 0.9358, "step": 3082 }, { "epoch": 2.430429641308632, "grad_norm": 0.33984375, "learning_rate": 2.1106722123018965e-06, "loss": 0.9665, "step": 3083 }, { "epoch": 2.4312179739850217, "grad_norm": 0.34765625, "learning_rate": 2.1050359088350724e-06, "loss": 0.9374, "step": 3084 }, { "epoch": 2.4320063066614113, "grad_norm": 0.345703125, "learning_rate": 2.0994062555892123e-06, "loss": 0.9336, "step": 3085 }, { "epoch": 2.4327946393378004, "grad_norm": 0.345703125, "learning_rate": 2.0937832573063823e-06, "loss": 0.9311, "step": 3086 }, { "epoch": 2.43358297201419, "grad_norm": 0.34765625, "learning_rate": 2.0881669187230415e-06, "loss": 0.9413, "step": 3087 }, { "epoch": 2.4343713046905795, "grad_norm": 0.35546875, "learning_rate": 2.0825572445700406e-06, "loss": 0.9084, "step": 3088 }, { "epoch": 2.435159637366969, "grad_norm": 0.375, "learning_rate": 2.076954239572616e-06, "loss": 0.9135, "step": 3089 }, { "epoch": 2.435947970043358, "grad_norm": 0.34765625, "learning_rate": 2.0713579084503877e-06, "loss": 0.9552, "step": 3090 }, { "epoch": 2.4367363027197477, "grad_norm": 0.3359375, "learning_rate": 2.065768255917351e-06, "loss": 0.8995, "step": 3091 }, { "epoch": 2.4375246353961373, "grad_norm": 0.33984375, "learning_rate": 2.0601852866818784e-06, "loss": 0.944, "step": 3092 }, { "epoch": 2.438312968072527, "grad_norm": 0.357421875, "learning_rate": 2.0546090054467118e-06, "loss": 0.9044, "step": 3093 }, { "epoch": 2.439101300748916, "grad_norm": 0.34765625, "learning_rate": 2.04903941690896e-06, "loss": 0.8768, "step": 3094 }, { "epoch": 2.4398896334253055, "grad_norm": 0.34375, "learning_rate": 2.043476525760093e-06, "loss": 0.9785, "step": 3095 }, { "epoch": 2.440677966101695, "grad_norm": 0.3515625, "learning_rate": 2.0379203366859413e-06, "loss": 0.9392, "step": 3096 }, { "epoch": 2.441466298778084, "grad_norm": 0.33984375, "learning_rate": 2.0323708543666888e-06, "loss": 0.9624, "step": 3097 }, { "epoch": 2.4422546314544737, "grad_norm": 0.412109375, "learning_rate": 2.0268280834768695e-06, "loss": 0.9085, "step": 3098 }, { "epoch": 2.4430429641308633, "grad_norm": 0.361328125, "learning_rate": 2.0212920286853656e-06, "loss": 0.8976, "step": 3099 }, { "epoch": 2.443831296807253, "grad_norm": 0.341796875, "learning_rate": 2.0157626946554e-06, "loss": 0.9375, "step": 3100 }, { "epoch": 2.444619629483642, "grad_norm": 0.349609375, "learning_rate": 2.01024008604454e-06, "loss": 0.9377, "step": 3101 }, { "epoch": 2.4454079621600315, "grad_norm": 0.349609375, "learning_rate": 2.004724207504675e-06, "loss": 0.9221, "step": 3102 }, { "epoch": 2.446196294836421, "grad_norm": 0.357421875, "learning_rate": 1.999215063682042e-06, "loss": 0.9284, "step": 3103 }, { "epoch": 2.44698462751281, "grad_norm": 0.36328125, "learning_rate": 1.993712659217194e-06, "loss": 0.957, "step": 3104 }, { "epoch": 2.4477729601891998, "grad_norm": 0.33984375, "learning_rate": 1.988216998745014e-06, "loss": 0.902, "step": 3105 }, { "epoch": 2.4485612928655893, "grad_norm": 0.349609375, "learning_rate": 1.982728086894694e-06, "loss": 0.9646, "step": 3106 }, { "epoch": 2.449349625541979, "grad_norm": 0.33984375, "learning_rate": 1.9772459282897484e-06, "loss": 0.932, "step": 3107 }, { "epoch": 2.450137958218368, "grad_norm": 0.3515625, "learning_rate": 1.971770527548008e-06, "loss": 0.9778, "step": 3108 }, { "epoch": 2.4509262908947576, "grad_norm": 0.337890625, "learning_rate": 1.966301889281607e-06, "loss": 0.9081, "step": 3109 }, { "epoch": 2.451714623571147, "grad_norm": 0.345703125, "learning_rate": 1.9608400180969743e-06, "loss": 0.9154, "step": 3110 }, { "epoch": 2.4525029562475362, "grad_norm": 0.34765625, "learning_rate": 1.9553849185948514e-06, "loss": 0.9355, "step": 3111 }, { "epoch": 2.453291288923926, "grad_norm": 0.337890625, "learning_rate": 1.9499365953702678e-06, "loss": 0.9163, "step": 3112 }, { "epoch": 2.4540796216003153, "grad_norm": 0.34765625, "learning_rate": 1.944495053012555e-06, "loss": 0.9349, "step": 3113 }, { "epoch": 2.454867954276705, "grad_norm": 0.34375, "learning_rate": 1.9390602961053194e-06, "loss": 0.91, "step": 3114 }, { "epoch": 2.455656286953094, "grad_norm": 0.341796875, "learning_rate": 1.933632329226459e-06, "loss": 0.9348, "step": 3115 }, { "epoch": 2.4564446196294836, "grad_norm": 0.34375, "learning_rate": 1.928211156948151e-06, "loss": 0.9437, "step": 3116 }, { "epoch": 2.457232952305873, "grad_norm": 0.337890625, "learning_rate": 1.9227967838368566e-06, "loss": 0.9392, "step": 3117 }, { "epoch": 2.4580212849822627, "grad_norm": 0.33984375, "learning_rate": 1.9173892144532957e-06, "loss": 0.9418, "step": 3118 }, { "epoch": 2.458809617658652, "grad_norm": 0.349609375, "learning_rate": 1.911988453352467e-06, "loss": 0.8895, "step": 3119 }, { "epoch": 2.4595979503350414, "grad_norm": 0.34375, "learning_rate": 1.9065945050836299e-06, "loss": 0.947, "step": 3120 }, { "epoch": 2.460386283011431, "grad_norm": 0.333984375, "learning_rate": 1.9012073741903069e-06, "loss": 0.9132, "step": 3121 }, { "epoch": 2.4611746156878205, "grad_norm": 0.357421875, "learning_rate": 1.8958270652102862e-06, "loss": 0.9634, "step": 3122 }, { "epoch": 2.4619629483642096, "grad_norm": 0.341796875, "learning_rate": 1.890453582675591e-06, "loss": 0.9298, "step": 3123 }, { "epoch": 2.462751281040599, "grad_norm": 0.341796875, "learning_rate": 1.8850869311125098e-06, "loss": 0.9119, "step": 3124 }, { "epoch": 2.4635396137169887, "grad_norm": 0.34765625, "learning_rate": 1.8797271150415709e-06, "loss": 0.9467, "step": 3125 }, { "epoch": 2.464327946393378, "grad_norm": 0.34375, "learning_rate": 1.8743741389775472e-06, "loss": 0.9133, "step": 3126 }, { "epoch": 2.4651162790697674, "grad_norm": 0.361328125, "learning_rate": 1.8690280074294475e-06, "loss": 0.9103, "step": 3127 }, { "epoch": 2.465904611746157, "grad_norm": 0.341796875, "learning_rate": 1.8636887249005176e-06, "loss": 0.9724, "step": 3128 }, { "epoch": 2.4666929444225465, "grad_norm": 0.341796875, "learning_rate": 1.8583562958882329e-06, "loss": 0.9251, "step": 3129 }, { "epoch": 2.4674812770989356, "grad_norm": 0.33984375, "learning_rate": 1.853030724884297e-06, "loss": 0.9253, "step": 3130 }, { "epoch": 2.468269609775325, "grad_norm": 0.3984375, "learning_rate": 1.8477120163746343e-06, "loss": 0.9207, "step": 3131 }, { "epoch": 2.4690579424517147, "grad_norm": 0.337890625, "learning_rate": 1.8424001748393905e-06, "loss": 0.9196, "step": 3132 }, { "epoch": 2.469846275128104, "grad_norm": 0.345703125, "learning_rate": 1.8370952047529267e-06, "loss": 0.9506, "step": 3133 }, { "epoch": 2.4706346078044934, "grad_norm": 0.341796875, "learning_rate": 1.8317971105838173e-06, "loss": 0.9033, "step": 3134 }, { "epoch": 2.471422940480883, "grad_norm": 0.33984375, "learning_rate": 1.8265058967948434e-06, "loss": 0.9479, "step": 3135 }, { "epoch": 2.4722112731572725, "grad_norm": 0.875, "learning_rate": 1.8212215678429856e-06, "loss": 0.967, "step": 3136 }, { "epoch": 2.4729996058336616, "grad_norm": 0.33984375, "learning_rate": 1.8159441281794355e-06, "loss": 0.8991, "step": 3137 }, { "epoch": 2.473787938510051, "grad_norm": 0.349609375, "learning_rate": 1.8106735822495746e-06, "loss": 0.9433, "step": 3138 }, { "epoch": 2.4745762711864407, "grad_norm": 0.349609375, "learning_rate": 1.8054099344929833e-06, "loss": 0.9536, "step": 3139 }, { "epoch": 2.4753646038628303, "grad_norm": 0.373046875, "learning_rate": 1.8001531893434188e-06, "loss": 0.9299, "step": 3140 }, { "epoch": 2.4761529365392194, "grad_norm": 0.33984375, "learning_rate": 1.794903351228835e-06, "loss": 0.9285, "step": 3141 }, { "epoch": 2.476941269215609, "grad_norm": 0.33984375, "learning_rate": 1.7896604245713688e-06, "loss": 0.9481, "step": 3142 }, { "epoch": 2.4777296018919985, "grad_norm": 0.34765625, "learning_rate": 1.7844244137873302e-06, "loss": 0.9476, "step": 3143 }, { "epoch": 2.478517934568388, "grad_norm": 0.3359375, "learning_rate": 1.7791953232872083e-06, "loss": 0.9122, "step": 3144 }, { "epoch": 2.479306267244777, "grad_norm": 0.349609375, "learning_rate": 1.7739731574756524e-06, "loss": 0.9389, "step": 3145 }, { "epoch": 2.4800945999211668, "grad_norm": 0.349609375, "learning_rate": 1.7687579207514893e-06, "loss": 0.9186, "step": 3146 }, { "epoch": 2.4808829325975563, "grad_norm": 0.34765625, "learning_rate": 1.7635496175077082e-06, "loss": 0.9317, "step": 3147 }, { "epoch": 2.4816712652739454, "grad_norm": 0.34375, "learning_rate": 1.7583482521314598e-06, "loss": 0.9348, "step": 3148 }, { "epoch": 2.482459597950335, "grad_norm": 0.33984375, "learning_rate": 1.7531538290040384e-06, "loss": 0.9116, "step": 3149 }, { "epoch": 2.4832479306267246, "grad_norm": 0.345703125, "learning_rate": 1.747966352500904e-06, "loss": 0.9227, "step": 3150 }, { "epoch": 2.484036263303114, "grad_norm": 0.349609375, "learning_rate": 1.7427858269916565e-06, "loss": 0.9362, "step": 3151 }, { "epoch": 2.4848245959795032, "grad_norm": 0.34375, "learning_rate": 1.7376122568400533e-06, "loss": 0.9187, "step": 3152 }, { "epoch": 2.485612928655893, "grad_norm": 0.34375, "learning_rate": 1.7324456464039751e-06, "loss": 0.926, "step": 3153 }, { "epoch": 2.4864012613322823, "grad_norm": 0.341796875, "learning_rate": 1.727286000035454e-06, "loss": 0.9538, "step": 3154 }, { "epoch": 2.4871895940086715, "grad_norm": 0.330078125, "learning_rate": 1.722133322080648e-06, "loss": 0.9013, "step": 3155 }, { "epoch": 2.487977926685061, "grad_norm": 0.34375, "learning_rate": 1.7169876168798561e-06, "loss": 0.9259, "step": 3156 }, { "epoch": 2.4887662593614506, "grad_norm": 0.357421875, "learning_rate": 1.7118488887674889e-06, "loss": 0.913, "step": 3157 }, { "epoch": 2.48955459203784, "grad_norm": 0.34375, "learning_rate": 1.7067171420720908e-06, "loss": 0.8996, "step": 3158 }, { "epoch": 2.4903429247142292, "grad_norm": 0.349609375, "learning_rate": 1.7015923811163225e-06, "loss": 0.9528, "step": 3159 }, { "epoch": 2.491131257390619, "grad_norm": 0.3515625, "learning_rate": 1.6964746102169582e-06, "loss": 0.9558, "step": 3160 }, { "epoch": 2.4919195900670084, "grad_norm": 0.3515625, "learning_rate": 1.6913638336848892e-06, "loss": 0.9279, "step": 3161 }, { "epoch": 2.4927079227433975, "grad_norm": 0.345703125, "learning_rate": 1.6862600558251097e-06, "loss": 0.9223, "step": 3162 }, { "epoch": 2.493496255419787, "grad_norm": 0.349609375, "learning_rate": 1.6811632809367207e-06, "loss": 0.9175, "step": 3163 }, { "epoch": 2.4942845880961766, "grad_norm": 0.33984375, "learning_rate": 1.6760735133129269e-06, "loss": 0.9141, "step": 3164 }, { "epoch": 2.495072920772566, "grad_norm": 0.341796875, "learning_rate": 1.6709907572410266e-06, "loss": 0.9317, "step": 3165 }, { "epoch": 2.4958612534489557, "grad_norm": 0.337890625, "learning_rate": 1.665915017002414e-06, "loss": 0.9406, "step": 3166 }, { "epoch": 2.496649586125345, "grad_norm": 0.34765625, "learning_rate": 1.6608462968725736e-06, "loss": 0.9617, "step": 3167 }, { "epoch": 2.4974379188017344, "grad_norm": 0.337890625, "learning_rate": 1.6557846011210753e-06, "loss": 0.9107, "step": 3168 }, { "epoch": 2.498226251478124, "grad_norm": 0.337890625, "learning_rate": 1.6507299340115746e-06, "loss": 0.9439, "step": 3169 }, { "epoch": 2.499014584154513, "grad_norm": 0.337890625, "learning_rate": 1.645682299801804e-06, "loss": 0.9455, "step": 3170 }, { "epoch": 2.4998029168309026, "grad_norm": 0.396484375, "learning_rate": 1.6406417027435728e-06, "loss": 0.9039, "step": 3171 }, { "epoch": 2.500591249507292, "grad_norm": 0.33203125, "learning_rate": 1.6356081470827635e-06, "loss": 0.9101, "step": 3172 }, { "epoch": 2.5013795821836817, "grad_norm": 0.345703125, "learning_rate": 1.6305816370593263e-06, "loss": 0.9156, "step": 3173 }, { "epoch": 2.502167914860071, "grad_norm": 0.33984375, "learning_rate": 1.6255621769072805e-06, "loss": 0.9493, "step": 3174 }, { "epoch": 2.5029562475364604, "grad_norm": 0.34375, "learning_rate": 1.6205497708546936e-06, "loss": 0.9099, "step": 3175 }, { "epoch": 2.50374458021285, "grad_norm": 0.345703125, "learning_rate": 1.6155444231237106e-06, "loss": 0.9231, "step": 3176 }, { "epoch": 2.504532912889239, "grad_norm": 0.345703125, "learning_rate": 1.6105461379305187e-06, "loss": 0.9138, "step": 3177 }, { "epoch": 2.5053212455656286, "grad_norm": 0.34375, "learning_rate": 1.6055549194853604e-06, "loss": 0.9427, "step": 3178 }, { "epoch": 2.506109578242018, "grad_norm": 0.341796875, "learning_rate": 1.600570771992519e-06, "loss": 0.9632, "step": 3179 }, { "epoch": 2.5068979109184077, "grad_norm": 0.349609375, "learning_rate": 1.5955936996503285e-06, "loss": 0.9217, "step": 3180 }, { "epoch": 2.507686243594797, "grad_norm": 0.345703125, "learning_rate": 1.5906237066511643e-06, "loss": 0.9262, "step": 3181 }, { "epoch": 2.5084745762711864, "grad_norm": 0.34375, "learning_rate": 1.5856607971814375e-06, "loss": 0.9642, "step": 3182 }, { "epoch": 2.509262908947576, "grad_norm": 0.333984375, "learning_rate": 1.580704975421584e-06, "loss": 0.9373, "step": 3183 }, { "epoch": 2.510051241623965, "grad_norm": 0.34375, "learning_rate": 1.5757562455460807e-06, "loss": 0.9125, "step": 3184 }, { "epoch": 2.5108395743003546, "grad_norm": 0.337890625, "learning_rate": 1.5708146117234225e-06, "loss": 0.9304, "step": 3185 }, { "epoch": 2.511627906976744, "grad_norm": 0.337890625, "learning_rate": 1.5658800781161365e-06, "loss": 0.9134, "step": 3186 }, { "epoch": 2.5124162396531338, "grad_norm": 0.345703125, "learning_rate": 1.5609526488807613e-06, "loss": 0.9134, "step": 3187 }, { "epoch": 2.5132045723295233, "grad_norm": 0.345703125, "learning_rate": 1.5560323281678514e-06, "loss": 0.9634, "step": 3188 }, { "epoch": 2.5139929050059124, "grad_norm": 0.337890625, "learning_rate": 1.5511191201219733e-06, "loss": 0.9417, "step": 3189 }, { "epoch": 2.514781237682302, "grad_norm": 0.345703125, "learning_rate": 1.546213028881709e-06, "loss": 0.9329, "step": 3190 }, { "epoch": 2.515569570358691, "grad_norm": 0.33984375, "learning_rate": 1.5413140585796426e-06, "loss": 0.9523, "step": 3191 }, { "epoch": 2.5163579030350807, "grad_norm": 0.341796875, "learning_rate": 1.5364222133423523e-06, "loss": 0.936, "step": 3192 }, { "epoch": 2.5171462357114702, "grad_norm": 0.333984375, "learning_rate": 1.531537497290424e-06, "loss": 0.9071, "step": 3193 }, { "epoch": 2.51793456838786, "grad_norm": 0.345703125, "learning_rate": 1.526659914538432e-06, "loss": 0.9322, "step": 3194 }, { "epoch": 2.5187229010642493, "grad_norm": 0.34375, "learning_rate": 1.521789469194952e-06, "loss": 0.9482, "step": 3195 }, { "epoch": 2.5195112337406385, "grad_norm": 0.341796875, "learning_rate": 1.5169261653625345e-06, "loss": 0.9367, "step": 3196 }, { "epoch": 2.520299566417028, "grad_norm": 0.3359375, "learning_rate": 1.5120700071377215e-06, "loss": 0.9261, "step": 3197 }, { "epoch": 2.5210878990934176, "grad_norm": 0.34375, "learning_rate": 1.5072209986110376e-06, "loss": 0.9147, "step": 3198 }, { "epoch": 2.5218762317698067, "grad_norm": 0.349609375, "learning_rate": 1.50237914386698e-06, "loss": 0.9245, "step": 3199 }, { "epoch": 2.5226645644461962, "grad_norm": 0.337890625, "learning_rate": 1.497544446984024e-06, "loss": 0.9219, "step": 3200 }, { "epoch": 2.523452897122586, "grad_norm": 0.34375, "learning_rate": 1.492716912034614e-06, "loss": 0.9188, "step": 3201 }, { "epoch": 2.5242412297989754, "grad_norm": 0.34765625, "learning_rate": 1.4878965430851612e-06, "loss": 0.9425, "step": 3202 }, { "epoch": 2.5250295624753645, "grad_norm": 0.34375, "learning_rate": 1.4830833441960402e-06, "loss": 0.9033, "step": 3203 }, { "epoch": 2.525817895151754, "grad_norm": 0.33984375, "learning_rate": 1.4782773194215883e-06, "loss": 0.9137, "step": 3204 }, { "epoch": 2.5266062278281436, "grad_norm": 0.34375, "learning_rate": 1.473478472810097e-06, "loss": 0.9061, "step": 3205 }, { "epoch": 2.5273945605045327, "grad_norm": 0.35546875, "learning_rate": 1.468686808403814e-06, "loss": 0.9411, "step": 3206 }, { "epoch": 2.5281828931809223, "grad_norm": 0.353515625, "learning_rate": 1.4639023302389366e-06, "loss": 0.9786, "step": 3207 }, { "epoch": 2.528971225857312, "grad_norm": 0.349609375, "learning_rate": 1.4591250423456048e-06, "loss": 0.9637, "step": 3208 }, { "epoch": 2.5297595585337014, "grad_norm": 0.33984375, "learning_rate": 1.4543549487479092e-06, "loss": 0.9451, "step": 3209 }, { "epoch": 2.5305478912100905, "grad_norm": 0.345703125, "learning_rate": 1.4495920534638741e-06, "loss": 0.9391, "step": 3210 }, { "epoch": 2.53133622388648, "grad_norm": 0.3515625, "learning_rate": 1.4448363605054638e-06, "loss": 0.9376, "step": 3211 }, { "epoch": 2.5321245565628696, "grad_norm": 0.345703125, "learning_rate": 1.440087873878574e-06, "loss": 0.9184, "step": 3212 }, { "epoch": 2.5329128892392587, "grad_norm": 0.345703125, "learning_rate": 1.435346597583034e-06, "loss": 0.926, "step": 3213 }, { "epoch": 2.5337012219156483, "grad_norm": 0.34375, "learning_rate": 1.4306125356125899e-06, "loss": 0.9326, "step": 3214 }, { "epoch": 2.534489554592038, "grad_norm": 0.33203125, "learning_rate": 1.4258856919549236e-06, "loss": 0.9204, "step": 3215 }, { "epoch": 2.5352778872684274, "grad_norm": 0.349609375, "learning_rate": 1.4211660705916286e-06, "loss": 0.9257, "step": 3216 }, { "epoch": 2.536066219944817, "grad_norm": 0.35546875, "learning_rate": 1.4164536754982206e-06, "loss": 0.9758, "step": 3217 }, { "epoch": 2.536854552621206, "grad_norm": 0.33984375, "learning_rate": 1.4117485106441188e-06, "loss": 0.9309, "step": 3218 }, { "epoch": 2.5376428852975956, "grad_norm": 0.33984375, "learning_rate": 1.407050579992658e-06, "loss": 0.9231, "step": 3219 }, { "epoch": 2.538431217973985, "grad_norm": 0.333984375, "learning_rate": 1.4023598875010846e-06, "loss": 0.9199, "step": 3220 }, { "epoch": 2.5392195506503743, "grad_norm": 0.34765625, "learning_rate": 1.397676437120542e-06, "loss": 0.9586, "step": 3221 }, { "epoch": 2.540007883326764, "grad_norm": 0.38671875, "learning_rate": 1.3930002327960702e-06, "loss": 0.9317, "step": 3222 }, { "epoch": 2.5407962160031534, "grad_norm": 0.3515625, "learning_rate": 1.3883312784666091e-06, "loss": 0.9405, "step": 3223 }, { "epoch": 2.541584548679543, "grad_norm": 0.34375, "learning_rate": 1.3836695780649979e-06, "loss": 0.9494, "step": 3224 }, { "epoch": 2.542372881355932, "grad_norm": 0.33984375, "learning_rate": 1.3790151355179581e-06, "loss": 0.9501, "step": 3225 }, { "epoch": 2.5431612140323216, "grad_norm": 0.33984375, "learning_rate": 1.3743679547460943e-06, "loss": 0.9445, "step": 3226 }, { "epoch": 2.543949546708711, "grad_norm": 0.33984375, "learning_rate": 1.3697280396639035e-06, "loss": 0.9293, "step": 3227 }, { "epoch": 2.5447378793851003, "grad_norm": 0.337890625, "learning_rate": 1.365095394179754e-06, "loss": 0.9134, "step": 3228 }, { "epoch": 2.54552621206149, "grad_norm": 0.3359375, "learning_rate": 1.3604700221959022e-06, "loss": 0.9249, "step": 3229 }, { "epoch": 2.5463145447378794, "grad_norm": 0.34765625, "learning_rate": 1.3558519276084636e-06, "loss": 0.9592, "step": 3230 }, { "epoch": 2.547102877414269, "grad_norm": 0.341796875, "learning_rate": 1.3512411143074333e-06, "loss": 0.9399, "step": 3231 }, { "epoch": 2.547891210090658, "grad_norm": 0.341796875, "learning_rate": 1.34663758617667e-06, "loss": 0.9049, "step": 3232 }, { "epoch": 2.5486795427670477, "grad_norm": 0.34375, "learning_rate": 1.3420413470938942e-06, "loss": 0.9243, "step": 3233 }, { "epoch": 2.5494678754434372, "grad_norm": 0.33984375, "learning_rate": 1.3374524009306944e-06, "loss": 0.94, "step": 3234 }, { "epoch": 2.5502562081198263, "grad_norm": 0.341796875, "learning_rate": 1.332870751552503e-06, "loss": 0.968, "step": 3235 }, { "epoch": 2.551044540796216, "grad_norm": 0.341796875, "learning_rate": 1.3282964028186175e-06, "loss": 0.9286, "step": 3236 }, { "epoch": 2.5518328734726055, "grad_norm": 0.3359375, "learning_rate": 1.3237293585821786e-06, "loss": 0.9453, "step": 3237 }, { "epoch": 2.552621206148995, "grad_norm": 0.341796875, "learning_rate": 1.3191696226901795e-06, "loss": 0.9333, "step": 3238 }, { "epoch": 2.5534095388253846, "grad_norm": 0.341796875, "learning_rate": 1.3146171989834544e-06, "loss": 0.9059, "step": 3239 }, { "epoch": 2.5541978715017737, "grad_norm": 0.341796875, "learning_rate": 1.310072091296677e-06, "loss": 0.9597, "step": 3240 }, { "epoch": 2.5549862041781632, "grad_norm": 0.337890625, "learning_rate": 1.3055343034583611e-06, "loss": 0.8902, "step": 3241 }, { "epoch": 2.5557745368545524, "grad_norm": 0.33984375, "learning_rate": 1.301003839290853e-06, "loss": 0.9194, "step": 3242 }, { "epoch": 2.556562869530942, "grad_norm": 0.34375, "learning_rate": 1.296480702610332e-06, "loss": 0.9293, "step": 3243 }, { "epoch": 2.5573512022073315, "grad_norm": 0.3515625, "learning_rate": 1.291964897226803e-06, "loss": 0.9375, "step": 3244 }, { "epoch": 2.558139534883721, "grad_norm": 0.34375, "learning_rate": 1.2874564269440958e-06, "loss": 0.8886, "step": 3245 }, { "epoch": 2.5589278675601106, "grad_norm": 0.34375, "learning_rate": 1.2829552955598623e-06, "loss": 0.9309, "step": 3246 }, { "epoch": 2.5597162002364997, "grad_norm": 0.34375, "learning_rate": 1.2784615068655747e-06, "loss": 0.945, "step": 3247 }, { "epoch": 2.5605045329128893, "grad_norm": 0.34375, "learning_rate": 1.273975064646512e-06, "loss": 0.9173, "step": 3248 }, { "epoch": 2.561292865589279, "grad_norm": 0.341796875, "learning_rate": 1.269495972681777e-06, "loss": 0.9363, "step": 3249 }, { "epoch": 2.562081198265668, "grad_norm": 0.34375, "learning_rate": 1.265024234744271e-06, "loss": 0.9324, "step": 3250 }, { "epoch": 2.5628695309420575, "grad_norm": 0.345703125, "learning_rate": 1.260559854600709e-06, "loss": 0.9421, "step": 3251 }, { "epoch": 2.563657863618447, "grad_norm": 0.37109375, "learning_rate": 1.2561028360116002e-06, "loss": 0.9252, "step": 3252 }, { "epoch": 2.5644461962948366, "grad_norm": 0.33984375, "learning_rate": 1.251653182731254e-06, "loss": 0.9479, "step": 3253 }, { "epoch": 2.5652345289712257, "grad_norm": 0.3359375, "learning_rate": 1.2472108985077836e-06, "loss": 0.8732, "step": 3254 }, { "epoch": 2.5660228616476153, "grad_norm": 0.3515625, "learning_rate": 1.242775987083088e-06, "loss": 0.9625, "step": 3255 }, { "epoch": 2.566811194324005, "grad_norm": 0.337890625, "learning_rate": 1.2383484521928602e-06, "loss": 0.9486, "step": 3256 }, { "epoch": 2.567599527000394, "grad_norm": 0.341796875, "learning_rate": 1.2339282975665712e-06, "loss": 0.9302, "step": 3257 }, { "epoch": 2.5683878596767835, "grad_norm": 0.35546875, "learning_rate": 1.2295155269274827e-06, "loss": 0.9411, "step": 3258 }, { "epoch": 2.569176192353173, "grad_norm": 0.341796875, "learning_rate": 1.2251101439926383e-06, "loss": 0.919, "step": 3259 }, { "epoch": 2.5699645250295626, "grad_norm": 0.34765625, "learning_rate": 1.220712152472856e-06, "loss": 0.9454, "step": 3260 }, { "epoch": 2.570752857705952, "grad_norm": 0.341796875, "learning_rate": 1.2163215560727215e-06, "loss": 0.9426, "step": 3261 }, { "epoch": 2.5715411903823413, "grad_norm": 0.34765625, "learning_rate": 1.2119383584905985e-06, "loss": 0.9541, "step": 3262 }, { "epoch": 2.572329523058731, "grad_norm": 0.3359375, "learning_rate": 1.2075625634186205e-06, "loss": 0.931, "step": 3263 }, { "epoch": 2.57311785573512, "grad_norm": 0.34375, "learning_rate": 1.2031941745426824e-06, "loss": 0.95, "step": 3264 }, { "epoch": 2.5739061884115095, "grad_norm": 0.3515625, "learning_rate": 1.198833195542435e-06, "loss": 0.9483, "step": 3265 }, { "epoch": 2.574694521087899, "grad_norm": 0.345703125, "learning_rate": 1.1944796300912942e-06, "loss": 0.9167, "step": 3266 }, { "epoch": 2.5754828537642886, "grad_norm": 0.34375, "learning_rate": 1.1901334818564291e-06, "loss": 0.9188, "step": 3267 }, { "epoch": 2.576271186440678, "grad_norm": 0.345703125, "learning_rate": 1.1857947544987668e-06, "loss": 0.9752, "step": 3268 }, { "epoch": 2.5770595191170673, "grad_norm": 0.34375, "learning_rate": 1.1814634516729729e-06, "loss": 0.9305, "step": 3269 }, { "epoch": 2.577847851793457, "grad_norm": 0.33984375, "learning_rate": 1.1771395770274653e-06, "loss": 0.941, "step": 3270 }, { "epoch": 2.5786361844698464, "grad_norm": 0.345703125, "learning_rate": 1.172823134204405e-06, "loss": 0.9344, "step": 3271 }, { "epoch": 2.5794245171462356, "grad_norm": 0.3515625, "learning_rate": 1.1685141268396906e-06, "loss": 0.9523, "step": 3272 }, { "epoch": 2.580212849822625, "grad_norm": 0.34375, "learning_rate": 1.1642125585629593e-06, "loss": 0.9097, "step": 3273 }, { "epoch": 2.5810011824990147, "grad_norm": 0.33984375, "learning_rate": 1.159918432997581e-06, "loss": 0.9471, "step": 3274 }, { "epoch": 2.5817895151754042, "grad_norm": 0.3359375, "learning_rate": 1.1556317537606588e-06, "loss": 0.9099, "step": 3275 }, { "epoch": 2.5825778478517933, "grad_norm": 0.34375, "learning_rate": 1.1513525244630198e-06, "loss": 0.9109, "step": 3276 }, { "epoch": 2.583366180528183, "grad_norm": 0.337890625, "learning_rate": 1.1470807487092173e-06, "loss": 0.9407, "step": 3277 }, { "epoch": 2.5841545132045725, "grad_norm": 0.34765625, "learning_rate": 1.1428164300975276e-06, "loss": 0.9584, "step": 3278 }, { "epoch": 2.5849428458809616, "grad_norm": 0.353515625, "learning_rate": 1.1385595722199438e-06, "loss": 0.9192, "step": 3279 }, { "epoch": 2.585731178557351, "grad_norm": 0.3515625, "learning_rate": 1.1343101786621747e-06, "loss": 0.9708, "step": 3280 }, { "epoch": 2.5865195112337407, "grad_norm": 0.34765625, "learning_rate": 1.1300682530036432e-06, "loss": 0.93, "step": 3281 }, { "epoch": 2.5873078439101302, "grad_norm": 0.33984375, "learning_rate": 1.1258337988174794e-06, "loss": 0.9445, "step": 3282 }, { "epoch": 2.5880961765865194, "grad_norm": 0.345703125, "learning_rate": 1.121606819670521e-06, "loss": 0.9399, "step": 3283 }, { "epoch": 2.588884509262909, "grad_norm": 0.33984375, "learning_rate": 1.1173873191233097e-06, "loss": 0.9366, "step": 3284 }, { "epoch": 2.5896728419392985, "grad_norm": 0.34375, "learning_rate": 1.1131753007300884e-06, "loss": 0.9303, "step": 3285 }, { "epoch": 2.5904611746156876, "grad_norm": 0.349609375, "learning_rate": 1.1089707680387962e-06, "loss": 0.9553, "step": 3286 }, { "epoch": 2.591249507292077, "grad_norm": 0.345703125, "learning_rate": 1.1047737245910617e-06, "loss": 0.9458, "step": 3287 }, { "epoch": 2.5920378399684667, "grad_norm": 0.341796875, "learning_rate": 1.1005841739222166e-06, "loss": 0.9011, "step": 3288 }, { "epoch": 2.5928261726448563, "grad_norm": 0.345703125, "learning_rate": 1.0964021195612728e-06, "loss": 0.9006, "step": 3289 }, { "epoch": 2.593614505321246, "grad_norm": 0.341796875, "learning_rate": 1.0922275650309322e-06, "loss": 0.9701, "step": 3290 }, { "epoch": 2.594402837997635, "grad_norm": 0.3359375, "learning_rate": 1.0880605138475708e-06, "loss": 0.8958, "step": 3291 }, { "epoch": 2.5951911706740245, "grad_norm": 0.3515625, "learning_rate": 1.0839009695212521e-06, "loss": 0.9709, "step": 3292 }, { "epoch": 2.5959795033504136, "grad_norm": 0.341796875, "learning_rate": 1.079748935555719e-06, "loss": 0.9041, "step": 3293 }, { "epoch": 2.596767836026803, "grad_norm": 0.33984375, "learning_rate": 1.0756044154483813e-06, "loss": 0.9268, "step": 3294 }, { "epoch": 2.5975561687031927, "grad_norm": 0.345703125, "learning_rate": 1.0714674126903202e-06, "loss": 0.9151, "step": 3295 }, { "epoch": 2.5983445013795823, "grad_norm": 0.341796875, "learning_rate": 1.0673379307662856e-06, "loss": 0.8987, "step": 3296 }, { "epoch": 2.599132834055972, "grad_norm": 0.349609375, "learning_rate": 1.0632159731546965e-06, "loss": 0.9321, "step": 3297 }, { "epoch": 2.599921166732361, "grad_norm": 0.333984375, "learning_rate": 1.0591015433276308e-06, "loss": 0.9461, "step": 3298 }, { "epoch": 2.6007094994087505, "grad_norm": 0.3515625, "learning_rate": 1.054994644750824e-06, "loss": 0.9374, "step": 3299 }, { "epoch": 2.60149783208514, "grad_norm": 0.337890625, "learning_rate": 1.0508952808836682e-06, "loss": 0.9558, "step": 3300 }, { "epoch": 2.602286164761529, "grad_norm": 0.341796875, "learning_rate": 1.0468034551792083e-06, "loss": 0.9398, "step": 3301 }, { "epoch": 2.6030744974379187, "grad_norm": 0.345703125, "learning_rate": 1.0427191710841444e-06, "loss": 0.9089, "step": 3302 }, { "epoch": 2.6038628301143083, "grad_norm": 0.34375, "learning_rate": 1.038642432038821e-06, "loss": 0.9667, "step": 3303 }, { "epoch": 2.604651162790698, "grad_norm": 0.341796875, "learning_rate": 1.0345732414772224e-06, "loss": 0.9312, "step": 3304 }, { "epoch": 2.605439495467087, "grad_norm": 0.3515625, "learning_rate": 1.0305116028269812e-06, "loss": 0.9407, "step": 3305 }, { "epoch": 2.6062278281434765, "grad_norm": 0.349609375, "learning_rate": 1.0264575195093628e-06, "loss": 0.9171, "step": 3306 }, { "epoch": 2.607016160819866, "grad_norm": 0.345703125, "learning_rate": 1.022410994939279e-06, "loss": 0.8765, "step": 3307 }, { "epoch": 2.607804493496255, "grad_norm": 0.341796875, "learning_rate": 1.018372032525261e-06, "loss": 0.9433, "step": 3308 }, { "epoch": 2.6085928261726448, "grad_norm": 0.3515625, "learning_rate": 1.0143406356694797e-06, "loss": 0.8914, "step": 3309 }, { "epoch": 2.6093811588490343, "grad_norm": 0.33984375, "learning_rate": 1.0103168077677284e-06, "loss": 0.9413, "step": 3310 }, { "epoch": 2.610169491525424, "grad_norm": 0.3359375, "learning_rate": 1.006300552209427e-06, "loss": 0.915, "step": 3311 }, { "epoch": 2.6109578242018134, "grad_norm": 0.333984375, "learning_rate": 1.0022918723776175e-06, "loss": 0.9399, "step": 3312 }, { "epoch": 2.6117461568782026, "grad_norm": 0.33984375, "learning_rate": 9.982907716489587e-07, "loss": 0.9273, "step": 3313 }, { "epoch": 2.612534489554592, "grad_norm": 0.34375, "learning_rate": 9.942972533937268e-07, "loss": 0.924, "step": 3314 }, { "epoch": 2.6133228222309812, "grad_norm": 0.341796875, "learning_rate": 9.903113209758098e-07, "loss": 0.9133, "step": 3315 }, { "epoch": 2.614111154907371, "grad_norm": 0.34375, "learning_rate": 9.863329777527053e-07, "loss": 0.9528, "step": 3316 }, { "epoch": 2.6148994875837603, "grad_norm": 0.33984375, "learning_rate": 9.823622270755206e-07, "loss": 0.8871, "step": 3317 }, { "epoch": 2.61568782026015, "grad_norm": 0.34375, "learning_rate": 9.783990722889658e-07, "loss": 0.9399, "step": 3318 }, { "epoch": 2.6164761529365395, "grad_norm": 0.34375, "learning_rate": 9.744435167313537e-07, "loss": 0.9387, "step": 3319 }, { "epoch": 2.6172644856129286, "grad_norm": 0.337890625, "learning_rate": 9.704955637345948e-07, "loss": 0.9286, "step": 3320 }, { "epoch": 2.618052818289318, "grad_norm": 0.345703125, "learning_rate": 9.665552166241965e-07, "loss": 0.9415, "step": 3321 }, { "epoch": 2.6188411509657077, "grad_norm": 0.345703125, "learning_rate": 9.626224787192594e-07, "loss": 0.9659, "step": 3322 }, { "epoch": 2.619629483642097, "grad_norm": 0.349609375, "learning_rate": 9.586973533324738e-07, "loss": 0.926, "step": 3323 }, { "epoch": 2.6204178163184864, "grad_norm": 0.3515625, "learning_rate": 9.547798437701194e-07, "loss": 0.9502, "step": 3324 }, { "epoch": 2.621206148994876, "grad_norm": 0.33984375, "learning_rate": 9.508699533320598e-07, "loss": 0.9466, "step": 3325 }, { "epoch": 2.6219944816712655, "grad_norm": 0.3359375, "learning_rate": 9.469676853117371e-07, "loss": 0.9272, "step": 3326 }, { "epoch": 2.6227828143476546, "grad_norm": 0.341796875, "learning_rate": 9.43073042996181e-07, "loss": 0.9095, "step": 3327 }, { "epoch": 2.623571147024044, "grad_norm": 0.33984375, "learning_rate": 9.391860296659916e-07, "loss": 0.9316, "step": 3328 }, { "epoch": 2.6243594797004337, "grad_norm": 0.33984375, "learning_rate": 9.353066485953455e-07, "loss": 0.9302, "step": 3329 }, { "epoch": 2.625147812376823, "grad_norm": 0.3359375, "learning_rate": 9.314349030519843e-07, "loss": 0.9506, "step": 3330 }, { "epoch": 2.6259361450532124, "grad_norm": 0.341796875, "learning_rate": 9.275707962972281e-07, "loss": 0.9229, "step": 3331 }, { "epoch": 2.626724477729602, "grad_norm": 0.34375, "learning_rate": 9.237143315859553e-07, "loss": 0.9373, "step": 3332 }, { "epoch": 2.6275128104059915, "grad_norm": 0.345703125, "learning_rate": 9.198655121666111e-07, "loss": 0.9765, "step": 3333 }, { "epoch": 2.6283011430823806, "grad_norm": 0.341796875, "learning_rate": 9.160243412811953e-07, "loss": 0.9288, "step": 3334 }, { "epoch": 2.62908947575877, "grad_norm": 0.34765625, "learning_rate": 9.121908221652675e-07, "loss": 0.9296, "step": 3335 }, { "epoch": 2.6298778084351597, "grad_norm": 0.337890625, "learning_rate": 9.083649580479493e-07, "loss": 0.9083, "step": 3336 }, { "epoch": 2.630666141111549, "grad_norm": 0.341796875, "learning_rate": 9.045467521519047e-07, "loss": 0.9253, "step": 3337 }, { "epoch": 2.6314544737879384, "grad_norm": 0.69140625, "learning_rate": 9.00736207693349e-07, "loss": 0.9636, "step": 3338 }, { "epoch": 2.632242806464328, "grad_norm": 0.359375, "learning_rate": 8.969333278820447e-07, "loss": 0.9482, "step": 3339 }, { "epoch": 2.6330311391407175, "grad_norm": 0.34765625, "learning_rate": 8.931381159212982e-07, "loss": 0.943, "step": 3340 }, { "epoch": 2.633819471817107, "grad_norm": 0.341796875, "learning_rate": 8.893505750079623e-07, "loss": 0.9224, "step": 3341 }, { "epoch": 2.634607804493496, "grad_norm": 0.337890625, "learning_rate": 8.855707083324183e-07, "loss": 0.9407, "step": 3342 }, { "epoch": 2.6353961371698857, "grad_norm": 0.345703125, "learning_rate": 8.817985190785882e-07, "loss": 0.9349, "step": 3343 }, { "epoch": 2.636184469846275, "grad_norm": 0.349609375, "learning_rate": 8.780340104239283e-07, "loss": 0.9332, "step": 3344 }, { "epoch": 2.6369728025226644, "grad_norm": 0.345703125, "learning_rate": 8.742771855394205e-07, "loss": 0.9459, "step": 3345 }, { "epoch": 2.637761135199054, "grad_norm": 0.34375, "learning_rate": 8.70528047589585e-07, "loss": 0.9384, "step": 3346 }, { "epoch": 2.6385494678754435, "grad_norm": 0.3359375, "learning_rate": 8.667865997324532e-07, "loss": 0.8874, "step": 3347 }, { "epoch": 2.639337800551833, "grad_norm": 0.359375, "learning_rate": 8.630528451195874e-07, "loss": 0.9735, "step": 3348 }, { "epoch": 2.640126133228222, "grad_norm": 0.341796875, "learning_rate": 8.593267868960675e-07, "loss": 0.9212, "step": 3349 }, { "epoch": 2.6409144659046118, "grad_norm": 0.34375, "learning_rate": 8.556084282004906e-07, "loss": 0.9297, "step": 3350 }, { "epoch": 2.6417027985810013, "grad_norm": 0.33984375, "learning_rate": 8.518977721649679e-07, "loss": 0.9244, "step": 3351 }, { "epoch": 2.6424911312573904, "grad_norm": 0.3515625, "learning_rate": 8.481948219151226e-07, "loss": 0.957, "step": 3352 }, { "epoch": 2.64327946393378, "grad_norm": 0.337890625, "learning_rate": 8.444995805700873e-07, "loss": 0.9174, "step": 3353 }, { "epoch": 2.6440677966101696, "grad_norm": 0.341796875, "learning_rate": 8.408120512425e-07, "loss": 0.9749, "step": 3354 }, { "epoch": 2.644856129286559, "grad_norm": 0.337890625, "learning_rate": 8.371322370385049e-07, "loss": 0.9439, "step": 3355 }, { "epoch": 2.6456444619629482, "grad_norm": 0.34375, "learning_rate": 8.334601410577436e-07, "loss": 0.9165, "step": 3356 }, { "epoch": 2.646432794639338, "grad_norm": 0.353515625, "learning_rate": 8.297957663933609e-07, "loss": 0.947, "step": 3357 }, { "epoch": 2.6472211273157273, "grad_norm": 0.349609375, "learning_rate": 8.261391161319942e-07, "loss": 0.9467, "step": 3358 }, { "epoch": 2.6480094599921165, "grad_norm": 0.341796875, "learning_rate": 8.224901933537777e-07, "loss": 0.9405, "step": 3359 }, { "epoch": 2.648797792668506, "grad_norm": 0.34765625, "learning_rate": 8.188490011323291e-07, "loss": 0.9511, "step": 3360 }, { "epoch": 2.6495861253448956, "grad_norm": 0.345703125, "learning_rate": 8.152155425347652e-07, "loss": 0.9535, "step": 3361 }, { "epoch": 2.650374458021285, "grad_norm": 0.34375, "learning_rate": 8.1158982062168e-07, "loss": 0.9185, "step": 3362 }, { "epoch": 2.6511627906976747, "grad_norm": 0.349609375, "learning_rate": 8.079718384471557e-07, "loss": 0.9395, "step": 3363 }, { "epoch": 2.651951123374064, "grad_norm": 0.3359375, "learning_rate": 8.043615990587495e-07, "loss": 0.9193, "step": 3364 }, { "epoch": 2.6527394560504534, "grad_norm": 0.345703125, "learning_rate": 8.007591054975016e-07, "loss": 0.9922, "step": 3365 }, { "epoch": 2.6535277887268425, "grad_norm": 0.345703125, "learning_rate": 7.971643607979273e-07, "loss": 0.9424, "step": 3366 }, { "epoch": 2.654316121403232, "grad_norm": 0.33984375, "learning_rate": 7.935773679880121e-07, "loss": 0.91, "step": 3367 }, { "epoch": 2.6551044540796216, "grad_norm": 0.345703125, "learning_rate": 7.899981300892145e-07, "loss": 0.9647, "step": 3368 }, { "epoch": 2.655892786756011, "grad_norm": 0.33984375, "learning_rate": 7.864266501164541e-07, "loss": 0.9209, "step": 3369 }, { "epoch": 2.6566811194324007, "grad_norm": 0.34375, "learning_rate": 7.828629310781266e-07, "loss": 0.9437, "step": 3370 }, { "epoch": 2.65746945210879, "grad_norm": 0.3515625, "learning_rate": 7.79306975976083e-07, "loss": 0.9383, "step": 3371 }, { "epoch": 2.6582577847851794, "grad_norm": 0.345703125, "learning_rate": 7.757587878056372e-07, "loss": 0.9469, "step": 3372 }, { "epoch": 2.659046117461569, "grad_norm": 0.34765625, "learning_rate": 7.722183695555563e-07, "loss": 0.9183, "step": 3373 }, { "epoch": 2.659834450137958, "grad_norm": 0.345703125, "learning_rate": 7.68685724208067e-07, "loss": 0.9205, "step": 3374 }, { "epoch": 2.6606227828143476, "grad_norm": 0.333984375, "learning_rate": 7.651608547388489e-07, "loss": 0.9214, "step": 3375 }, { "epoch": 2.661411115490737, "grad_norm": 0.333984375, "learning_rate": 7.616437641170316e-07, "loss": 0.8986, "step": 3376 }, { "epoch": 2.6621994481671267, "grad_norm": 0.3515625, "learning_rate": 7.581344553051873e-07, "loss": 0.9175, "step": 3377 }, { "epoch": 2.662987780843516, "grad_norm": 0.349609375, "learning_rate": 7.546329312593382e-07, "loss": 0.9041, "step": 3378 }, { "epoch": 2.6637761135199054, "grad_norm": 0.33984375, "learning_rate": 7.51139194928947e-07, "loss": 0.9214, "step": 3379 }, { "epoch": 2.664564446196295, "grad_norm": 0.34765625, "learning_rate": 7.476532492569222e-07, "loss": 0.9584, "step": 3380 }, { "epoch": 2.665352778872684, "grad_norm": 0.349609375, "learning_rate": 7.441750971795991e-07, "loss": 0.9058, "step": 3381 }, { "epoch": 2.6661411115490736, "grad_norm": 0.349609375, "learning_rate": 7.407047416267565e-07, "loss": 0.9348, "step": 3382 }, { "epoch": 2.666929444225463, "grad_norm": 0.34375, "learning_rate": 7.372421855216039e-07, "loss": 0.948, "step": 3383 }, { "epoch": 2.6677177769018527, "grad_norm": 0.34765625, "learning_rate": 7.337874317807803e-07, "loss": 0.9571, "step": 3384 }, { "epoch": 2.668506109578242, "grad_norm": 0.337890625, "learning_rate": 7.303404833143524e-07, "loss": 0.9592, "step": 3385 }, { "epoch": 2.6692944422546314, "grad_norm": 0.337890625, "learning_rate": 7.269013430258132e-07, "loss": 0.9151, "step": 3386 }, { "epoch": 2.670082774931021, "grad_norm": 0.345703125, "learning_rate": 7.234700138120776e-07, "loss": 0.9027, "step": 3387 }, { "epoch": 2.67087110760741, "grad_norm": 0.337890625, "learning_rate": 7.200464985634825e-07, "loss": 0.8961, "step": 3388 }, { "epoch": 2.6716594402837996, "grad_norm": 0.341796875, "learning_rate": 7.166308001637812e-07, "loss": 0.9237, "step": 3389 }, { "epoch": 2.672447772960189, "grad_norm": 0.353515625, "learning_rate": 7.13222921490142e-07, "loss": 0.9275, "step": 3390 }, { "epoch": 2.6732361056365788, "grad_norm": 0.34375, "learning_rate": 7.098228654131489e-07, "loss": 0.9106, "step": 3391 }, { "epoch": 2.6740244383129683, "grad_norm": 0.341796875, "learning_rate": 7.064306347967953e-07, "loss": 0.9413, "step": 3392 }, { "epoch": 2.6748127709893574, "grad_norm": 0.349609375, "learning_rate": 7.030462324984821e-07, "loss": 0.9261, "step": 3393 }, { "epoch": 2.675601103665747, "grad_norm": 0.34765625, "learning_rate": 6.996696613690157e-07, "loss": 0.9389, "step": 3394 }, { "epoch": 2.676389436342136, "grad_norm": 0.421875, "learning_rate": 6.963009242526098e-07, "loss": 0.9785, "step": 3395 }, { "epoch": 2.6771777690185257, "grad_norm": 0.3359375, "learning_rate": 6.929400239868745e-07, "loss": 0.9359, "step": 3396 }, { "epoch": 2.6779661016949152, "grad_norm": 0.34375, "learning_rate": 6.895869634028218e-07, "loss": 0.9352, "step": 3397 }, { "epoch": 2.678754434371305, "grad_norm": 0.353515625, "learning_rate": 6.862417453248593e-07, "loss": 0.9431, "step": 3398 }, { "epoch": 2.6795427670476943, "grad_norm": 0.357421875, "learning_rate": 6.829043725707852e-07, "loss": 0.9783, "step": 3399 }, { "epoch": 2.6803310997240835, "grad_norm": 0.349609375, "learning_rate": 6.79574847951796e-07, "loss": 0.9726, "step": 3400 }, { "epoch": 2.681119432400473, "grad_norm": 0.33984375, "learning_rate": 6.76253174272472e-07, "loss": 0.9013, "step": 3401 }, { "epoch": 2.6819077650768626, "grad_norm": 0.34375, "learning_rate": 6.729393543307838e-07, "loss": 0.937, "step": 3402 }, { "epoch": 2.6826960977532517, "grad_norm": 0.357421875, "learning_rate": 6.696333909180796e-07, "loss": 0.9725, "step": 3403 }, { "epoch": 2.6834844304296412, "grad_norm": 0.341796875, "learning_rate": 6.663352868191008e-07, "loss": 0.9049, "step": 3404 }, { "epoch": 2.684272763106031, "grad_norm": 0.345703125, "learning_rate": 6.630450448119618e-07, "loss": 0.956, "step": 3405 }, { "epoch": 2.6850610957824204, "grad_norm": 0.33984375, "learning_rate": 6.597626676681545e-07, "loss": 0.9409, "step": 3406 }, { "epoch": 2.6858494284588095, "grad_norm": 0.345703125, "learning_rate": 6.56488158152545e-07, "loss": 0.9373, "step": 3407 }, { "epoch": 2.686637761135199, "grad_norm": 0.3359375, "learning_rate": 6.532215190233748e-07, "loss": 0.9424, "step": 3408 }, { "epoch": 2.6874260938115886, "grad_norm": 0.3359375, "learning_rate": 6.499627530322583e-07, "loss": 0.9275, "step": 3409 }, { "epoch": 2.6882144264879777, "grad_norm": 0.353515625, "learning_rate": 6.46711862924172e-07, "loss": 0.919, "step": 3410 }, { "epoch": 2.6890027591643673, "grad_norm": 0.33984375, "learning_rate": 6.434688514374632e-07, "loss": 0.9589, "step": 3411 }, { "epoch": 2.689791091840757, "grad_norm": 0.345703125, "learning_rate": 6.402337213038379e-07, "loss": 0.926, "step": 3412 }, { "epoch": 2.6905794245171464, "grad_norm": 0.34375, "learning_rate": 6.370064752483662e-07, "loss": 0.9562, "step": 3413 }, { "epoch": 2.691367757193536, "grad_norm": 0.341796875, "learning_rate": 6.337871159894804e-07, "loss": 0.9073, "step": 3414 }, { "epoch": 2.692156089869925, "grad_norm": 0.349609375, "learning_rate": 6.305756462389645e-07, "loss": 0.9524, "step": 3415 }, { "epoch": 2.6929444225463146, "grad_norm": 0.345703125, "learning_rate": 6.27372068701958e-07, "loss": 0.9591, "step": 3416 }, { "epoch": 2.6937327552227037, "grad_norm": 0.333984375, "learning_rate": 6.241763860769535e-07, "loss": 0.9313, "step": 3417 }, { "epoch": 2.6945210878990933, "grad_norm": 0.33984375, "learning_rate": 6.209886010557908e-07, "loss": 0.9144, "step": 3418 }, { "epoch": 2.695309420575483, "grad_norm": 0.341796875, "learning_rate": 6.178087163236645e-07, "loss": 0.959, "step": 3419 }, { "epoch": 2.6960977532518724, "grad_norm": 0.3515625, "learning_rate": 6.146367345591053e-07, "loss": 0.9587, "step": 3420 }, { "epoch": 2.696886085928262, "grad_norm": 3.5, "learning_rate": 6.114726584339914e-07, "loss": 0.9174, "step": 3421 }, { "epoch": 2.697674418604651, "grad_norm": 0.36328125, "learning_rate": 6.083164906135431e-07, "loss": 0.9526, "step": 3422 }, { "epoch": 2.6984627512810406, "grad_norm": 0.33984375, "learning_rate": 6.051682337563158e-07, "loss": 0.9087, "step": 3423 }, { "epoch": 2.69925108395743, "grad_norm": 0.34765625, "learning_rate": 6.02027890514204e-07, "loss": 0.9435, "step": 3424 }, { "epoch": 2.7000394166338193, "grad_norm": 0.353515625, "learning_rate": 5.988954635324351e-07, "loss": 0.9553, "step": 3425 }, { "epoch": 2.700827749310209, "grad_norm": 0.341796875, "learning_rate": 5.957709554495683e-07, "loss": 0.9125, "step": 3426 }, { "epoch": 2.7016160819865984, "grad_norm": 0.34375, "learning_rate": 5.926543688974928e-07, "loss": 0.9396, "step": 3427 }, { "epoch": 2.702404414662988, "grad_norm": 0.33984375, "learning_rate": 5.895457065014243e-07, "loss": 0.8784, "step": 3428 }, { "epoch": 2.703192747339377, "grad_norm": 0.3515625, "learning_rate": 5.864449708799059e-07, "loss": 0.9229, "step": 3429 }, { "epoch": 2.7039810800157666, "grad_norm": 0.373046875, "learning_rate": 5.833521646448003e-07, "loss": 0.9298, "step": 3430 }, { "epoch": 2.704769412692156, "grad_norm": 0.349609375, "learning_rate": 5.802672904012951e-07, "loss": 0.9241, "step": 3431 }, { "epoch": 2.7055577453685453, "grad_norm": 0.349609375, "learning_rate": 5.771903507478915e-07, "loss": 0.9536, "step": 3432 }, { "epoch": 2.706346078044935, "grad_norm": 0.33984375, "learning_rate": 5.741213482764118e-07, "loss": 0.9273, "step": 3433 }, { "epoch": 2.7071344107213244, "grad_norm": 0.333984375, "learning_rate": 5.710602855719904e-07, "loss": 0.895, "step": 3434 }, { "epoch": 2.707922743397714, "grad_norm": 0.345703125, "learning_rate": 5.680071652130736e-07, "loss": 0.9252, "step": 3435 }, { "epoch": 2.708711076074103, "grad_norm": 0.3515625, "learning_rate": 5.649619897714187e-07, "loss": 0.9522, "step": 3436 }, { "epoch": 2.7094994087504927, "grad_norm": 0.337890625, "learning_rate": 5.619247618120871e-07, "loss": 0.9486, "step": 3437 }, { "epoch": 2.7102877414268822, "grad_norm": 0.345703125, "learning_rate": 5.588954838934523e-07, "loss": 0.9281, "step": 3438 }, { "epoch": 2.7110760741032713, "grad_norm": 0.33984375, "learning_rate": 5.558741585671845e-07, "loss": 0.9638, "step": 3439 }, { "epoch": 2.711864406779661, "grad_norm": 0.345703125, "learning_rate": 5.528607883782599e-07, "loss": 0.9145, "step": 3440 }, { "epoch": 2.7126527394560505, "grad_norm": 0.3359375, "learning_rate": 5.498553758649516e-07, "loss": 0.9407, "step": 3441 }, { "epoch": 2.71344107213244, "grad_norm": 0.357421875, "learning_rate": 5.468579235588268e-07, "loss": 0.9383, "step": 3442 }, { "epoch": 2.7142294048088296, "grad_norm": 0.345703125, "learning_rate": 5.438684339847556e-07, "loss": 0.9592, "step": 3443 }, { "epoch": 2.7150177374852187, "grad_norm": 0.345703125, "learning_rate": 5.408869096608926e-07, "loss": 0.9312, "step": 3444 }, { "epoch": 2.7158060701616082, "grad_norm": 0.353515625, "learning_rate": 5.379133530986902e-07, "loss": 0.9799, "step": 3445 }, { "epoch": 2.716594402837998, "grad_norm": 0.33984375, "learning_rate": 5.349477668028802e-07, "loss": 0.9354, "step": 3446 }, { "epoch": 2.717382735514387, "grad_norm": 0.341796875, "learning_rate": 5.319901532714877e-07, "loss": 0.9468, "step": 3447 }, { "epoch": 2.7181710681907765, "grad_norm": 0.345703125, "learning_rate": 5.290405149958211e-07, "loss": 0.9804, "step": 3448 }, { "epoch": 2.718959400867166, "grad_norm": 0.345703125, "learning_rate": 5.260988544604717e-07, "loss": 0.9373, "step": 3449 }, { "epoch": 2.7197477335435556, "grad_norm": 0.341796875, "learning_rate": 5.231651741433063e-07, "loss": 0.9552, "step": 3450 }, { "epoch": 2.7205360662199447, "grad_norm": 0.34375, "learning_rate": 5.202394765154728e-07, "loss": 0.9365, "step": 3451 }, { "epoch": 2.7213243988963343, "grad_norm": 0.34375, "learning_rate": 5.173217640413942e-07, "loss": 0.9543, "step": 3452 }, { "epoch": 2.722112731572724, "grad_norm": 0.353515625, "learning_rate": 5.144120391787732e-07, "loss": 0.9618, "step": 3453 }, { "epoch": 2.722901064249113, "grad_norm": 0.33984375, "learning_rate": 5.115103043785718e-07, "loss": 0.9188, "step": 3454 }, { "epoch": 2.7236893969255025, "grad_norm": 0.34765625, "learning_rate": 5.086165620850337e-07, "loss": 0.9511, "step": 3455 }, { "epoch": 2.724477729601892, "grad_norm": 0.34765625, "learning_rate": 5.057308147356632e-07, "loss": 0.9126, "step": 3456 }, { "epoch": 2.7252660622782816, "grad_norm": 0.34375, "learning_rate": 5.028530647612306e-07, "loss": 0.9143, "step": 3457 }, { "epoch": 2.7260543949546707, "grad_norm": 0.33984375, "learning_rate": 4.999833145857769e-07, "loss": 0.9392, "step": 3458 }, { "epoch": 2.7268427276310603, "grad_norm": 0.341796875, "learning_rate": 4.971215666265939e-07, "loss": 0.9391, "step": 3459 }, { "epoch": 2.72763106030745, "grad_norm": 0.337890625, "learning_rate": 4.942678232942399e-07, "loss": 0.9195, "step": 3460 }, { "epoch": 2.728419392983839, "grad_norm": 0.341796875, "learning_rate": 4.91422086992529e-07, "loss": 0.9012, "step": 3461 }, { "epoch": 2.7292077256602285, "grad_norm": 0.33984375, "learning_rate": 4.885843601185291e-07, "loss": 0.9546, "step": 3462 }, { "epoch": 2.729996058336618, "grad_norm": 0.33984375, "learning_rate": 4.85754645062565e-07, "loss": 0.9308, "step": 3463 }, { "epoch": 2.7307843910130076, "grad_norm": 0.341796875, "learning_rate": 4.829329442082076e-07, "loss": 0.9051, "step": 3464 }, { "epoch": 2.731572723689397, "grad_norm": 0.349609375, "learning_rate": 4.801192599322835e-07, "loss": 0.9332, "step": 3465 }, { "epoch": 2.7323610563657863, "grad_norm": 0.3359375, "learning_rate": 4.773135946048601e-07, "loss": 0.9098, "step": 3466 }, { "epoch": 2.733149389042176, "grad_norm": 0.486328125, "learning_rate": 4.7451595058925606e-07, "loss": 0.9229, "step": 3467 }, { "epoch": 2.733937721718565, "grad_norm": 0.3359375, "learning_rate": 4.717263302420283e-07, "loss": 0.9089, "step": 3468 }, { "epoch": 2.7347260543949545, "grad_norm": 0.341796875, "learning_rate": 4.6894473591297953e-07, "loss": 0.9247, "step": 3469 }, { "epoch": 2.735514387071344, "grad_norm": 0.3515625, "learning_rate": 4.6617116994514764e-07, "loss": 0.9273, "step": 3470 }, { "epoch": 2.7363027197477336, "grad_norm": 0.349609375, "learning_rate": 4.634056346748117e-07, "loss": 0.9325, "step": 3471 }, { "epoch": 2.737091052424123, "grad_norm": 0.34375, "learning_rate": 4.6064813243148487e-07, "loss": 0.9228, "step": 3472 }, { "epoch": 2.7378793851005123, "grad_norm": 0.341796875, "learning_rate": 4.5789866553791253e-07, "loss": 0.9385, "step": 3473 }, { "epoch": 2.738667717776902, "grad_norm": 0.35546875, "learning_rate": 4.551572363100731e-07, "loss": 0.959, "step": 3474 }, { "epoch": 2.7394560504532914, "grad_norm": 0.35546875, "learning_rate": 4.5242384705717404e-07, "loss": 0.898, "step": 3475 }, { "epoch": 2.7402443831296805, "grad_norm": 0.341796875, "learning_rate": 4.4969850008164897e-07, "loss": 0.93, "step": 3476 }, { "epoch": 2.74103271580607, "grad_norm": 0.349609375, "learning_rate": 4.469811976791605e-07, "loss": 0.9779, "step": 3477 }, { "epoch": 2.7418210484824597, "grad_norm": 0.341796875, "learning_rate": 4.4427194213859216e-07, "loss": 0.9304, "step": 3478 }, { "epoch": 2.7426093811588492, "grad_norm": 0.369140625, "learning_rate": 4.4157073574205176e-07, "loss": 0.9298, "step": 3479 }, { "epoch": 2.7433977138352383, "grad_norm": 0.349609375, "learning_rate": 4.3887758076486597e-07, "loss": 0.9608, "step": 3480 }, { "epoch": 2.744186046511628, "grad_norm": 0.34375, "learning_rate": 4.361924794755745e-07, "loss": 0.9208, "step": 3481 }, { "epoch": 2.7449743791880175, "grad_norm": 0.3359375, "learning_rate": 4.3351543413594263e-07, "loss": 0.912, "step": 3482 }, { "epoch": 2.7457627118644066, "grad_norm": 0.337890625, "learning_rate": 4.308464470009432e-07, "loss": 0.9242, "step": 3483 }, { "epoch": 2.746551044540796, "grad_norm": 0.3515625, "learning_rate": 4.2818552031876457e-07, "loss": 0.9557, "step": 3484 }, { "epoch": 2.7473393772171857, "grad_norm": 0.3828125, "learning_rate": 4.2553265633080154e-07, "loss": 0.9455, "step": 3485 }, { "epoch": 2.7481277098935752, "grad_norm": 0.337890625, "learning_rate": 4.228878572716588e-07, "loss": 0.9563, "step": 3486 }, { "epoch": 2.748916042569965, "grad_norm": 0.431640625, "learning_rate": 4.202511253691521e-07, "loss": 0.912, "step": 3487 }, { "epoch": 2.749704375246354, "grad_norm": 0.33984375, "learning_rate": 4.176224628442982e-07, "loss": 0.947, "step": 3488 }, { "epoch": 2.7504927079227435, "grad_norm": 0.33984375, "learning_rate": 4.150018719113147e-07, "loss": 0.9464, "step": 3489 }, { "epoch": 2.7512810405991326, "grad_norm": 0.349609375, "learning_rate": 4.1238935477762367e-07, "loss": 0.9089, "step": 3490 }, { "epoch": 2.752069373275522, "grad_norm": 0.3359375, "learning_rate": 4.0978491364384365e-07, "loss": 0.9564, "step": 3491 }, { "epoch": 2.7528577059519117, "grad_norm": 0.341796875, "learning_rate": 4.0718855070379535e-07, "loss": 0.9506, "step": 3492 }, { "epoch": 2.7536460386283013, "grad_norm": 0.3515625, "learning_rate": 4.046002681444894e-07, "loss": 0.9483, "step": 3493 }, { "epoch": 2.754434371304691, "grad_norm": 0.341796875, "learning_rate": 4.020200681461317e-07, "loss": 0.9261, "step": 3494 }, { "epoch": 2.75522270398108, "grad_norm": 0.33984375, "learning_rate": 3.9944795288212047e-07, "loss": 0.9547, "step": 3495 }, { "epoch": 2.7560110366574695, "grad_norm": 0.349609375, "learning_rate": 3.968839245190448e-07, "loss": 0.9777, "step": 3496 }, { "epoch": 2.756799369333859, "grad_norm": 0.333984375, "learning_rate": 3.9432798521668035e-07, "loss": 0.9102, "step": 3497 }, { "epoch": 2.757587702010248, "grad_norm": 0.33984375, "learning_rate": 3.917801371279895e-07, "loss": 0.8961, "step": 3498 }, { "epoch": 2.7583760346866377, "grad_norm": 0.349609375, "learning_rate": 3.8924038239911977e-07, "loss": 0.9282, "step": 3499 }, { "epoch": 2.7591643673630273, "grad_norm": 0.34375, "learning_rate": 3.867087231693989e-07, "loss": 0.9159, "step": 3500 }, { "epoch": 2.759952700039417, "grad_norm": 0.3359375, "learning_rate": 3.841851615713399e-07, "loss": 0.8888, "step": 3501 }, { "epoch": 2.760741032715806, "grad_norm": 0.34765625, "learning_rate": 3.8166969973063015e-07, "loss": 0.9288, "step": 3502 }, { "epoch": 2.7615293653921955, "grad_norm": 0.33984375, "learning_rate": 3.7916233976613704e-07, "loss": 0.9514, "step": 3503 }, { "epoch": 2.762317698068585, "grad_norm": 0.34765625, "learning_rate": 3.766630837899032e-07, "loss": 0.9414, "step": 3504 }, { "epoch": 2.763106030744974, "grad_norm": 0.337890625, "learning_rate": 3.741719339071448e-07, "loss": 0.9619, "step": 3505 }, { "epoch": 2.7638943634213637, "grad_norm": 0.357421875, "learning_rate": 3.716888922162487e-07, "loss": 0.9548, "step": 3506 }, { "epoch": 2.7646826960977533, "grad_norm": 0.3515625, "learning_rate": 3.6921396080877414e-07, "loss": 0.9441, "step": 3507 }, { "epoch": 2.765471028774143, "grad_norm": 0.3359375, "learning_rate": 3.667471417694468e-07, "loss": 0.915, "step": 3508 }, { "epoch": 2.766259361450532, "grad_norm": 0.35546875, "learning_rate": 3.642884371761601e-07, "loss": 0.9479, "step": 3509 }, { "epoch": 2.7670476941269215, "grad_norm": 0.341796875, "learning_rate": 3.618378490999719e-07, "loss": 0.9108, "step": 3510 }, { "epoch": 2.767836026803311, "grad_norm": 0.349609375, "learning_rate": 3.5939537960510416e-07, "loss": 0.8961, "step": 3511 }, { "epoch": 2.7686243594797, "grad_norm": 0.345703125, "learning_rate": 3.56961030748938e-07, "loss": 0.9396, "step": 3512 }, { "epoch": 2.7694126921560898, "grad_norm": 0.34375, "learning_rate": 3.545348045820174e-07, "loss": 0.9392, "step": 3513 }, { "epoch": 2.7702010248324793, "grad_norm": 0.3359375, "learning_rate": 3.5211670314804326e-07, "loss": 0.9218, "step": 3514 }, { "epoch": 2.770989357508869, "grad_norm": 0.34375, "learning_rate": 3.497067284838673e-07, "loss": 0.9197, "step": 3515 }, { "epoch": 2.7717776901852584, "grad_norm": 0.357421875, "learning_rate": 3.473048826195058e-07, "loss": 0.9279, "step": 3516 }, { "epoch": 2.7725660228616476, "grad_norm": 0.34375, "learning_rate": 3.4491116757812024e-07, "loss": 0.9267, "step": 3517 }, { "epoch": 2.773354355538037, "grad_norm": 0.359375, "learning_rate": 3.4252558537602785e-07, "loss": 0.9259, "step": 3518 }, { "epoch": 2.7741426882144262, "grad_norm": 0.341796875, "learning_rate": 3.40148138022689e-07, "loss": 0.9591, "step": 3519 }, { "epoch": 2.774931020890816, "grad_norm": 0.34765625, "learning_rate": 3.3777882752071724e-07, "loss": 0.9182, "step": 3520 }, { "epoch": 2.7757193535672053, "grad_norm": 0.33984375, "learning_rate": 3.3541765586587285e-07, "loss": 0.9225, "step": 3521 }, { "epoch": 2.776507686243595, "grad_norm": 0.34375, "learning_rate": 3.3306462504705706e-07, "loss": 0.9519, "step": 3522 }, { "epoch": 2.7772960189199845, "grad_norm": 0.337890625, "learning_rate": 3.307197370463133e-07, "loss": 0.9176, "step": 3523 }, { "epoch": 2.7780843515963736, "grad_norm": 0.33984375, "learning_rate": 3.283829938388294e-07, "loss": 0.9304, "step": 3524 }, { "epoch": 2.778872684272763, "grad_norm": 0.34375, "learning_rate": 3.2605439739292863e-07, "loss": 0.942, "step": 3525 }, { "epoch": 2.7796610169491527, "grad_norm": 0.34765625, "learning_rate": 3.2373394967007753e-07, "loss": 0.9553, "step": 3526 }, { "epoch": 2.780449349625542, "grad_norm": 0.34375, "learning_rate": 3.2142165262487366e-07, "loss": 0.9497, "step": 3527 }, { "epoch": 2.7812376823019314, "grad_norm": 0.33984375, "learning_rate": 3.191175082050502e-07, "loss": 0.9367, "step": 3528 }, { "epoch": 2.782026014978321, "grad_norm": 0.337890625, "learning_rate": 3.1682151835147336e-07, "loss": 0.9247, "step": 3529 }, { "epoch": 2.7828143476547105, "grad_norm": 0.439453125, "learning_rate": 3.145336849981395e-07, "loss": 0.936, "step": 3530 }, { "epoch": 2.7836026803310996, "grad_norm": 0.33984375, "learning_rate": 3.122540100721794e-07, "loss": 0.9446, "step": 3531 }, { "epoch": 2.784391013007489, "grad_norm": 0.34765625, "learning_rate": 3.099824954938435e-07, "loss": 0.9439, "step": 3532 }, { "epoch": 2.7851793456838787, "grad_norm": 0.359375, "learning_rate": 3.0771914317651475e-07, "loss": 0.9545, "step": 3533 }, { "epoch": 2.785967678360268, "grad_norm": 0.345703125, "learning_rate": 3.05463955026698e-07, "loss": 0.9455, "step": 3534 }, { "epoch": 2.7867560110366574, "grad_norm": 0.33984375, "learning_rate": 3.0321693294402264e-07, "loss": 0.9301, "step": 3535 }, { "epoch": 2.787544343713047, "grad_norm": 0.341796875, "learning_rate": 3.00978078821238e-07, "loss": 0.9532, "step": 3536 }, { "epoch": 2.7883326763894365, "grad_norm": 0.34375, "learning_rate": 2.987473945442143e-07, "loss": 0.9412, "step": 3537 }, { "epoch": 2.789121009065826, "grad_norm": 0.341796875, "learning_rate": 2.965248819919397e-07, "loss": 0.9075, "step": 3538 }, { "epoch": 2.789909341742215, "grad_norm": 0.3359375, "learning_rate": 2.9431054303651787e-07, "loss": 0.9258, "step": 3539 }, { "epoch": 2.7906976744186047, "grad_norm": 0.34375, "learning_rate": 2.9210437954316997e-07, "loss": 0.8908, "step": 3540 }, { "epoch": 2.791486007094994, "grad_norm": 0.341796875, "learning_rate": 2.8990639337022844e-07, "loss": 0.9431, "step": 3541 }, { "epoch": 2.7922743397713834, "grad_norm": 0.34765625, "learning_rate": 2.877165863691389e-07, "loss": 0.9674, "step": 3542 }, { "epoch": 2.793062672447773, "grad_norm": 0.33984375, "learning_rate": 2.8553496038445707e-07, "loss": 0.9436, "step": 3543 }, { "epoch": 2.7938510051241625, "grad_norm": 0.353515625, "learning_rate": 2.833615172538473e-07, "loss": 0.9415, "step": 3544 }, { "epoch": 2.794639337800552, "grad_norm": 0.34375, "learning_rate": 2.8119625880808185e-07, "loss": 0.9731, "step": 3545 }, { "epoch": 2.795427670476941, "grad_norm": 0.33984375, "learning_rate": 2.790391868710374e-07, "loss": 0.9228, "step": 3546 }, { "epoch": 2.7962160031533307, "grad_norm": 0.345703125, "learning_rate": 2.7689030325969477e-07, "loss": 0.9302, "step": 3547 }, { "epoch": 2.7970043358297203, "grad_norm": 0.349609375, "learning_rate": 2.7474960978414065e-07, "loss": 0.9384, "step": 3548 }, { "epoch": 2.7977926685061094, "grad_norm": 0.373046875, "learning_rate": 2.7261710824755814e-07, "loss": 0.9393, "step": 3549 }, { "epoch": 2.798581001182499, "grad_norm": 0.333984375, "learning_rate": 2.704928004462337e-07, "loss": 0.8928, "step": 3550 }, { "epoch": 2.7993693338588885, "grad_norm": 0.373046875, "learning_rate": 2.683766881695504e-07, "loss": 0.9451, "step": 3551 }, { "epoch": 2.800157666535278, "grad_norm": 0.34765625, "learning_rate": 2.66268773199988e-07, "loss": 0.8984, "step": 3552 }, { "epoch": 2.800945999211667, "grad_norm": 0.349609375, "learning_rate": 2.641690573131228e-07, "loss": 0.9556, "step": 3553 }, { "epoch": 2.8017343318880568, "grad_norm": 0.33984375, "learning_rate": 2.6207754227761897e-07, "loss": 0.9044, "step": 3554 }, { "epoch": 2.8025226645644463, "grad_norm": 0.34375, "learning_rate": 2.5999422985524157e-07, "loss": 0.9304, "step": 3555 }, { "epoch": 2.8033109972408354, "grad_norm": 0.345703125, "learning_rate": 2.5791912180084033e-07, "loss": 0.9579, "step": 3556 }, { "epoch": 2.804099329917225, "grad_norm": 0.33984375, "learning_rate": 2.55852219862357e-07, "loss": 0.9353, "step": 3557 }, { "epoch": 2.8048876625936146, "grad_norm": 0.33984375, "learning_rate": 2.537935257808177e-07, "loss": 0.9163, "step": 3558 }, { "epoch": 2.805675995270004, "grad_norm": 0.34375, "learning_rate": 2.5174304129033655e-07, "loss": 0.9321, "step": 3559 }, { "epoch": 2.8064643279463932, "grad_norm": 0.3515625, "learning_rate": 2.4970076811811514e-07, "loss": 0.9204, "step": 3560 }, { "epoch": 2.807252660622783, "grad_norm": 0.345703125, "learning_rate": 2.4766670798443414e-07, "loss": 0.9378, "step": 3561 }, { "epoch": 2.8080409932991723, "grad_norm": 0.34375, "learning_rate": 2.456408626026585e-07, "loss": 0.9406, "step": 3562 }, { "epoch": 2.8088293259755615, "grad_norm": 0.353515625, "learning_rate": 2.4362323367923216e-07, "loss": 0.9351, "step": 3563 }, { "epoch": 2.809617658651951, "grad_norm": 0.33984375, "learning_rate": 2.4161382291367776e-07, "loss": 0.9248, "step": 3564 }, { "epoch": 2.8104059913283406, "grad_norm": 0.345703125, "learning_rate": 2.396126319985992e-07, "loss": 0.9359, "step": 3565 }, { "epoch": 2.81119432400473, "grad_norm": 0.34375, "learning_rate": 2.3761966261967252e-07, "loss": 0.8979, "step": 3566 }, { "epoch": 2.8119826566811197, "grad_norm": 0.34765625, "learning_rate": 2.356349164556493e-07, "loss": 0.9149, "step": 3567 }, { "epoch": 2.812770989357509, "grad_norm": 0.357421875, "learning_rate": 2.3365839517835554e-07, "loss": 0.9572, "step": 3568 }, { "epoch": 2.8135593220338984, "grad_norm": 0.337890625, "learning_rate": 2.3169010045268725e-07, "loss": 0.938, "step": 3569 }, { "epoch": 2.8143476547102875, "grad_norm": 0.373046875, "learning_rate": 2.2973003393661374e-07, "loss": 0.9139, "step": 3570 }, { "epoch": 2.815135987386677, "grad_norm": 0.341796875, "learning_rate": 2.277781972811699e-07, "loss": 0.9729, "step": 3571 }, { "epoch": 2.8159243200630666, "grad_norm": 0.353515625, "learning_rate": 2.2583459213046167e-07, "loss": 0.9068, "step": 3572 }, { "epoch": 2.816712652739456, "grad_norm": 0.3515625, "learning_rate": 2.2389922012165944e-07, "loss": 0.9508, "step": 3573 }, { "epoch": 2.8175009854158457, "grad_norm": 0.34375, "learning_rate": 2.2197208288499694e-07, "loss": 0.916, "step": 3574 }, { "epoch": 2.818289318092235, "grad_norm": 0.357421875, "learning_rate": 2.2005318204377569e-07, "loss": 0.9345, "step": 3575 }, { "epoch": 2.8190776507686244, "grad_norm": 0.341796875, "learning_rate": 2.1814251921435603e-07, "loss": 0.9378, "step": 3576 }, { "epoch": 2.819865983445014, "grad_norm": 0.380859375, "learning_rate": 2.162400960061606e-07, "loss": 0.9328, "step": 3577 }, { "epoch": 2.820654316121403, "grad_norm": 0.333984375, "learning_rate": 2.143459140216697e-07, "loss": 0.8961, "step": 3578 }, { "epoch": 2.8214426487977926, "grad_norm": 0.34375, "learning_rate": 2.124599748564249e-07, "loss": 0.9941, "step": 3579 }, { "epoch": 2.822230981474182, "grad_norm": 0.33984375, "learning_rate": 2.1058228009902094e-07, "loss": 0.941, "step": 3580 }, { "epoch": 2.8230193141505717, "grad_norm": 0.33984375, "learning_rate": 2.0871283133111153e-07, "loss": 0.9468, "step": 3581 }, { "epoch": 2.823807646826961, "grad_norm": 0.333984375, "learning_rate": 2.0685163012740039e-07, "loss": 0.8907, "step": 3582 }, { "epoch": 2.8245959795033504, "grad_norm": 0.34765625, "learning_rate": 2.0499867805564789e-07, "loss": 0.9485, "step": 3583 }, { "epoch": 2.82538431217974, "grad_norm": 0.345703125, "learning_rate": 2.0315397667666438e-07, "loss": 0.9386, "step": 3584 }, { "epoch": 2.826172644856129, "grad_norm": 0.341796875, "learning_rate": 2.013175275443102e-07, "loss": 0.9212, "step": 3585 }, { "epoch": 2.8269609775325186, "grad_norm": 0.349609375, "learning_rate": 1.994893322054925e-07, "loss": 0.9433, "step": 3586 }, { "epoch": 2.827749310208908, "grad_norm": 0.345703125, "learning_rate": 1.9766939220017155e-07, "loss": 0.9282, "step": 3587 }, { "epoch": 2.8285376428852977, "grad_norm": 0.34375, "learning_rate": 1.9585770906134671e-07, "loss": 0.8857, "step": 3588 }, { "epoch": 2.8293259755616873, "grad_norm": 0.349609375, "learning_rate": 1.9405428431506835e-07, "loss": 0.9044, "step": 3589 }, { "epoch": 2.8301143082380764, "grad_norm": 0.333984375, "learning_rate": 1.9225911948042685e-07, "loss": 0.9573, "step": 3590 }, { "epoch": 2.830902640914466, "grad_norm": 0.341796875, "learning_rate": 1.9047221606955713e-07, "loss": 0.9123, "step": 3591 }, { "epoch": 2.831690973590855, "grad_norm": 0.36328125, "learning_rate": 1.886935755876329e-07, "loss": 0.9395, "step": 3592 }, { "epoch": 2.8324793062672446, "grad_norm": 0.3515625, "learning_rate": 1.8692319953286908e-07, "loss": 0.9617, "step": 3593 }, { "epoch": 2.833267638943634, "grad_norm": 0.33984375, "learning_rate": 1.8516108939651945e-07, "loss": 0.8846, "step": 3594 }, { "epoch": 2.8340559716200238, "grad_norm": 0.349609375, "learning_rate": 1.8340724666287556e-07, "loss": 0.9933, "step": 3595 }, { "epoch": 2.8348443042964133, "grad_norm": 0.3359375, "learning_rate": 1.816616728092646e-07, "loss": 0.8842, "step": 3596 }, { "epoch": 2.8356326369728024, "grad_norm": 0.34375, "learning_rate": 1.7992436930604484e-07, "loss": 0.9236, "step": 3597 }, { "epoch": 2.836420969649192, "grad_norm": 0.337890625, "learning_rate": 1.7819533761661346e-07, "loss": 0.8998, "step": 3598 }, { "epoch": 2.8372093023255816, "grad_norm": 0.34765625, "learning_rate": 1.7647457919739873e-07, "loss": 0.9396, "step": 3599 }, { "epoch": 2.8379976350019707, "grad_norm": 0.3515625, "learning_rate": 1.7476209549785906e-07, "loss": 0.9222, "step": 3600 }, { "epoch": 2.8387859676783602, "grad_norm": 0.349609375, "learning_rate": 1.7305788796048274e-07, "loss": 0.9482, "step": 3601 }, { "epoch": 2.83957430035475, "grad_norm": 0.341796875, "learning_rate": 1.7136195802078481e-07, "loss": 0.9435, "step": 3602 }, { "epoch": 2.8403626330311393, "grad_norm": 0.337890625, "learning_rate": 1.6967430710731258e-07, "loss": 0.9256, "step": 3603 }, { "epoch": 2.8411509657075285, "grad_norm": 0.33203125, "learning_rate": 1.679949366416367e-07, "loss": 0.9393, "step": 3604 }, { "epoch": 2.841939298383918, "grad_norm": 0.390625, "learning_rate": 1.6632384803835334e-07, "loss": 0.9413, "step": 3605 }, { "epoch": 2.8427276310603076, "grad_norm": 0.365234375, "learning_rate": 1.6466104270508099e-07, "loss": 0.9131, "step": 3606 }, { "epoch": 2.8435159637366967, "grad_norm": 0.337890625, "learning_rate": 1.630065220424626e-07, "loss": 0.9217, "step": 3607 }, { "epoch": 2.8443042964130862, "grad_norm": 0.345703125, "learning_rate": 1.613602874441622e-07, "loss": 0.8951, "step": 3608 }, { "epoch": 2.845092629089476, "grad_norm": 0.345703125, "learning_rate": 1.5972234029686617e-07, "loss": 0.9391, "step": 3609 }, { "epoch": 2.8458809617658654, "grad_norm": 0.341796875, "learning_rate": 1.5809268198027527e-07, "loss": 0.9011, "step": 3610 }, { "epoch": 2.8466692944422545, "grad_norm": 0.34765625, "learning_rate": 1.5647131386711366e-07, "loss": 0.9256, "step": 3611 }, { "epoch": 2.847457627118644, "grad_norm": 0.33984375, "learning_rate": 1.5485823732311777e-07, "loss": 0.9426, "step": 3612 }, { "epoch": 2.8482459597950336, "grad_norm": 0.369140625, "learning_rate": 1.5325345370704292e-07, "loss": 0.9175, "step": 3613 }, { "epoch": 2.8490342924714227, "grad_norm": 0.341796875, "learning_rate": 1.516569643706578e-07, "loss": 0.9838, "step": 3614 }, { "epoch": 2.8498226251478123, "grad_norm": 0.337890625, "learning_rate": 1.5006877065874338e-07, "loss": 0.9156, "step": 3615 }, { "epoch": 2.850610957824202, "grad_norm": 0.341796875, "learning_rate": 1.4848887390909615e-07, "loss": 0.9322, "step": 3616 }, { "epoch": 2.8513992905005914, "grad_norm": 0.34375, "learning_rate": 1.4691727545251945e-07, "loss": 0.9158, "step": 3617 }, { "epoch": 2.852187623176981, "grad_norm": 0.33984375, "learning_rate": 1.4535397661283092e-07, "loss": 0.9254, "step": 3618 }, { "epoch": 2.85297595585337, "grad_norm": 0.34375, "learning_rate": 1.43798978706855e-07, "loss": 0.9523, "step": 3619 }, { "epoch": 2.8537642885297596, "grad_norm": 0.341796875, "learning_rate": 1.4225228304442173e-07, "loss": 0.935, "step": 3620 }, { "epoch": 2.8545526212061487, "grad_norm": 0.33984375, "learning_rate": 1.4071389092837339e-07, "loss": 0.9396, "step": 3621 }, { "epoch": 2.8553409538825383, "grad_norm": 0.337890625, "learning_rate": 1.3918380365455232e-07, "loss": 0.9158, "step": 3622 }, { "epoch": 2.856129286558928, "grad_norm": 0.341796875, "learning_rate": 1.376620225118086e-07, "loss": 0.9554, "step": 3623 }, { "epoch": 2.8569176192353174, "grad_norm": 0.333984375, "learning_rate": 1.3614854878199578e-07, "loss": 0.9186, "step": 3624 }, { "epoch": 2.857705951911707, "grad_norm": 0.34765625, "learning_rate": 1.3464338373996744e-07, "loss": 0.9349, "step": 3625 }, { "epoch": 2.858494284588096, "grad_norm": 0.34375, "learning_rate": 1.3314652865358158e-07, "loss": 0.9306, "step": 3626 }, { "epoch": 2.8592826172644856, "grad_norm": 0.337890625, "learning_rate": 1.3165798478369184e-07, "loss": 0.917, "step": 3627 }, { "epoch": 2.860070949940875, "grad_norm": 0.34375, "learning_rate": 1.3017775338415638e-07, "loss": 0.9264, "step": 3628 }, { "epoch": 2.8608592826172643, "grad_norm": 0.337890625, "learning_rate": 1.287058357018278e-07, "loss": 0.8917, "step": 3629 }, { "epoch": 2.861647615293654, "grad_norm": 0.341796875, "learning_rate": 1.272422329765588e-07, "loss": 0.9285, "step": 3630 }, { "epoch": 2.8624359479700434, "grad_norm": 0.341796875, "learning_rate": 1.2578694644119427e-07, "loss": 0.9532, "step": 3631 }, { "epoch": 2.863224280646433, "grad_norm": 0.337890625, "learning_rate": 1.2433997732157588e-07, "loss": 0.8639, "step": 3632 }, { "epoch": 2.864012613322822, "grad_norm": 0.34375, "learning_rate": 1.2290132683654087e-07, "loss": 0.9949, "step": 3633 }, { "epoch": 2.8648009459992116, "grad_norm": 0.34765625, "learning_rate": 1.214709961979177e-07, "loss": 0.9425, "step": 3634 }, { "epoch": 2.865589278675601, "grad_norm": 0.359375, "learning_rate": 1.2004898661052588e-07, "loss": 0.927, "step": 3635 }, { "epoch": 2.8663776113519903, "grad_norm": 0.33984375, "learning_rate": 1.1863529927217731e-07, "loss": 0.9498, "step": 3636 }, { "epoch": 2.86716594402838, "grad_norm": 0.341796875, "learning_rate": 1.1722993537367278e-07, "loss": 0.9323, "step": 3637 }, { "epoch": 2.8679542767047694, "grad_norm": 0.341796875, "learning_rate": 1.1583289609880311e-07, "loss": 0.9108, "step": 3638 }, { "epoch": 2.868742609381159, "grad_norm": 0.345703125, "learning_rate": 1.1444418262434587e-07, "loss": 0.9543, "step": 3639 }, { "epoch": 2.8695309420575486, "grad_norm": 0.333984375, "learning_rate": 1.1306379612006646e-07, "loss": 0.9135, "step": 3640 }, { "epoch": 2.8703192747339377, "grad_norm": 0.34375, "learning_rate": 1.1169173774871478e-07, "loss": 0.9555, "step": 3641 }, { "epoch": 2.8711076074103272, "grad_norm": 0.34765625, "learning_rate": 1.1032800866602633e-07, "loss": 0.9224, "step": 3642 }, { "epoch": 2.8718959400867163, "grad_norm": 0.34375, "learning_rate": 1.0897261002072223e-07, "loss": 0.8822, "step": 3643 }, { "epoch": 2.872684272763106, "grad_norm": 0.38671875, "learning_rate": 1.0762554295450367e-07, "loss": 0.9575, "step": 3644 }, { "epoch": 2.8734726054394955, "grad_norm": 0.345703125, "learning_rate": 1.062868086020552e-07, "loss": 0.9174, "step": 3645 }, { "epoch": 2.874260938115885, "grad_norm": 0.34765625, "learning_rate": 1.0495640809104257e-07, "loss": 0.938, "step": 3646 }, { "epoch": 2.8750492707922746, "grad_norm": 0.3359375, "learning_rate": 1.0363434254211269e-07, "loss": 0.9524, "step": 3647 }, { "epoch": 2.8758376034686637, "grad_norm": 0.337890625, "learning_rate": 1.0232061306888918e-07, "loss": 0.8967, "step": 3648 }, { "epoch": 2.8766259361450532, "grad_norm": 0.345703125, "learning_rate": 1.0101522077797354e-07, "loss": 0.9245, "step": 3649 }, { "epoch": 2.877414268821443, "grad_norm": 0.3359375, "learning_rate": 9.971816676894952e-08, "loss": 0.9166, "step": 3650 }, { "epoch": 2.878202601497832, "grad_norm": 0.345703125, "learning_rate": 9.842945213437094e-08, "loss": 0.9464, "step": 3651 }, { "epoch": 2.8789909341742215, "grad_norm": 0.345703125, "learning_rate": 9.714907795977169e-08, "loss": 0.9284, "step": 3652 }, { "epoch": 2.879779266850611, "grad_norm": 0.337890625, "learning_rate": 9.587704532365683e-08, "loss": 0.9523, "step": 3653 }, { "epoch": 2.8805675995270006, "grad_norm": 0.345703125, "learning_rate": 9.461335529750815e-08, "loss": 0.937, "step": 3654 }, { "epoch": 2.8813559322033897, "grad_norm": 0.3359375, "learning_rate": 9.335800894577862e-08, "loss": 0.925, "step": 3655 }, { "epoch": 2.8821442648797793, "grad_norm": 0.35546875, "learning_rate": 9.211100732589129e-08, "loss": 0.9473, "step": 3656 }, { "epoch": 2.882932597556169, "grad_norm": 0.35546875, "learning_rate": 9.08723514882437e-08, "loss": 0.9289, "step": 3657 }, { "epoch": 2.883720930232558, "grad_norm": 0.34375, "learning_rate": 8.964204247620012e-08, "loss": 0.9299, "step": 3658 }, { "epoch": 2.8845092629089475, "grad_norm": 0.337890625, "learning_rate": 8.842008132609603e-08, "loss": 0.9203, "step": 3659 }, { "epoch": 2.885297595585337, "grad_norm": 0.34765625, "learning_rate": 8.720646906723585e-08, "loss": 0.9164, "step": 3660 }, { "epoch": 2.8860859282617266, "grad_norm": 0.349609375, "learning_rate": 8.600120672188739e-08, "loss": 0.933, "step": 3661 }, { "epoch": 2.8868742609381157, "grad_norm": 0.330078125, "learning_rate": 8.480429530529077e-08, "loss": 0.8621, "step": 3662 }, { "epoch": 2.8876625936145053, "grad_norm": 0.3515625, "learning_rate": 8.36157358256473e-08, "loss": 0.9191, "step": 3663 }, { "epoch": 2.888450926290895, "grad_norm": 0.353515625, "learning_rate": 8.243552928412501e-08, "loss": 0.9349, "step": 3664 }, { "epoch": 2.889239258967284, "grad_norm": 0.341796875, "learning_rate": 8.126367667485535e-08, "loss": 0.9488, "step": 3665 }, { "epoch": 2.8900275916436735, "grad_norm": 0.33984375, "learning_rate": 8.010017898493316e-08, "loss": 0.9215, "step": 3666 }, { "epoch": 2.890815924320063, "grad_norm": 0.34375, "learning_rate": 7.89450371944167e-08, "loss": 0.9287, "step": 3667 }, { "epoch": 2.8916042569964526, "grad_norm": 0.33984375, "learning_rate": 7.779825227632321e-08, "loss": 0.9082, "step": 3668 }, { "epoch": 2.892392589672842, "grad_norm": 0.341796875, "learning_rate": 7.665982519663329e-08, "loss": 0.9375, "step": 3669 }, { "epoch": 2.8931809223492313, "grad_norm": 0.333984375, "learning_rate": 7.552975691428655e-08, "loss": 0.9337, "step": 3670 }, { "epoch": 2.893969255025621, "grad_norm": 0.34375, "learning_rate": 7.440804838117932e-08, "loss": 0.9132, "step": 3671 }, { "epoch": 2.89475758770201, "grad_norm": 0.3359375, "learning_rate": 7.329470054217024e-08, "loss": 0.9525, "step": 3672 }, { "epoch": 2.8955459203783995, "grad_norm": 0.3359375, "learning_rate": 7.218971433507471e-08, "loss": 0.911, "step": 3673 }, { "epoch": 2.896334253054789, "grad_norm": 0.33984375, "learning_rate": 7.109309069065928e-08, "loss": 0.9243, "step": 3674 }, { "epoch": 2.8971225857311786, "grad_norm": 0.33984375, "learning_rate": 7.000483053265506e-08, "loss": 0.9598, "step": 3675 }, { "epoch": 2.897910918407568, "grad_norm": 0.345703125, "learning_rate": 6.892493477774098e-08, "loss": 0.9077, "step": 3676 }, { "epoch": 2.8986992510839573, "grad_norm": 0.345703125, "learning_rate": 6.785340433555499e-08, "loss": 0.9469, "step": 3677 }, { "epoch": 2.899487583760347, "grad_norm": 0.33984375, "learning_rate": 6.679024010868617e-08, "loss": 0.8927, "step": 3678 }, { "epoch": 2.9002759164367364, "grad_norm": 0.33984375, "learning_rate": 6.573544299267709e-08, "loss": 0.9374, "step": 3679 }, { "epoch": 2.9010642491131255, "grad_norm": 0.3359375, "learning_rate": 6.468901387602367e-08, "loss": 0.897, "step": 3680 }, { "epoch": 2.901852581789515, "grad_norm": 0.34375, "learning_rate": 6.365095364016971e-08, "loss": 0.9474, "step": 3681 }, { "epoch": 2.9026409144659047, "grad_norm": 0.349609375, "learning_rate": 6.262126315951355e-08, "loss": 0.9017, "step": 3682 }, { "epoch": 2.9034292471422942, "grad_norm": 0.34765625, "learning_rate": 6.15999433014014e-08, "loss": 0.9406, "step": 3683 }, { "epoch": 2.9042175798186833, "grad_norm": 0.353515625, "learning_rate": 6.058699492612841e-08, "loss": 0.9328, "step": 3684 }, { "epoch": 2.905005912495073, "grad_norm": 0.345703125, "learning_rate": 5.958241888693872e-08, "loss": 0.9331, "step": 3685 }, { "epoch": 2.9057942451714625, "grad_norm": 0.341796875, "learning_rate": 5.858621603002434e-08, "loss": 0.8866, "step": 3686 }, { "epoch": 2.9065825778478516, "grad_norm": 0.341796875, "learning_rate": 5.759838719452404e-08, "loss": 0.9439, "step": 3687 }, { "epoch": 2.907370910524241, "grad_norm": 0.349609375, "learning_rate": 5.661893321252221e-08, "loss": 0.9276, "step": 3688 }, { "epoch": 2.9081592432006307, "grad_norm": 0.3359375, "learning_rate": 5.5647854909047786e-08, "loss": 0.8954, "step": 3689 }, { "epoch": 2.9089475758770202, "grad_norm": 0.341796875, "learning_rate": 5.468515310207867e-08, "loss": 0.9281, "step": 3690 }, { "epoch": 2.90973590855341, "grad_norm": 0.345703125, "learning_rate": 5.373082860253287e-08, "loss": 0.9583, "step": 3691 }, { "epoch": 2.910524241229799, "grad_norm": 0.345703125, "learning_rate": 5.2784882214274025e-08, "loss": 0.9512, "step": 3692 }, { "epoch": 2.9113125739061885, "grad_norm": 0.34375, "learning_rate": 5.184731473410698e-08, "loss": 0.9704, "step": 3693 }, { "epoch": 2.9121009065825776, "grad_norm": 0.33984375, "learning_rate": 5.091812695178e-08, "loss": 0.9464, "step": 3694 }, { "epoch": 2.912889239258967, "grad_norm": 0.333984375, "learning_rate": 4.999731964998256e-08, "loss": 0.9078, "step": 3695 }, { "epoch": 2.9136775719353567, "grad_norm": 0.345703125, "learning_rate": 4.9084893604344205e-08, "loss": 0.9453, "step": 3696 }, { "epoch": 2.9144659046117463, "grad_norm": 0.34375, "learning_rate": 4.818084958343572e-08, "loss": 0.9524, "step": 3697 }, { "epoch": 2.915254237288136, "grad_norm": 0.341796875, "learning_rate": 4.728518834876683e-08, "loss": 0.9377, "step": 3698 }, { "epoch": 2.916042569964525, "grad_norm": 0.337890625, "learning_rate": 4.639791065478738e-08, "loss": 0.9099, "step": 3699 }, { "epoch": 2.9168309026409145, "grad_norm": 0.34375, "learning_rate": 4.5519017248880635e-08, "loss": 0.9688, "step": 3700 }, { "epoch": 2.917619235317304, "grad_norm": 0.345703125, "learning_rate": 4.464850887137551e-08, "loss": 0.9222, "step": 3701 }, { "epoch": 2.918407567993693, "grad_norm": 0.361328125, "learning_rate": 4.3786386255531e-08, "loss": 0.9178, "step": 3702 }, { "epoch": 2.9191959006700827, "grad_norm": 0.361328125, "learning_rate": 4.293265012754511e-08, "loss": 0.9297, "step": 3703 }, { "epoch": 2.9199842333464723, "grad_norm": 0.35546875, "learning_rate": 4.2087301206552576e-08, "loss": 0.9363, "step": 3704 }, { "epoch": 2.920772566022862, "grad_norm": 0.345703125, "learning_rate": 4.1250340204619375e-08, "loss": 0.8911, "step": 3705 }, { "epoch": 2.921560898699251, "grad_norm": 0.337890625, "learning_rate": 4.042176782675267e-08, "loss": 0.9219, "step": 3706 }, { "epoch": 2.9223492313756405, "grad_norm": 0.33984375, "learning_rate": 3.960158477088749e-08, "loss": 0.9084, "step": 3707 }, { "epoch": 2.92313756405203, "grad_norm": 0.341796875, "learning_rate": 3.878979172789454e-08, "loss": 0.9577, "step": 3708 }, { "epoch": 2.923925896728419, "grad_norm": 0.353515625, "learning_rate": 3.798638938157684e-08, "loss": 0.9554, "step": 3709 }, { "epoch": 2.9247142294048087, "grad_norm": 0.333984375, "learning_rate": 3.719137840867082e-08, "loss": 0.885, "step": 3710 }, { "epoch": 2.9255025620811983, "grad_norm": 0.34375, "learning_rate": 3.640475947884303e-08, "loss": 0.9434, "step": 3711 }, { "epoch": 2.926290894757588, "grad_norm": 0.345703125, "learning_rate": 3.562653325469345e-08, "loss": 0.906, "step": 3712 }, { "epoch": 2.9270792274339774, "grad_norm": 0.341796875, "learning_rate": 3.485670039174882e-08, "loss": 0.9249, "step": 3713 }, { "epoch": 2.9278675601103665, "grad_norm": 0.33984375, "learning_rate": 3.4095261538468204e-08, "loss": 0.9091, "step": 3714 }, { "epoch": 2.928655892786756, "grad_norm": 0.337890625, "learning_rate": 3.3342217336239656e-08, "loss": 0.9028, "step": 3715 }, { "epoch": 2.929444225463145, "grad_norm": 0.357421875, "learning_rate": 3.259756841938244e-08, "loss": 0.9548, "step": 3716 }, { "epoch": 2.9302325581395348, "grad_norm": 0.34375, "learning_rate": 3.186131541513926e-08, "loss": 0.9272, "step": 3717 }, { "epoch": 2.9310208908159243, "grad_norm": 0.333984375, "learning_rate": 3.1133458943684024e-08, "loss": 0.91, "step": 3718 }, { "epoch": 2.931809223492314, "grad_norm": 0.3515625, "learning_rate": 3.0413999618117416e-08, "loss": 0.9623, "step": 3719 }, { "epoch": 2.9325975561687034, "grad_norm": 0.34375, "learning_rate": 2.9702938044468e-08, "loss": 0.8924, "step": 3720 }, { "epoch": 2.9333858888450925, "grad_norm": 0.341796875, "learning_rate": 2.900027482168777e-08, "loss": 0.9413, "step": 3721 }, { "epoch": 2.934174221521482, "grad_norm": 0.34375, "learning_rate": 2.8306010541655493e-08, "loss": 0.922, "step": 3722 }, { "epoch": 2.9349625541978717, "grad_norm": 0.3515625, "learning_rate": 2.7620145789177823e-08, "loss": 0.9446, "step": 3723 }, { "epoch": 2.935750886874261, "grad_norm": 0.34375, "learning_rate": 2.6942681141981508e-08, "loss": 0.9272, "step": 3724 }, { "epoch": 2.9365392195506503, "grad_norm": 0.341796875, "learning_rate": 2.6273617170722298e-08, "loss": 0.9498, "step": 3725 }, { "epoch": 2.93732755222704, "grad_norm": 0.3359375, "learning_rate": 2.5612954438977155e-08, "loss": 0.9352, "step": 3726 }, { "epoch": 2.9381158849034295, "grad_norm": 0.341796875, "learning_rate": 2.496069350324537e-08, "loss": 0.8997, "step": 3727 }, { "epoch": 2.9389042175798186, "grad_norm": 0.345703125, "learning_rate": 2.4316834912951892e-08, "loss": 0.9342, "step": 3728 }, { "epoch": 2.939692550256208, "grad_norm": 0.349609375, "learning_rate": 2.368137921044289e-08, "loss": 0.9345, "step": 3729 }, { "epoch": 2.9404808829325977, "grad_norm": 0.34375, "learning_rate": 2.305432693098464e-08, "loss": 0.9578, "step": 3730 }, { "epoch": 2.941269215608987, "grad_norm": 0.341796875, "learning_rate": 2.243567860276796e-08, "loss": 0.9166, "step": 3731 }, { "epoch": 2.9420575482853764, "grad_norm": 0.341796875, "learning_rate": 2.1825434746903794e-08, "loss": 0.9142, "step": 3732 }, { "epoch": 2.942845880961766, "grad_norm": 0.33984375, "learning_rate": 2.1223595877420954e-08, "loss": 0.9535, "step": 3733 }, { "epoch": 2.9436342136381555, "grad_norm": 0.33984375, "learning_rate": 2.063016250127281e-08, "loss": 0.9334, "step": 3734 }, { "epoch": 2.9444225463145446, "grad_norm": 0.3359375, "learning_rate": 2.0045135118328397e-08, "loss": 0.9429, "step": 3735 }, { "epoch": 2.945210878990934, "grad_norm": 0.349609375, "learning_rate": 1.9468514221380185e-08, "loss": 0.9844, "step": 3736 }, { "epoch": 2.9459992116673237, "grad_norm": 0.349609375, "learning_rate": 1.890030029613521e-08, "loss": 0.948, "step": 3737 }, { "epoch": 2.946787544343713, "grad_norm": 0.37890625, "learning_rate": 1.8340493821222827e-08, "loss": 0.9297, "step": 3738 }, { "epoch": 2.9475758770201024, "grad_norm": 0.333984375, "learning_rate": 1.778909526818806e-08, "loss": 0.9174, "step": 3739 }, { "epoch": 2.948364209696492, "grad_norm": 0.34375, "learning_rate": 1.7246105101493825e-08, "loss": 0.9128, "step": 3740 }, { "epoch": 2.9491525423728815, "grad_norm": 0.33984375, "learning_rate": 1.671152377852092e-08, "loss": 0.9084, "step": 3741 }, { "epoch": 2.949940875049271, "grad_norm": 0.33984375, "learning_rate": 1.6185351749569146e-08, "loss": 0.9461, "step": 3742 }, { "epoch": 2.95072920772566, "grad_norm": 0.341796875, "learning_rate": 1.5667589457849518e-08, "loss": 0.9485, "step": 3743 }, { "epoch": 2.9515175404020497, "grad_norm": 0.345703125, "learning_rate": 1.5158237339494285e-08, "loss": 0.9602, "step": 3744 }, { "epoch": 2.952305873078439, "grad_norm": 0.33984375, "learning_rate": 1.4657295823549134e-08, "loss": 0.8989, "step": 3745 }, { "epoch": 2.9530942057548284, "grad_norm": 0.34375, "learning_rate": 1.4164765331976527e-08, "loss": 0.9421, "step": 3746 }, { "epoch": 2.953882538431218, "grad_norm": 0.345703125, "learning_rate": 1.3680646279651266e-08, "loss": 0.9695, "step": 3747 }, { "epoch": 2.9546708711076075, "grad_norm": 0.330078125, "learning_rate": 1.320493907436604e-08, "loss": 0.8624, "step": 3748 }, { "epoch": 2.955459203783997, "grad_norm": 0.341796875, "learning_rate": 1.2737644116826986e-08, "loss": 0.9056, "step": 3749 }, { "epoch": 2.956247536460386, "grad_norm": 0.3671875, "learning_rate": 1.2278761800653682e-08, "loss": 0.9189, "step": 3750 }, { "epoch": 2.9570358691367757, "grad_norm": 0.34375, "learning_rate": 1.1828292512380269e-08, "loss": 0.9452, "step": 3751 }, { "epoch": 2.9578242018131653, "grad_norm": 0.34765625, "learning_rate": 1.1386236631452108e-08, "loss": 0.9627, "step": 3752 }, { "epoch": 2.9586125344895544, "grad_norm": 0.34375, "learning_rate": 1.0952594530230232e-08, "loss": 0.9259, "step": 3753 }, { "epoch": 2.959400867165944, "grad_norm": 0.341796875, "learning_rate": 1.0527366573986897e-08, "loss": 0.928, "step": 3754 }, { "epoch": 2.9601891998423335, "grad_norm": 0.3359375, "learning_rate": 1.0110553120908917e-08, "loss": 0.9677, "step": 3755 }, { "epoch": 2.960977532518723, "grad_norm": 0.337890625, "learning_rate": 9.702154522092111e-09, "loss": 0.9286, "step": 3756 }, { "epoch": 2.961765865195112, "grad_norm": 0.345703125, "learning_rate": 9.302171121546855e-09, "loss": 0.9469, "step": 3757 }, { "epoch": 2.9625541978715018, "grad_norm": 0.3515625, "learning_rate": 8.91060325619253e-09, "loss": 0.9482, "step": 3758 }, { "epoch": 2.9633425305478913, "grad_norm": 0.33984375, "learning_rate": 8.527451255863073e-09, "loss": 0.938, "step": 3759 }, { "epoch": 2.9641308632242804, "grad_norm": 0.3515625, "learning_rate": 8.152715443300318e-09, "loss": 0.9594, "step": 3760 }, { "epoch": 2.96491919590067, "grad_norm": 0.33984375, "learning_rate": 7.786396134158437e-09, "loss": 0.9362, "step": 3761 }, { "epoch": 2.9657075285770595, "grad_norm": 0.34375, "learning_rate": 7.42849363700282e-09, "loss": 0.9284, "step": 3762 }, { "epoch": 2.966495861253449, "grad_norm": 0.34375, "learning_rate": 7.079008253306763e-09, "loss": 0.9077, "step": 3763 }, { "epoch": 2.9672841939298387, "grad_norm": 0.34375, "learning_rate": 6.73794027745478e-09, "loss": 0.9396, "step": 3764 }, { "epoch": 2.968072526606228, "grad_norm": 0.345703125, "learning_rate": 6.405289996741504e-09, "loss": 0.9246, "step": 3765 }, { "epoch": 2.9688608592826173, "grad_norm": 0.34375, "learning_rate": 6.081057691370573e-09, "loss": 0.9083, "step": 3766 }, { "epoch": 2.9696491919590065, "grad_norm": 0.337890625, "learning_rate": 5.7652436344546315e-09, "loss": 0.9081, "step": 3767 }, { "epoch": 2.970437524635396, "grad_norm": 0.345703125, "learning_rate": 5.457848092015328e-09, "loss": 0.924, "step": 3768 }, { "epoch": 2.9712258573117856, "grad_norm": 0.34765625, "learning_rate": 5.158871322984426e-09, "loss": 0.896, "step": 3769 }, { "epoch": 2.972014189988175, "grad_norm": 0.34375, "learning_rate": 4.868313579200479e-09, "loss": 0.9457, "step": 3770 }, { "epoch": 2.9728025226645647, "grad_norm": 0.34375, "learning_rate": 4.586175105411039e-09, "loss": 0.9198, "step": 3771 }, { "epoch": 2.973590855340954, "grad_norm": 0.3515625, "learning_rate": 4.312456139271559e-09, "loss": 0.9362, "step": 3772 }, { "epoch": 2.9743791880173434, "grad_norm": 0.341796875, "learning_rate": 4.0471569113453844e-09, "loss": 0.9659, "step": 3773 }, { "epoch": 2.975167520693733, "grad_norm": 0.349609375, "learning_rate": 3.790277645104867e-09, "loss": 0.937, "step": 3774 }, { "epoch": 2.975955853370122, "grad_norm": 0.34375, "learning_rate": 3.541818556928034e-09, "loss": 0.9428, "step": 3775 }, { "epoch": 2.9767441860465116, "grad_norm": 0.349609375, "learning_rate": 3.3017798561030268e-09, "loss": 0.8976, "step": 3776 }, { "epoch": 2.977532518722901, "grad_norm": 0.341796875, "learning_rate": 3.0701617448203325e-09, "loss": 0.8983, "step": 3777 }, { "epoch": 2.9783208513992907, "grad_norm": 0.345703125, "learning_rate": 2.8469644181827736e-09, "loss": 0.9442, "step": 3778 }, { "epoch": 2.97910918407568, "grad_norm": 0.341796875, "learning_rate": 2.632188064196628e-09, "loss": 0.9387, "step": 3779 }, { "epoch": 2.9798975167520694, "grad_norm": 0.34765625, "learning_rate": 2.4258328637771776e-09, "loss": 0.9264, "step": 3780 }, { "epoch": 2.980685849428459, "grad_norm": 0.341796875, "learning_rate": 2.2278989907442706e-09, "loss": 0.9314, "step": 3781 }, { "epoch": 2.981474182104848, "grad_norm": 0.337890625, "learning_rate": 2.0383866118245388e-09, "loss": 0.8992, "step": 3782 }, { "epoch": 2.9822625147812376, "grad_norm": 0.3359375, "learning_rate": 1.8572958866514e-09, "loss": 0.9413, "step": 3783 }, { "epoch": 2.983050847457627, "grad_norm": 0.34375, "learning_rate": 1.6846269677650574e-09, "loss": 0.9453, "step": 3784 }, { "epoch": 2.9838391801340167, "grad_norm": 0.345703125, "learning_rate": 1.5203800006102776e-09, "loss": 0.9425, "step": 3785 }, { "epoch": 2.984627512810406, "grad_norm": 0.3515625, "learning_rate": 1.3645551235386134e-09, "loss": 0.9388, "step": 3786 }, { "epoch": 2.9854158454867954, "grad_norm": 0.33984375, "learning_rate": 1.217152467806182e-09, "loss": 0.9261, "step": 3787 }, { "epoch": 2.986204178163185, "grad_norm": 0.3359375, "learning_rate": 1.0781721575781057e-09, "loss": 0.9175, "step": 3788 }, { "epoch": 2.986992510839574, "grad_norm": 0.345703125, "learning_rate": 9.476143099207414e-10, "loss": 0.929, "step": 3789 }, { "epoch": 2.9877808435159636, "grad_norm": 0.345703125, "learning_rate": 8.254790348072306e-10, "loss": 0.925, "step": 3790 }, { "epoch": 2.988569176192353, "grad_norm": 0.337890625, "learning_rate": 7.117664351186104e-10, "loss": 0.9175, "step": 3791 }, { "epoch": 2.9893575088687427, "grad_norm": 0.34765625, "learning_rate": 6.064766066382622e-10, "loss": 0.9493, "step": 3792 }, { "epoch": 2.9901458415451323, "grad_norm": 0.34375, "learning_rate": 5.096096380552417e-10, "loss": 0.9385, "step": 3793 }, { "epoch": 2.9909341742215214, "grad_norm": 0.34375, "learning_rate": 4.211656109642803e-10, "loss": 0.9369, "step": 3794 }, { "epoch": 2.991722506897911, "grad_norm": 0.337890625, "learning_rate": 3.4114459986689386e-10, "loss": 0.9301, "step": 3795 }, { "epoch": 2.9925108395743, "grad_norm": 0.345703125, "learning_rate": 2.6954667216472217e-10, "loss": 0.9248, "step": 3796 }, { "epoch": 2.9932991722506896, "grad_norm": 0.34765625, "learning_rate": 2.063718881695209e-10, "loss": 0.9621, "step": 3797 }, { "epoch": 2.994087504927079, "grad_norm": 0.353515625, "learning_rate": 1.5162030109538982e-10, "loss": 0.9228, "step": 3798 }, { "epoch": 2.9948758376034688, "grad_norm": 0.333984375, "learning_rate": 1.0529195706099338e-10, "loss": 0.9257, "step": 3799 }, { "epoch": 2.9956641702798583, "grad_norm": 0.353515625, "learning_rate": 6.738689509067087e-11, "loss": 0.9282, "step": 3800 }, { "epoch": 2.9964525029562474, "grad_norm": 0.341796875, "learning_rate": 3.790514711332627e-11, "loss": 0.9546, "step": 3801 }, { "epoch": 2.997240835632637, "grad_norm": 0.34375, "learning_rate": 1.6846737963538418e-11, "loss": 0.943, "step": 3802 }, { "epoch": 2.9980291683090265, "grad_norm": 0.345703125, "learning_rate": 4.211685378230357e-12, "loss": 0.9316, "step": 3803 }, { "epoch": 2.9988175009854157, "grad_norm": 0.349609375, "learning_rate": 0.0, "loss": 0.955, "step": 3804 }, { "epoch": 2.9988175009854157, "eval_loss": 0.9453620314598083, "eval_runtime": 614.9836, "eval_samples_per_second": 26.723, "eval_steps_per_second": 1.672, "step": 3804 }, { "epoch": 2.9988175009854157, "step": 3804, "total_flos": 3.95328908667346e+19, "train_loss": 0.9601784737547615, "train_runtime": 73759.6406, "train_samples_per_second": 6.603, "train_steps_per_second": 0.052 } ], "logging_steps": 1, "max_steps": 3804, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.95328908667346e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }