diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,21616 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.995172609630441, - "eval_steps": 500, - "global_step": 3081, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.000973393900064893, - "grad_norm": 3.8125, - "learning_rate": 5.000000000000001e-07, - "loss": 1.9271, - "step": 1 - }, - { - "epoch": 0.001946787800129786, - "grad_norm": 3.8125, - "learning_rate": 1.0000000000000002e-06, - "loss": 1.947, - "step": 2 - }, - { - "epoch": 0.0029201817001946787, - "grad_norm": 3.5625, - "learning_rate": 1.5e-06, - "loss": 1.8864, - "step": 3 - }, - { - "epoch": 0.003893575600259572, - "grad_norm": 3.546875, - "learning_rate": 2.0000000000000003e-06, - "loss": 1.9048, - "step": 4 - }, - { - "epoch": 0.004866969500324465, - "grad_norm": 3.40625, - "learning_rate": 2.5e-06, - "loss": 1.9077, - "step": 5 - }, - { - "epoch": 0.005840363400389357, - "grad_norm": 3.0, - "learning_rate": 3e-06, - "loss": 1.9014, - "step": 6 - }, - { - "epoch": 0.00681375730045425, - "grad_norm": 2.78125, - "learning_rate": 3.5e-06, - "loss": 1.912, - "step": 7 - }, - { - "epoch": 0.007787151200519144, - "grad_norm": 2.34375, - "learning_rate": 4.000000000000001e-06, - "loss": 1.8813, - "step": 8 - }, - { - "epoch": 0.008760545100584036, - "grad_norm": 2.109375, - "learning_rate": 4.5e-06, - "loss": 1.8683, - "step": 9 - }, - { - "epoch": 0.00973393900064893, - "grad_norm": 1.8125, - "learning_rate": 5e-06, - "loss": 1.9061, - "step": 10 - }, - { - "epoch": 0.010707332900713823, - "grad_norm": 1.765625, - "learning_rate": 4.999998691872464e-06, - "loss": 1.8596, - "step": 11 - }, - { - "epoch": 0.011680726800778715, - "grad_norm": 1.7734375, - "learning_rate": 4.9999947674912255e-06, - "loss": 1.9086, - "step": 12 - }, - { - "epoch": 0.012654120700843608, - "grad_norm": 1.71875, - "learning_rate": 4.99998822686039e-06, - "loss": 1.8465, - "step": 13 - }, - { - "epoch": 0.0136275146009085, - "grad_norm": 1.609375, - "learning_rate": 4.999979069986803e-06, - "loss": 1.835, - "step": 14 - }, - { - "epoch": 0.014600908500973394, - "grad_norm": 1.5390625, - "learning_rate": 4.999967296880048e-06, - "loss": 1.8464, - "step": 15 - }, - { - "epoch": 0.015574302401038288, - "grad_norm": 1.375, - "learning_rate": 4.999952907552444e-06, - "loss": 1.8715, - "step": 16 - }, - { - "epoch": 0.01654769630110318, - "grad_norm": 1.265625, - "learning_rate": 4.999935902019051e-06, - "loss": 1.8614, - "step": 17 - }, - { - "epoch": 0.01752109020116807, - "grad_norm": 1.15625, - "learning_rate": 4.999916280297664e-06, - "loss": 1.8847, - "step": 18 - }, - { - "epoch": 0.018494484101232965, - "grad_norm": 1.0625, - "learning_rate": 4.999894042408818e-06, - "loss": 1.8204, - "step": 19 - }, - { - "epoch": 0.01946787800129786, - "grad_norm": 1.046875, - "learning_rate": 4.999869188375784e-06, - "loss": 1.8313, - "step": 20 - }, - { - "epoch": 0.020441271901362752, - "grad_norm": 1.015625, - "learning_rate": 4.999841718224574e-06, - "loss": 1.8221, - "step": 21 - }, - { - "epoch": 0.021414665801427646, - "grad_norm": 0.984375, - "learning_rate": 4.999811631983934e-06, - "loss": 1.8274, - "step": 22 - }, - { - "epoch": 0.022388059701492536, - "grad_norm": 0.95703125, - "learning_rate": 4.999778929685348e-06, - "loss": 1.8124, - "step": 23 - }, - { - "epoch": 0.02336145360155743, - "grad_norm": 0.94921875, - "learning_rate": 4.999743611363042e-06, - "loss": 1.8222, - "step": 24 - }, - { - "epoch": 0.024334847501622323, - "grad_norm": 0.91015625, - "learning_rate": 4.9997056770539745e-06, - "loss": 1.8039, - "step": 25 - }, - { - "epoch": 0.025308241401687217, - "grad_norm": 0.90625, - "learning_rate": 4.9996651267978446e-06, - "loss": 1.7916, - "step": 26 - }, - { - "epoch": 0.02628163530175211, - "grad_norm": 0.8671875, - "learning_rate": 4.999621960637089e-06, - "loss": 1.7906, - "step": 27 - }, - { - "epoch": 0.027255029201817, - "grad_norm": 0.8359375, - "learning_rate": 4.99957617861688e-06, - "loss": 1.8106, - "step": 28 - }, - { - "epoch": 0.028228423101881894, - "grad_norm": 0.828125, - "learning_rate": 4.999527780785127e-06, - "loss": 1.7894, - "step": 29 - }, - { - "epoch": 0.029201817001946788, - "grad_norm": 0.8046875, - "learning_rate": 4.9994767671924826e-06, - "loss": 1.7524, - "step": 30 - }, - { - "epoch": 0.03017521090201168, - "grad_norm": 0.80078125, - "learning_rate": 4.999423137892329e-06, - "loss": 1.809, - "step": 31 - }, - { - "epoch": 0.031148604802076575, - "grad_norm": 0.78125, - "learning_rate": 4.9993668929407916e-06, - "loss": 1.7914, - "step": 32 - }, - { - "epoch": 0.03212199870214147, - "grad_norm": 0.75390625, - "learning_rate": 4.999308032396729e-06, - "loss": 1.7832, - "step": 33 - }, - { - "epoch": 0.03309539260220636, - "grad_norm": 0.75390625, - "learning_rate": 4.999246556321741e-06, - "loss": 1.7584, - "step": 34 - }, - { - "epoch": 0.03406878650227125, - "grad_norm": 0.74609375, - "learning_rate": 4.999182464780161e-06, - "loss": 1.7985, - "step": 35 - }, - { - "epoch": 0.03504218040233614, - "grad_norm": 0.7421875, - "learning_rate": 4.99911575783906e-06, - "loss": 1.7827, - "step": 36 - }, - { - "epoch": 0.036015574302401036, - "grad_norm": 0.71875, - "learning_rate": 4.99904643556825e-06, - "loss": 1.7835, - "step": 37 - }, - { - "epoch": 0.03698896820246593, - "grad_norm": 0.70703125, - "learning_rate": 4.998974498040273e-06, - "loss": 1.7315, - "step": 38 - }, - { - "epoch": 0.037962362102530824, - "grad_norm": 0.71484375, - "learning_rate": 4.998899945330416e-06, - "loss": 1.7871, - "step": 39 - }, - { - "epoch": 0.03893575600259572, - "grad_norm": 0.69921875, - "learning_rate": 4.9988227775166954e-06, - "loss": 1.7366, - "step": 40 - }, - { - "epoch": 0.03990914990266061, - "grad_norm": 0.6953125, - "learning_rate": 4.9987429946798684e-06, - "loss": 1.7826, - "step": 41 - }, - { - "epoch": 0.040882543802725504, - "grad_norm": 0.6796875, - "learning_rate": 4.998660596903428e-06, - "loss": 1.772, - "step": 42 - }, - { - "epoch": 0.0418559377027904, - "grad_norm": 0.67578125, - "learning_rate": 4.998575584273604e-06, - "loss": 1.7348, - "step": 43 - }, - { - "epoch": 0.04282933160285529, - "grad_norm": 0.66015625, - "learning_rate": 4.998487956879361e-06, - "loss": 1.755, - "step": 44 - }, - { - "epoch": 0.04380272550292018, - "grad_norm": 0.65625, - "learning_rate": 4.998397714812403e-06, - "loss": 1.7603, - "step": 45 - }, - { - "epoch": 0.04477611940298507, - "grad_norm": 0.65625, - "learning_rate": 4.9983048581671676e-06, - "loss": 1.7151, - "step": 46 - }, - { - "epoch": 0.045749513303049966, - "grad_norm": 0.64453125, - "learning_rate": 4.998209387040829e-06, - "loss": 1.7456, - "step": 47 - }, - { - "epoch": 0.04672290720311486, - "grad_norm": 0.64453125, - "learning_rate": 4.998111301533299e-06, - "loss": 1.7458, - "step": 48 - }, - { - "epoch": 0.04769630110317975, - "grad_norm": 0.6484375, - "learning_rate": 4.998010601747223e-06, - "loss": 1.7416, - "step": 49 - }, - { - "epoch": 0.04866969500324465, - "grad_norm": 0.6328125, - "learning_rate": 4.997907287787985e-06, - "loss": 1.7257, - "step": 50 - }, - { - "epoch": 0.04964308890330954, - "grad_norm": 0.62890625, - "learning_rate": 4.997801359763702e-06, - "loss": 1.742, - "step": 51 - }, - { - "epoch": 0.050616482803374434, - "grad_norm": 0.63671875, - "learning_rate": 4.997692817785229e-06, - "loss": 1.7521, - "step": 52 - }, - { - "epoch": 0.05158987670343933, - "grad_norm": 0.62109375, - "learning_rate": 4.997581661966154e-06, - "loss": 1.7133, - "step": 53 - }, - { - "epoch": 0.05256327060350422, - "grad_norm": 0.61328125, - "learning_rate": 4.997467892422804e-06, - "loss": 1.7177, - "step": 54 - }, - { - "epoch": 0.05353666450356911, - "grad_norm": 0.61328125, - "learning_rate": 4.997351509274236e-06, - "loss": 1.7096, - "step": 55 - }, - { - "epoch": 0.054510058403634, - "grad_norm": 0.609375, - "learning_rate": 4.997232512642248e-06, - "loss": 1.7169, - "step": 56 - }, - { - "epoch": 0.055483452303698895, - "grad_norm": 0.609375, - "learning_rate": 4.997110902651368e-06, - "loss": 1.7097, - "step": 57 - }, - { - "epoch": 0.05645684620376379, - "grad_norm": 0.61328125, - "learning_rate": 4.996986679428863e-06, - "loss": 1.696, - "step": 58 - }, - { - "epoch": 0.05743024010382868, - "grad_norm": 0.60546875, - "learning_rate": 4.996859843104732e-06, - "loss": 1.7385, - "step": 59 - }, - { - "epoch": 0.058403634003893576, - "grad_norm": 0.6015625, - "learning_rate": 4.9967303938117095e-06, - "loss": 1.6892, - "step": 60 - }, - { - "epoch": 0.05937702790395847, - "grad_norm": 0.59375, - "learning_rate": 4.9965983316852655e-06, - "loss": 1.7318, - "step": 61 - }, - { - "epoch": 0.06035042180402336, - "grad_norm": 0.58984375, - "learning_rate": 4.996463656863601e-06, - "loss": 1.7081, - "step": 62 - }, - { - "epoch": 0.06132381570408826, - "grad_norm": 0.5859375, - "learning_rate": 4.996326369487654e-06, - "loss": 1.7108, - "step": 63 - }, - { - "epoch": 0.06229720960415315, - "grad_norm": 0.59375, - "learning_rate": 4.996186469701098e-06, - "loss": 1.7007, - "step": 64 - }, - { - "epoch": 0.06327060350421804, - "grad_norm": 0.5859375, - "learning_rate": 4.996043957650337e-06, - "loss": 1.7047, - "step": 65 - }, - { - "epoch": 0.06424399740428294, - "grad_norm": 0.58984375, - "learning_rate": 4.99589883348451e-06, - "loss": 1.6933, - "step": 66 - }, - { - "epoch": 0.06521739130434782, - "grad_norm": 0.578125, - "learning_rate": 4.99575109735549e-06, - "loss": 1.7098, - "step": 67 - }, - { - "epoch": 0.06619078520441272, - "grad_norm": 0.58203125, - "learning_rate": 4.995600749417883e-06, - "loss": 1.7075, - "step": 68 - }, - { - "epoch": 0.06716417910447761, - "grad_norm": 0.5859375, - "learning_rate": 4.9954477898290285e-06, - "loss": 1.7302, - "step": 69 - }, - { - "epoch": 0.0681375730045425, - "grad_norm": 0.57421875, - "learning_rate": 4.9952922187490005e-06, - "loss": 1.6886, - "step": 70 - }, - { - "epoch": 0.0691109669046074, - "grad_norm": 0.56640625, - "learning_rate": 4.995134036340602e-06, - "loss": 1.6862, - "step": 71 - }, - { - "epoch": 0.07008436080467229, - "grad_norm": 0.5703125, - "learning_rate": 4.994973242769372e-06, - "loss": 1.698, - "step": 72 - }, - { - "epoch": 0.07105775470473719, - "grad_norm": 0.56640625, - "learning_rate": 4.994809838203582e-06, - "loss": 1.6917, - "step": 73 - }, - { - "epoch": 0.07203114860480207, - "grad_norm": 0.5703125, - "learning_rate": 4.9946438228142345e-06, - "loss": 1.6791, - "step": 74 - }, - { - "epoch": 0.07300454250486697, - "grad_norm": 0.5703125, - "learning_rate": 4.994475196775066e-06, - "loss": 1.6923, - "step": 75 - }, - { - "epoch": 0.07397793640493186, - "grad_norm": 0.5625, - "learning_rate": 4.9943039602625435e-06, - "loss": 1.6791, - "step": 76 - }, - { - "epoch": 0.07495133030499676, - "grad_norm": 0.59765625, - "learning_rate": 4.994130113455865e-06, - "loss": 1.7237, - "step": 77 - }, - { - "epoch": 0.07592472420506165, - "grad_norm": 0.5703125, - "learning_rate": 4.9939536565369625e-06, - "loss": 1.6223, - "step": 78 - }, - { - "epoch": 0.07689811810512655, - "grad_norm": 0.55078125, - "learning_rate": 4.9937745896905e-06, - "loss": 1.6775, - "step": 79 - }, - { - "epoch": 0.07787151200519143, - "grad_norm": 0.55859375, - "learning_rate": 4.993592913103868e-06, - "loss": 1.6915, - "step": 80 - }, - { - "epoch": 0.07884490590525632, - "grad_norm": 0.5625, - "learning_rate": 4.993408626967196e-06, - "loss": 1.6782, - "step": 81 - }, - { - "epoch": 0.07981829980532122, - "grad_norm": 0.54296875, - "learning_rate": 4.993221731473335e-06, - "loss": 1.6581, - "step": 82 - }, - { - "epoch": 0.08079169370538611, - "grad_norm": 0.546875, - "learning_rate": 4.993032226817874e-06, - "loss": 1.6621, - "step": 83 - }, - { - "epoch": 0.08176508760545101, - "grad_norm": 0.5625, - "learning_rate": 4.992840113199131e-06, - "loss": 1.667, - "step": 84 - }, - { - "epoch": 0.0827384815055159, - "grad_norm": 0.55078125, - "learning_rate": 4.992645390818151e-06, - "loss": 1.6677, - "step": 85 - }, - { - "epoch": 0.0837118754055808, - "grad_norm": 0.55859375, - "learning_rate": 4.992448059878713e-06, - "loss": 1.6475, - "step": 86 - }, - { - "epoch": 0.08468526930564568, - "grad_norm": 0.546875, - "learning_rate": 4.992248120587323e-06, - "loss": 1.6859, - "step": 87 - }, - { - "epoch": 0.08565866320571058, - "grad_norm": 0.55078125, - "learning_rate": 4.992045573153218e-06, - "loss": 1.6847, - "step": 88 - }, - { - "epoch": 0.08663205710577547, - "grad_norm": 0.54296875, - "learning_rate": 4.9918404177883655e-06, - "loss": 1.6697, - "step": 89 - }, - { - "epoch": 0.08760545100584036, - "grad_norm": 0.546875, - "learning_rate": 4.99163265470746e-06, - "loss": 1.6756, - "step": 90 - }, - { - "epoch": 0.08857884490590526, - "grad_norm": 0.5390625, - "learning_rate": 4.991422284127927e-06, - "loss": 1.6589, - "step": 91 - }, - { - "epoch": 0.08955223880597014, - "grad_norm": 0.54296875, - "learning_rate": 4.991209306269918e-06, - "loss": 1.6587, - "step": 92 - }, - { - "epoch": 0.09052563270603504, - "grad_norm": 0.55078125, - "learning_rate": 4.990993721356317e-06, - "loss": 1.6607, - "step": 93 - }, - { - "epoch": 0.09149902660609993, - "grad_norm": 0.546875, - "learning_rate": 4.990775529612732e-06, - "loss": 1.6582, - "step": 94 - }, - { - "epoch": 0.09247242050616483, - "grad_norm": 0.5390625, - "learning_rate": 4.990554731267502e-06, - "loss": 1.669, - "step": 95 - }, - { - "epoch": 0.09344581440622972, - "grad_norm": 0.53515625, - "learning_rate": 4.990331326551693e-06, - "loss": 1.6586, - "step": 96 - }, - { - "epoch": 0.09441920830629462, - "grad_norm": 0.53515625, - "learning_rate": 4.9901053156990984e-06, - "loss": 1.6385, - "step": 97 - }, - { - "epoch": 0.0953926022063595, - "grad_norm": 0.51953125, - "learning_rate": 4.9898766989462385e-06, - "loss": 1.6393, - "step": 98 - }, - { - "epoch": 0.0963659961064244, - "grad_norm": 0.546875, - "learning_rate": 4.989645476532362e-06, - "loss": 1.6478, - "step": 99 - }, - { - "epoch": 0.0973393900064893, - "grad_norm": 0.5234375, - "learning_rate": 4.9894116486994425e-06, - "loss": 1.648, - "step": 100 - }, - { - "epoch": 0.09831278390655418, - "grad_norm": 0.53515625, - "learning_rate": 4.9891752156921835e-06, - "loss": 1.6419, - "step": 101 - }, - { - "epoch": 0.09928617780661908, - "grad_norm": 0.52734375, - "learning_rate": 4.988936177758011e-06, - "loss": 1.6285, - "step": 102 - }, - { - "epoch": 0.10025957170668397, - "grad_norm": 0.5390625, - "learning_rate": 4.9886945351470775e-06, - "loss": 1.6654, - "step": 103 - }, - { - "epoch": 0.10123296560674887, - "grad_norm": 0.5234375, - "learning_rate": 4.988450288112265e-06, - "loss": 1.6355, - "step": 104 - }, - { - "epoch": 0.10220635950681375, - "grad_norm": 0.53125, - "learning_rate": 4.988203436909177e-06, - "loss": 1.6376, - "step": 105 - }, - { - "epoch": 0.10317975340687865, - "grad_norm": 0.5390625, - "learning_rate": 4.987953981796144e-06, - "loss": 1.6474, - "step": 106 - }, - { - "epoch": 0.10415314730694354, - "grad_norm": 0.51953125, - "learning_rate": 4.987701923034222e-06, - "loss": 1.641, - "step": 107 - }, - { - "epoch": 0.10512654120700844, - "grad_norm": 0.5234375, - "learning_rate": 4.98744726088719e-06, - "loss": 1.6212, - "step": 108 - }, - { - "epoch": 0.10609993510707333, - "grad_norm": 0.5234375, - "learning_rate": 4.987189995621553e-06, - "loss": 1.6218, - "step": 109 - }, - { - "epoch": 0.10707332900713822, - "grad_norm": 0.51953125, - "learning_rate": 4.9869301275065394e-06, - "loss": 1.6329, - "step": 110 - }, - { - "epoch": 0.10804672290720312, - "grad_norm": 0.5234375, - "learning_rate": 4.986667656814102e-06, - "loss": 1.6328, - "step": 111 - }, - { - "epoch": 0.109020116807268, - "grad_norm": 0.515625, - "learning_rate": 4.9864025838189165e-06, - "loss": 1.6055, - "step": 112 - }, - { - "epoch": 0.1099935107073329, - "grad_norm": 0.51953125, - "learning_rate": 4.986134908798383e-06, - "loss": 1.6214, - "step": 113 - }, - { - "epoch": 0.11096690460739779, - "grad_norm": 0.51953125, - "learning_rate": 4.985864632032623e-06, - "loss": 1.6265, - "step": 114 - }, - { - "epoch": 0.11194029850746269, - "grad_norm": 0.51953125, - "learning_rate": 4.985591753804483e-06, - "loss": 1.6191, - "step": 115 - }, - { - "epoch": 0.11291369240752758, - "grad_norm": 0.5078125, - "learning_rate": 4.985316274399529e-06, - "loss": 1.615, - "step": 116 - }, - { - "epoch": 0.11388708630759248, - "grad_norm": 0.51171875, - "learning_rate": 4.985038194106052e-06, - "loss": 1.6472, - "step": 117 - }, - { - "epoch": 0.11486048020765736, - "grad_norm": 0.51171875, - "learning_rate": 4.984757513215063e-06, - "loss": 1.6116, - "step": 118 - }, - { - "epoch": 0.11583387410772227, - "grad_norm": 0.5078125, - "learning_rate": 4.984474232020296e-06, - "loss": 1.5963, - "step": 119 - }, - { - "epoch": 0.11680726800778715, - "grad_norm": 0.515625, - "learning_rate": 4.984188350818204e-06, - "loss": 1.6276, - "step": 120 - }, - { - "epoch": 0.11778066190785204, - "grad_norm": 0.5078125, - "learning_rate": 4.983899869907963e-06, - "loss": 1.6063, - "step": 121 - }, - { - "epoch": 0.11875405580791694, - "grad_norm": 0.5078125, - "learning_rate": 4.983608789591468e-06, - "loss": 1.6289, - "step": 122 - }, - { - "epoch": 0.11972744970798183, - "grad_norm": 0.515625, - "learning_rate": 4.983315110173337e-06, - "loss": 1.587, - "step": 123 - }, - { - "epoch": 0.12070084360804673, - "grad_norm": 0.5, - "learning_rate": 4.9830188319609045e-06, - "loss": 1.629, - "step": 124 - }, - { - "epoch": 0.12167423750811161, - "grad_norm": 0.494140625, - "learning_rate": 4.982719955264227e-06, - "loss": 1.6104, - "step": 125 - }, - { - "epoch": 0.12264763140817651, - "grad_norm": 0.49609375, - "learning_rate": 4.9824184803960794e-06, - "loss": 1.571, - "step": 126 - }, - { - "epoch": 0.1236210253082414, - "grad_norm": 0.52734375, - "learning_rate": 4.982114407671955e-06, - "loss": 1.6102, - "step": 127 - }, - { - "epoch": 0.1245944192083063, - "grad_norm": 0.51171875, - "learning_rate": 4.981807737410068e-06, - "loss": 1.6285, - "step": 128 - }, - { - "epoch": 0.1255678131083712, - "grad_norm": 0.51171875, - "learning_rate": 4.981498469931348e-06, - "loss": 1.6085, - "step": 129 - }, - { - "epoch": 0.12654120700843607, - "grad_norm": 0.494140625, - "learning_rate": 4.981186605559445e-06, - "loss": 1.6036, - "step": 130 - }, - { - "epoch": 0.12751460090850097, - "grad_norm": 0.494140625, - "learning_rate": 4.980872144620726e-06, - "loss": 1.603, - "step": 131 - }, - { - "epoch": 0.12848799480856588, - "grad_norm": 0.5, - "learning_rate": 4.9805550874442735e-06, - "loss": 1.6262, - "step": 132 - }, - { - "epoch": 0.12946138870863075, - "grad_norm": 0.51171875, - "learning_rate": 4.9802354343618895e-06, - "loss": 1.6447, - "step": 133 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.5078125, - "learning_rate": 4.979913185708093e-06, - "loss": 1.5935, - "step": 134 - }, - { - "epoch": 0.13140817650876055, - "grad_norm": 0.498046875, - "learning_rate": 4.979588341820114e-06, - "loss": 1.6112, - "step": 135 - }, - { - "epoch": 0.13238157040882545, - "grad_norm": 0.5078125, - "learning_rate": 4.979260903037906e-06, - "loss": 1.6473, - "step": 136 - }, - { - "epoch": 0.13335496430889032, - "grad_norm": 0.50390625, - "learning_rate": 4.9789308697041325e-06, - "loss": 1.633, - "step": 137 - }, - { - "epoch": 0.13432835820895522, - "grad_norm": 0.5, - "learning_rate": 4.9785982421641756e-06, - "loss": 1.607, - "step": 138 - }, - { - "epoch": 0.13530175210902012, - "grad_norm": 0.498046875, - "learning_rate": 4.978263020766129e-06, - "loss": 1.616, - "step": 139 - }, - { - "epoch": 0.136275146009085, - "grad_norm": 0.50390625, - "learning_rate": 4.977925205860803e-06, - "loss": 1.598, - "step": 140 - }, - { - "epoch": 0.1372485399091499, - "grad_norm": 0.5078125, - "learning_rate": 4.977584797801722e-06, - "loss": 1.6183, - "step": 141 - }, - { - "epoch": 0.1382219338092148, - "grad_norm": 0.50390625, - "learning_rate": 4.977241796945123e-06, - "loss": 1.5891, - "step": 142 - }, - { - "epoch": 0.1391953277092797, - "grad_norm": 0.50390625, - "learning_rate": 4.976896203649958e-06, - "loss": 1.6197, - "step": 143 - }, - { - "epoch": 0.14016872160934457, - "grad_norm": 0.498046875, - "learning_rate": 4.976548018277891e-06, - "loss": 1.607, - "step": 144 - }, - { - "epoch": 0.14114211550940947, - "grad_norm": 1.9140625, - "learning_rate": 4.976197241193298e-06, - "loss": 1.6206, - "step": 145 - }, - { - "epoch": 0.14211550940947437, - "grad_norm": 0.484375, - "learning_rate": 4.975843872763269e-06, - "loss": 1.5968, - "step": 146 - }, - { - "epoch": 0.14308890330953927, - "grad_norm": 0.48046875, - "learning_rate": 4.975487913357603e-06, - "loss": 1.5844, - "step": 147 - }, - { - "epoch": 0.14406229720960415, - "grad_norm": 0.51171875, - "learning_rate": 4.975129363348814e-06, - "loss": 1.5843, - "step": 148 - }, - { - "epoch": 0.14503569110966905, - "grad_norm": 0.53515625, - "learning_rate": 4.974768223112125e-06, - "loss": 1.6291, - "step": 149 - }, - { - "epoch": 0.14600908500973395, - "grad_norm": 0.4921875, - "learning_rate": 4.974404493025469e-06, - "loss": 1.5926, - "step": 150 - }, - { - "epoch": 0.14698247890979882, - "grad_norm": 0.498046875, - "learning_rate": 4.974038173469491e-06, - "loss": 1.5854, - "step": 151 - }, - { - "epoch": 0.14795587280986372, - "grad_norm": 0.5, - "learning_rate": 4.973669264827545e-06, - "loss": 1.6067, - "step": 152 - }, - { - "epoch": 0.14892926670992862, - "grad_norm": 0.486328125, - "learning_rate": 4.973297767485695e-06, - "loss": 1.5998, - "step": 153 - }, - { - "epoch": 0.14990266060999352, - "grad_norm": 0.490234375, - "learning_rate": 4.972923681832714e-06, - "loss": 1.5724, - "step": 154 - }, - { - "epoch": 0.1508760545100584, - "grad_norm": 0.490234375, - "learning_rate": 4.972547008260083e-06, - "loss": 1.6057, - "step": 155 - }, - { - "epoch": 0.1518494484101233, - "grad_norm": 0.4921875, - "learning_rate": 4.97216774716199e-06, - "loss": 1.5807, - "step": 156 - }, - { - "epoch": 0.1528228423101882, - "grad_norm": 0.48828125, - "learning_rate": 4.971785898935335e-06, - "loss": 1.5693, - "step": 157 - }, - { - "epoch": 0.1537962362102531, - "grad_norm": 0.49609375, - "learning_rate": 4.971401463979722e-06, - "loss": 1.5948, - "step": 158 - }, - { - "epoch": 0.15476963011031797, - "grad_norm": 0.4921875, - "learning_rate": 4.971014442697463e-06, - "loss": 1.5749, - "step": 159 - }, - { - "epoch": 0.15574302401038287, - "grad_norm": 0.48046875, - "learning_rate": 4.970624835493576e-06, - "loss": 1.5615, - "step": 160 - }, - { - "epoch": 0.15671641791044777, - "grad_norm": 0.48046875, - "learning_rate": 4.970232642775786e-06, - "loss": 1.5725, - "step": 161 - }, - { - "epoch": 0.15768981181051264, - "grad_norm": 0.490234375, - "learning_rate": 4.969837864954524e-06, - "loss": 1.5923, - "step": 162 - }, - { - "epoch": 0.15866320571057754, - "grad_norm": 0.48046875, - "learning_rate": 4.969440502442926e-06, - "loss": 1.5681, - "step": 163 - }, - { - "epoch": 0.15963659961064244, - "grad_norm": 0.486328125, - "learning_rate": 4.969040555656831e-06, - "loss": 1.5895, - "step": 164 - }, - { - "epoch": 0.16060999351070734, - "grad_norm": 0.486328125, - "learning_rate": 4.968638025014786e-06, - "loss": 1.5866, - "step": 165 - }, - { - "epoch": 0.16158338741077222, - "grad_norm": 0.490234375, - "learning_rate": 4.96823291093804e-06, - "loss": 1.566, - "step": 166 - }, - { - "epoch": 0.16255678131083712, - "grad_norm": 0.478515625, - "learning_rate": 4.967825213850545e-06, - "loss": 1.5402, - "step": 167 - }, - { - "epoch": 0.16353017521090202, - "grad_norm": 0.5703125, - "learning_rate": 4.9674149341789554e-06, - "loss": 1.5918, - "step": 168 - }, - { - "epoch": 0.16450356911096692, - "grad_norm": 0.486328125, - "learning_rate": 4.9670020723526325e-06, - "loss": 1.5902, - "step": 169 - }, - { - "epoch": 0.1654769630110318, - "grad_norm": 0.48828125, - "learning_rate": 4.9665866288036354e-06, - "loss": 1.5843, - "step": 170 - }, - { - "epoch": 0.1664503569110967, - "grad_norm": 0.484375, - "learning_rate": 4.966168603966727e-06, - "loss": 1.5759, - "step": 171 - }, - { - "epoch": 0.1674237508111616, - "grad_norm": 0.48046875, - "learning_rate": 4.965747998279371e-06, - "loss": 1.5591, - "step": 172 - }, - { - "epoch": 0.16839714471122647, - "grad_norm": 0.4921875, - "learning_rate": 4.965324812181733e-06, - "loss": 1.5646, - "step": 173 - }, - { - "epoch": 0.16937053861129137, - "grad_norm": 0.4765625, - "learning_rate": 4.964899046116677e-06, - "loss": 1.5582, - "step": 174 - }, - { - "epoch": 0.17034393251135627, - "grad_norm": 0.484375, - "learning_rate": 4.964470700529769e-06, - "loss": 1.579, - "step": 175 - }, - { - "epoch": 0.17131732641142117, - "grad_norm": 0.4921875, - "learning_rate": 4.964039775869271e-06, - "loss": 1.5866, - "step": 176 - }, - { - "epoch": 0.17229072031148604, - "grad_norm": 0.470703125, - "learning_rate": 4.96360627258615e-06, - "loss": 1.5627, - "step": 177 - }, - { - "epoch": 0.17326411421155094, - "grad_norm": 0.48046875, - "learning_rate": 4.963170191134067e-06, - "loss": 1.5562, - "step": 178 - }, - { - "epoch": 0.17423750811161584, - "grad_norm": 0.474609375, - "learning_rate": 4.96273153196938e-06, - "loss": 1.5689, - "step": 179 - }, - { - "epoch": 0.1752109020116807, - "grad_norm": 0.47265625, - "learning_rate": 4.96229029555115e-06, - "loss": 1.5693, - "step": 180 - }, - { - "epoch": 0.17618429591174561, - "grad_norm": 0.47265625, - "learning_rate": 4.9618464823411285e-06, - "loss": 1.576, - "step": 181 - }, - { - "epoch": 0.17715768981181051, - "grad_norm": 0.47265625, - "learning_rate": 4.9614000928037694e-06, - "loss": 1.539, - "step": 182 - }, - { - "epoch": 0.17813108371187542, - "grad_norm": 0.484375, - "learning_rate": 4.96095112740622e-06, - "loss": 1.58, - "step": 183 - }, - { - "epoch": 0.1791044776119403, - "grad_norm": 0.4765625, - "learning_rate": 4.960499586618322e-06, - "loss": 1.5663, - "step": 184 - }, - { - "epoch": 0.1800778715120052, - "grad_norm": 0.47265625, - "learning_rate": 4.960045470912615e-06, - "loss": 1.5625, - "step": 185 - }, - { - "epoch": 0.1810512654120701, - "grad_norm": 0.47265625, - "learning_rate": 4.959588780764333e-06, - "loss": 1.5449, - "step": 186 - }, - { - "epoch": 0.182024659312135, - "grad_norm": 0.46484375, - "learning_rate": 4.9591295166514e-06, - "loss": 1.5538, - "step": 187 - }, - { - "epoch": 0.18299805321219986, - "grad_norm": 0.4765625, - "learning_rate": 4.9586676790544395e-06, - "loss": 1.5541, - "step": 188 - }, - { - "epoch": 0.18397144711226476, - "grad_norm": 0.4765625, - "learning_rate": 4.958203268456765e-06, - "loss": 1.5581, - "step": 189 - }, - { - "epoch": 0.18494484101232966, - "grad_norm": 0.47265625, - "learning_rate": 4.957736285344383e-06, - "loss": 1.5518, - "step": 190 - }, - { - "epoch": 0.18591823491239454, - "grad_norm": 0.470703125, - "learning_rate": 4.957266730205991e-06, - "loss": 1.5802, - "step": 191 - }, - { - "epoch": 0.18689162881245944, - "grad_norm": 0.486328125, - "learning_rate": 4.956794603532981e-06, - "loss": 1.5601, - "step": 192 - }, - { - "epoch": 0.18786502271252434, - "grad_norm": 0.45703125, - "learning_rate": 4.956319905819433e-06, - "loss": 1.5332, - "step": 193 - }, - { - "epoch": 0.18883841661258924, - "grad_norm": 0.4765625, - "learning_rate": 4.955842637562121e-06, - "loss": 1.5651, - "step": 194 - }, - { - "epoch": 0.1898118105126541, - "grad_norm": 0.47265625, - "learning_rate": 4.955362799260507e-06, - "loss": 1.564, - "step": 195 - }, - { - "epoch": 0.190785204412719, - "grad_norm": 0.46484375, - "learning_rate": 4.954880391416742e-06, - "loss": 1.5524, - "step": 196 - }, - { - "epoch": 0.1917585983127839, - "grad_norm": 0.470703125, - "learning_rate": 4.954395414535666e-06, - "loss": 1.5427, - "step": 197 - }, - { - "epoch": 0.1927319922128488, - "grad_norm": 0.47265625, - "learning_rate": 4.95390786912481e-06, - "loss": 1.5694, - "step": 198 - }, - { - "epoch": 0.19370538611291369, - "grad_norm": 0.470703125, - "learning_rate": 4.95341775569439e-06, - "loss": 1.5655, - "step": 199 - }, - { - "epoch": 0.1946787800129786, - "grad_norm": 0.45703125, - "learning_rate": 4.952925074757311e-06, - "loss": 1.5213, - "step": 200 - }, - { - "epoch": 0.1956521739130435, - "grad_norm": 0.46875, - "learning_rate": 4.9524298268291646e-06, - "loss": 1.552, - "step": 201 - }, - { - "epoch": 0.19662556781310836, - "grad_norm": 0.478515625, - "learning_rate": 4.951932012428229e-06, - "loss": 1.5534, - "step": 202 - }, - { - "epoch": 0.19759896171317326, - "grad_norm": 0.470703125, - "learning_rate": 4.951431632075468e-06, - "loss": 1.5501, - "step": 203 - }, - { - "epoch": 0.19857235561323816, - "grad_norm": 0.458984375, - "learning_rate": 4.950928686294531e-06, - "loss": 1.5389, - "step": 204 - }, - { - "epoch": 0.19954574951330306, - "grad_norm": 0.4609375, - "learning_rate": 4.950423175611751e-06, - "loss": 1.561, - "step": 205 - }, - { - "epoch": 0.20051914341336793, - "grad_norm": 0.46875, - "learning_rate": 4.949915100556146e-06, - "loss": 1.5662, - "step": 206 - }, - { - "epoch": 0.20149253731343283, - "grad_norm": 0.46875, - "learning_rate": 4.949404461659417e-06, - "loss": 1.5414, - "step": 207 - }, - { - "epoch": 0.20246593121349774, - "grad_norm": 0.46484375, - "learning_rate": 4.948891259455951e-06, - "loss": 1.5569, - "step": 208 - }, - { - "epoch": 0.2034393251135626, - "grad_norm": 0.458984375, - "learning_rate": 4.948375494482813e-06, - "loss": 1.5561, - "step": 209 - }, - { - "epoch": 0.2044127190136275, - "grad_norm": 0.46875, - "learning_rate": 4.947857167279753e-06, - "loss": 1.5661, - "step": 210 - }, - { - "epoch": 0.2053861129136924, - "grad_norm": 0.470703125, - "learning_rate": 4.947336278389201e-06, - "loss": 1.5579, - "step": 211 - }, - { - "epoch": 0.2063595068137573, - "grad_norm": 0.470703125, - "learning_rate": 4.946812828356268e-06, - "loss": 1.5325, - "step": 212 - }, - { - "epoch": 0.20733290071382218, - "grad_norm": 0.458984375, - "learning_rate": 4.946286817728746e-06, - "loss": 1.5208, - "step": 213 - }, - { - "epoch": 0.20830629461388708, - "grad_norm": 0.462890625, - "learning_rate": 4.945758247057107e-06, - "loss": 1.5509, - "step": 214 - }, - { - "epoch": 0.20927968851395198, - "grad_norm": 0.453125, - "learning_rate": 4.9452271168945e-06, - "loss": 1.5414, - "step": 215 - }, - { - "epoch": 0.21025308241401688, - "grad_norm": 0.45703125, - "learning_rate": 4.944693427796754e-06, - "loss": 1.5746, - "step": 216 - }, - { - "epoch": 0.21122647631408176, - "grad_norm": 0.47265625, - "learning_rate": 4.944157180322377e-06, - "loss": 1.5315, - "step": 217 - }, - { - "epoch": 0.21219987021414666, - "grad_norm": 0.462890625, - "learning_rate": 4.9436183750325505e-06, - "loss": 1.5468, - "step": 218 - }, - { - "epoch": 0.21317326411421156, - "grad_norm": 0.46875, - "learning_rate": 4.943077012491138e-06, - "loss": 1.5213, - "step": 219 - }, - { - "epoch": 0.21414665801427643, - "grad_norm": 0.46484375, - "learning_rate": 4.942533093264675e-06, - "loss": 1.547, - "step": 220 - }, - { - "epoch": 0.21512005191434133, - "grad_norm": 0.4765625, - "learning_rate": 4.941986617922374e-06, - "loss": 1.5467, - "step": 221 - }, - { - "epoch": 0.21609344581440623, - "grad_norm": 0.46484375, - "learning_rate": 4.941437587036123e-06, - "loss": 1.5447, - "step": 222 - }, - { - "epoch": 0.21706683971447113, - "grad_norm": 0.462890625, - "learning_rate": 4.940886001180485e-06, - "loss": 1.5026, - "step": 223 - }, - { - "epoch": 0.218040233614536, - "grad_norm": 0.4609375, - "learning_rate": 4.940331860932694e-06, - "loss": 1.5049, - "step": 224 - }, - { - "epoch": 0.2190136275146009, - "grad_norm": 0.466796875, - "learning_rate": 4.9397751668726595e-06, - "loss": 1.5445, - "step": 225 - }, - { - "epoch": 0.2199870214146658, - "grad_norm": 0.45703125, - "learning_rate": 4.939215919582963e-06, - "loss": 1.5168, - "step": 226 - }, - { - "epoch": 0.2209604153147307, - "grad_norm": 0.470703125, - "learning_rate": 4.938654119648858e-06, - "loss": 1.5318, - "step": 227 - }, - { - "epoch": 0.22193380921479558, - "grad_norm": 0.45703125, - "learning_rate": 4.938089767658269e-06, - "loss": 1.4995, - "step": 228 - }, - { - "epoch": 0.22290720311486048, - "grad_norm": 0.4609375, - "learning_rate": 4.937522864201792e-06, - "loss": 1.513, - "step": 229 - }, - { - "epoch": 0.22388059701492538, - "grad_norm": 0.462890625, - "learning_rate": 4.936953409872692e-06, - "loss": 1.549, - "step": 230 - }, - { - "epoch": 0.22485399091499025, - "grad_norm": 0.4609375, - "learning_rate": 4.936381405266904e-06, - "loss": 1.5253, - "step": 231 - }, - { - "epoch": 0.22582738481505515, - "grad_norm": 0.4609375, - "learning_rate": 4.935806850983034e-06, - "loss": 1.536, - "step": 232 - }, - { - "epoch": 0.22680077871512005, - "grad_norm": 0.458984375, - "learning_rate": 4.935229747622352e-06, - "loss": 1.5603, - "step": 233 - }, - { - "epoch": 0.22777417261518496, - "grad_norm": 0.482421875, - "learning_rate": 4.934650095788798e-06, - "loss": 1.538, - "step": 234 - }, - { - "epoch": 0.22874756651524983, - "grad_norm": 0.466796875, - "learning_rate": 4.934067896088979e-06, - "loss": 1.5108, - "step": 235 - }, - { - "epoch": 0.22972096041531473, - "grad_norm": 0.462890625, - "learning_rate": 4.9334831491321685e-06, - "loss": 1.52, - "step": 236 - }, - { - "epoch": 0.23069435431537963, - "grad_norm": 0.45703125, - "learning_rate": 4.932895855530307e-06, - "loss": 1.5124, - "step": 237 - }, - { - "epoch": 0.23166774821544453, - "grad_norm": 0.484375, - "learning_rate": 4.932306015897995e-06, - "loss": 1.5195, - "step": 238 - }, - { - "epoch": 0.2326411421155094, - "grad_norm": 0.466796875, - "learning_rate": 4.9317136308525025e-06, - "loss": 1.5301, - "step": 239 - }, - { - "epoch": 0.2336145360155743, - "grad_norm": 0.462890625, - "learning_rate": 4.931118701013763e-06, - "loss": 1.518, - "step": 240 - }, - { - "epoch": 0.2345879299156392, - "grad_norm": 0.46484375, - "learning_rate": 4.93052122700437e-06, - "loss": 1.5004, - "step": 241 - }, - { - "epoch": 0.23556132381570408, - "grad_norm": 0.462890625, - "learning_rate": 4.9299212094495816e-06, - "loss": 1.5155, - "step": 242 - }, - { - "epoch": 0.23653471771576898, - "grad_norm": 0.466796875, - "learning_rate": 4.929318648977318e-06, - "loss": 1.5499, - "step": 243 - }, - { - "epoch": 0.23750811161583388, - "grad_norm": 0.455078125, - "learning_rate": 4.92871354621816e-06, - "loss": 1.5221, - "step": 244 - }, - { - "epoch": 0.23848150551589878, - "grad_norm": 0.451171875, - "learning_rate": 4.9281059018053475e-06, - "loss": 1.535, - "step": 245 - }, - { - "epoch": 0.23945489941596365, - "grad_norm": 0.453125, - "learning_rate": 4.927495716374783e-06, - "loss": 1.512, - "step": 246 - }, - { - "epoch": 0.24042829331602855, - "grad_norm": 0.4609375, - "learning_rate": 4.9268829905650274e-06, - "loss": 1.5274, - "step": 247 - }, - { - "epoch": 0.24140168721609345, - "grad_norm": 0.451171875, - "learning_rate": 4.926267725017297e-06, - "loss": 1.5027, - "step": 248 - }, - { - "epoch": 0.24237508111615833, - "grad_norm": 0.462890625, - "learning_rate": 4.925649920375471e-06, - "loss": 1.5368, - "step": 249 - }, - { - "epoch": 0.24334847501622323, - "grad_norm": 0.455078125, - "learning_rate": 4.9250295772860815e-06, - "loss": 1.522, - "step": 250 - }, - { - "epoch": 0.24432186891628813, - "grad_norm": 0.45703125, - "learning_rate": 4.924406696398319e-06, - "loss": 1.5478, - "step": 251 - }, - { - "epoch": 0.24529526281635303, - "grad_norm": 0.453125, - "learning_rate": 4.9237812783640304e-06, - "loss": 1.5049, - "step": 252 - }, - { - "epoch": 0.2462686567164179, - "grad_norm": 0.458984375, - "learning_rate": 4.923153323837717e-06, - "loss": 1.5182, - "step": 253 - }, - { - "epoch": 0.2472420506164828, - "grad_norm": 0.458984375, - "learning_rate": 4.922522833476533e-06, - "loss": 1.5137, - "step": 254 - }, - { - "epoch": 0.2482154445165477, - "grad_norm": 0.45703125, - "learning_rate": 4.92188980794029e-06, - "loss": 1.5455, - "step": 255 - }, - { - "epoch": 0.2491888384166126, - "grad_norm": 0.443359375, - "learning_rate": 4.921254247891449e-06, - "loss": 1.501, - "step": 256 - }, - { - "epoch": 0.2501622323166775, - "grad_norm": 0.451171875, - "learning_rate": 4.920616153995126e-06, - "loss": 1.5195, - "step": 257 - }, - { - "epoch": 0.2511356262167424, - "grad_norm": 0.46484375, - "learning_rate": 4.9199755269190865e-06, - "loss": 1.5014, - "step": 258 - }, - { - "epoch": 0.25210902011680725, - "grad_norm": 0.447265625, - "learning_rate": 4.919332367333748e-06, - "loss": 1.4972, - "step": 259 - }, - { - "epoch": 0.25308241401687215, - "grad_norm": 0.447265625, - "learning_rate": 4.918686675912178e-06, - "loss": 1.5035, - "step": 260 - }, - { - "epoch": 0.25405580791693705, - "grad_norm": 0.453125, - "learning_rate": 4.918038453330095e-06, - "loss": 1.5082, - "step": 261 - }, - { - "epoch": 0.25502920181700195, - "grad_norm": 0.44921875, - "learning_rate": 4.917387700265866e-06, - "loss": 1.518, - "step": 262 - }, - { - "epoch": 0.25600259571706685, - "grad_norm": 0.4453125, - "learning_rate": 4.916734417400503e-06, - "loss": 1.4958, - "step": 263 - }, - { - "epoch": 0.25697598961713175, - "grad_norm": 0.4609375, - "learning_rate": 4.91607860541767e-06, - "loss": 1.5237, - "step": 264 - }, - { - "epoch": 0.25794938351719665, - "grad_norm": 0.453125, - "learning_rate": 4.915420265003674e-06, - "loss": 1.4898, - "step": 265 - }, - { - "epoch": 0.2589227774172615, - "grad_norm": 0.44921875, - "learning_rate": 4.9147593968474705e-06, - "loss": 1.4946, - "step": 266 - }, - { - "epoch": 0.2598961713173264, - "grad_norm": 0.4609375, - "learning_rate": 4.914096001640659e-06, - "loss": 1.4958, - "step": 267 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.453125, - "learning_rate": 4.913430080077483e-06, - "loss": 1.5221, - "step": 268 - }, - { - "epoch": 0.2618429591174562, - "grad_norm": 0.447265625, - "learning_rate": 4.912761632854834e-06, - "loss": 1.4937, - "step": 269 - }, - { - "epoch": 0.2628163530175211, - "grad_norm": 0.453125, - "learning_rate": 4.91209066067224e-06, - "loss": 1.5216, - "step": 270 - }, - { - "epoch": 0.263789746917586, - "grad_norm": 0.462890625, - "learning_rate": 4.911417164231875e-06, - "loss": 1.5069, - "step": 271 - }, - { - "epoch": 0.2647631408176509, - "grad_norm": 0.466796875, - "learning_rate": 4.910741144238556e-06, - "loss": 1.5145, - "step": 272 - }, - { - "epoch": 0.26573653471771574, - "grad_norm": 0.45703125, - "learning_rate": 4.910062601399739e-06, - "loss": 1.5049, - "step": 273 - }, - { - "epoch": 0.26670992861778064, - "grad_norm": 0.46484375, - "learning_rate": 4.9093815364255204e-06, - "loss": 1.5153, - "step": 274 - }, - { - "epoch": 0.26768332251784555, - "grad_norm": 0.4609375, - "learning_rate": 4.9086979500286345e-06, - "loss": 1.5106, - "step": 275 - }, - { - "epoch": 0.26865671641791045, - "grad_norm": 0.478515625, - "learning_rate": 4.908011842924458e-06, - "loss": 1.5176, - "step": 276 - }, - { - "epoch": 0.26963011031797535, - "grad_norm": 0.45703125, - "learning_rate": 4.9073232158310025e-06, - "loss": 1.4883, - "step": 277 - }, - { - "epoch": 0.27060350421804025, - "grad_norm": 0.466796875, - "learning_rate": 4.906632069468917e-06, - "loss": 1.5318, - "step": 278 - }, - { - "epoch": 0.27157689811810515, - "grad_norm": 0.48046875, - "learning_rate": 4.905938404561489e-06, - "loss": 1.518, - "step": 279 - }, - { - "epoch": 0.27255029201817, - "grad_norm": 0.458984375, - "learning_rate": 4.905242221834638e-06, - "loss": 1.4833, - "step": 280 - }, - { - "epoch": 0.2735236859182349, - "grad_norm": 0.453125, - "learning_rate": 4.904543522016923e-06, - "loss": 1.522, - "step": 281 - }, - { - "epoch": 0.2744970798182998, - "grad_norm": 0.447265625, - "learning_rate": 4.903842305839534e-06, - "loss": 1.4828, - "step": 282 - }, - { - "epoch": 0.2754704737183647, - "grad_norm": 0.46875, - "learning_rate": 4.903138574036295e-06, - "loss": 1.5441, - "step": 283 - }, - { - "epoch": 0.2764438676184296, - "grad_norm": 0.47265625, - "learning_rate": 4.902432327343662e-06, - "loss": 1.4891, - "step": 284 - }, - { - "epoch": 0.2774172615184945, - "grad_norm": 0.470703125, - "learning_rate": 4.901723566500725e-06, - "loss": 1.4813, - "step": 285 - }, - { - "epoch": 0.2783906554185594, - "grad_norm": 0.453125, - "learning_rate": 4.901012292249203e-06, - "loss": 1.4753, - "step": 286 - }, - { - "epoch": 0.2793640493186243, - "grad_norm": 0.4453125, - "learning_rate": 4.900298505333446e-06, - "loss": 1.4873, - "step": 287 - }, - { - "epoch": 0.28033744321868914, - "grad_norm": 0.455078125, - "learning_rate": 4.899582206500433e-06, - "loss": 1.4863, - "step": 288 - }, - { - "epoch": 0.28131083711875404, - "grad_norm": 0.46875, - "learning_rate": 4.898863396499772e-06, - "loss": 1.5016, - "step": 289 - }, - { - "epoch": 0.28228423101881894, - "grad_norm": 0.46875, - "learning_rate": 4.898142076083701e-06, - "loss": 1.4733, - "step": 290 - }, - { - "epoch": 0.28325762491888384, - "grad_norm": 0.4609375, - "learning_rate": 4.8974182460070814e-06, - "loss": 1.5243, - "step": 291 - }, - { - "epoch": 0.28423101881894874, - "grad_norm": 0.453125, - "learning_rate": 4.896691907027404e-06, - "loss": 1.489, - "step": 292 - }, - { - "epoch": 0.28520441271901364, - "grad_norm": 0.443359375, - "learning_rate": 4.895963059904782e-06, - "loss": 1.4813, - "step": 293 - }, - { - "epoch": 0.28617780661907855, - "grad_norm": 0.458984375, - "learning_rate": 4.895231705401958e-06, - "loss": 1.4781, - "step": 294 - }, - { - "epoch": 0.2871512005191434, - "grad_norm": 0.474609375, - "learning_rate": 4.8944978442842944e-06, - "loss": 1.4688, - "step": 295 - }, - { - "epoch": 0.2881245944192083, - "grad_norm": 0.46484375, - "learning_rate": 4.893761477319779e-06, - "loss": 1.4892, - "step": 296 - }, - { - "epoch": 0.2890979883192732, - "grad_norm": 0.44921875, - "learning_rate": 4.8930226052790204e-06, - "loss": 1.5027, - "step": 297 - }, - { - "epoch": 0.2900713822193381, - "grad_norm": 0.45703125, - "learning_rate": 4.892281228935252e-06, - "loss": 1.502, - "step": 298 - }, - { - "epoch": 0.291044776119403, - "grad_norm": 0.44140625, - "learning_rate": 4.891537349064322e-06, - "loss": 1.4795, - "step": 299 - }, - { - "epoch": 0.2920181700194679, - "grad_norm": 0.45703125, - "learning_rate": 4.890790966444705e-06, - "loss": 1.4875, - "step": 300 - }, - { - "epoch": 0.2929915639195328, - "grad_norm": 0.455078125, - "learning_rate": 4.8900420818574915e-06, - "loss": 1.4629, - "step": 301 - }, - { - "epoch": 0.29396495781959764, - "grad_norm": 0.4375, - "learning_rate": 4.889290696086391e-06, - "loss": 1.4935, - "step": 302 - }, - { - "epoch": 0.29493835171966254, - "grad_norm": 0.44921875, - "learning_rate": 4.888536809917728e-06, - "loss": 1.47, - "step": 303 - }, - { - "epoch": 0.29591174561972744, - "grad_norm": 0.44140625, - "learning_rate": 4.887780424140448e-06, - "loss": 1.4882, - "step": 304 - }, - { - "epoch": 0.29688513951979234, - "grad_norm": 0.435546875, - "learning_rate": 4.88702153954611e-06, - "loss": 1.4968, - "step": 305 - }, - { - "epoch": 0.29785853341985724, - "grad_norm": 0.455078125, - "learning_rate": 4.8862601569288885e-06, - "loss": 1.4711, - "step": 306 - }, - { - "epoch": 0.29883192731992214, - "grad_norm": 0.44921875, - "learning_rate": 4.885496277085571e-06, - "loss": 1.4824, - "step": 307 - }, - { - "epoch": 0.29980532121998704, - "grad_norm": 0.439453125, - "learning_rate": 4.884729900815559e-06, - "loss": 1.4758, - "step": 308 - }, - { - "epoch": 0.3007787151200519, - "grad_norm": 0.45703125, - "learning_rate": 4.8839610289208695e-06, - "loss": 1.4932, - "step": 309 - }, - { - "epoch": 0.3017521090201168, - "grad_norm": 0.451171875, - "learning_rate": 4.8831896622061256e-06, - "loss": 1.4829, - "step": 310 - }, - { - "epoch": 0.3027255029201817, - "grad_norm": 0.45703125, - "learning_rate": 4.882415801478565e-06, - "loss": 1.4996, - "step": 311 - }, - { - "epoch": 0.3036988968202466, - "grad_norm": 0.46875, - "learning_rate": 4.881639447548034e-06, - "loss": 1.4822, - "step": 312 - }, - { - "epoch": 0.3046722907203115, - "grad_norm": 0.455078125, - "learning_rate": 4.88086060122699e-06, - "loss": 1.485, - "step": 313 - }, - { - "epoch": 0.3056456846203764, - "grad_norm": 0.44921875, - "learning_rate": 4.880079263330497e-06, - "loss": 1.4654, - "step": 314 - }, - { - "epoch": 0.3066190785204413, - "grad_norm": 0.44140625, - "learning_rate": 4.8792954346762256e-06, - "loss": 1.4699, - "step": 315 - }, - { - "epoch": 0.3075924724205062, - "grad_norm": 0.443359375, - "learning_rate": 4.878509116084455e-06, - "loss": 1.4921, - "step": 316 - }, - { - "epoch": 0.30856586632057104, - "grad_norm": 0.447265625, - "learning_rate": 4.8777203083780675e-06, - "loss": 1.4881, - "step": 317 - }, - { - "epoch": 0.30953926022063594, - "grad_norm": 0.453125, - "learning_rate": 4.876929012382555e-06, - "loss": 1.4741, - "step": 318 - }, - { - "epoch": 0.31051265412070084, - "grad_norm": 0.44921875, - "learning_rate": 4.876135228926008e-06, - "loss": 1.475, - "step": 319 - }, - { - "epoch": 0.31148604802076574, - "grad_norm": 0.443359375, - "learning_rate": 4.875338958839123e-06, - "loss": 1.5087, - "step": 320 - }, - { - "epoch": 0.31245944192083064, - "grad_norm": 0.45703125, - "learning_rate": 4.8745402029551995e-06, - "loss": 1.4926, - "step": 321 - }, - { - "epoch": 0.31343283582089554, - "grad_norm": 0.47265625, - "learning_rate": 4.873738962110135e-06, - "loss": 1.4801, - "step": 322 - }, - { - "epoch": 0.31440622972096044, - "grad_norm": 0.4765625, - "learning_rate": 4.872935237142431e-06, - "loss": 1.4819, - "step": 323 - }, - { - "epoch": 0.3153796236210253, - "grad_norm": 0.4453125, - "learning_rate": 4.872129028893186e-06, - "loss": 1.4921, - "step": 324 - }, - { - "epoch": 0.3163530175210902, - "grad_norm": 0.4453125, - "learning_rate": 4.871320338206101e-06, - "loss": 1.4897, - "step": 325 - }, - { - "epoch": 0.3173264114211551, - "grad_norm": 0.451171875, - "learning_rate": 4.870509165927471e-06, - "loss": 1.4856, - "step": 326 - }, - { - "epoch": 0.31829980532122, - "grad_norm": 0.45703125, - "learning_rate": 4.86969551290619e-06, - "loss": 1.4761, - "step": 327 - }, - { - "epoch": 0.3192731992212849, - "grad_norm": 0.45703125, - "learning_rate": 4.868879379993746e-06, - "loss": 1.5015, - "step": 328 - }, - { - "epoch": 0.3202465931213498, - "grad_norm": 0.447265625, - "learning_rate": 4.868060768044225e-06, - "loss": 1.4904, - "step": 329 - }, - { - "epoch": 0.3212199870214147, - "grad_norm": 0.482421875, - "learning_rate": 4.867239677914306e-06, - "loss": 1.4805, - "step": 330 - }, - { - "epoch": 0.32219338092147953, - "grad_norm": 0.53125, - "learning_rate": 4.866416110463261e-06, - "loss": 1.4899, - "step": 331 - }, - { - "epoch": 0.32316677482154443, - "grad_norm": 0.4765625, - "learning_rate": 4.8655900665529565e-06, - "loss": 1.4891, - "step": 332 - }, - { - "epoch": 0.32414016872160933, - "grad_norm": 0.43359375, - "learning_rate": 4.864761547047847e-06, - "loss": 1.4681, - "step": 333 - }, - { - "epoch": 0.32511356262167423, - "grad_norm": 0.482421875, - "learning_rate": 4.863930552814981e-06, - "loss": 1.493, - "step": 334 - }, - { - "epoch": 0.32608695652173914, - "grad_norm": 0.494140625, - "learning_rate": 4.863097084723996e-06, - "loss": 1.4591, - "step": 335 - }, - { - "epoch": 0.32706035042180404, - "grad_norm": 0.4765625, - "learning_rate": 4.862261143647117e-06, - "loss": 1.4805, - "step": 336 - }, - { - "epoch": 0.32803374432186894, - "grad_norm": 0.44921875, - "learning_rate": 4.861422730459159e-06, - "loss": 1.4848, - "step": 337 - }, - { - "epoch": 0.32900713822193384, - "grad_norm": 0.4609375, - "learning_rate": 4.860581846037522e-06, - "loss": 1.4645, - "step": 338 - }, - { - "epoch": 0.3299805321219987, - "grad_norm": 0.466796875, - "learning_rate": 4.859738491262195e-06, - "loss": 1.4922, - "step": 339 - }, - { - "epoch": 0.3309539260220636, - "grad_norm": 0.48046875, - "learning_rate": 4.858892667015749e-06, - "loss": 1.4678, - "step": 340 - }, - { - "epoch": 0.3319273199221285, - "grad_norm": 0.470703125, - "learning_rate": 4.8580443741833404e-06, - "loss": 1.4982, - "step": 341 - }, - { - "epoch": 0.3329007138221934, - "grad_norm": 0.44921875, - "learning_rate": 4.857193613652711e-06, - "loss": 1.4497, - "step": 342 - }, - { - "epoch": 0.3338741077222583, - "grad_norm": 0.4609375, - "learning_rate": 4.8563403863141825e-06, - "loss": 1.4563, - "step": 343 - }, - { - "epoch": 0.3348475016223232, - "grad_norm": 0.458984375, - "learning_rate": 4.855484693060658e-06, - "loss": 1.4787, - "step": 344 - }, - { - "epoch": 0.3358208955223881, - "grad_norm": 0.455078125, - "learning_rate": 4.854626534787625e-06, - "loss": 1.493, - "step": 345 - }, - { - "epoch": 0.33679428942245293, - "grad_norm": 0.4453125, - "learning_rate": 4.853765912393146e-06, - "loss": 1.4569, - "step": 346 - }, - { - "epoch": 0.33776768332251783, - "grad_norm": 0.443359375, - "learning_rate": 4.852902826777863e-06, - "loss": 1.4463, - "step": 347 - }, - { - "epoch": 0.33874107722258273, - "grad_norm": 0.458984375, - "learning_rate": 4.852037278845e-06, - "loss": 1.4696, - "step": 348 - }, - { - "epoch": 0.33971447112264763, - "grad_norm": 0.44140625, - "learning_rate": 4.851169269500351e-06, - "loss": 1.4719, - "step": 349 - }, - { - "epoch": 0.34068786502271253, - "grad_norm": 0.447265625, - "learning_rate": 4.850298799652293e-06, - "loss": 1.4561, - "step": 350 - }, - { - "epoch": 0.34166125892277743, - "grad_norm": 0.458984375, - "learning_rate": 4.8494258702117715e-06, - "loss": 1.4814, - "step": 351 - }, - { - "epoch": 0.34263465282284233, - "grad_norm": 0.458984375, - "learning_rate": 4.8485504820923115e-06, - "loss": 1.4758, - "step": 352 - }, - { - "epoch": 0.3436080467229072, - "grad_norm": 0.4609375, - "learning_rate": 4.847672636210005e-06, - "loss": 1.4657, - "step": 353 - }, - { - "epoch": 0.3445814406229721, - "grad_norm": 0.44921875, - "learning_rate": 4.8467923334835245e-06, - "loss": 1.4647, - "step": 354 - }, - { - "epoch": 0.345554834523037, - "grad_norm": 0.455078125, - "learning_rate": 4.8459095748341045e-06, - "loss": 1.4916, - "step": 355 - }, - { - "epoch": 0.3465282284231019, - "grad_norm": 0.462890625, - "learning_rate": 4.845024361185555e-06, - "loss": 1.4676, - "step": 356 - }, - { - "epoch": 0.3475016223231668, - "grad_norm": 0.5390625, - "learning_rate": 4.8441366934642545e-06, - "loss": 1.4645, - "step": 357 - }, - { - "epoch": 0.3484750162232317, - "grad_norm": 0.439453125, - "learning_rate": 4.8432465725991475e-06, - "loss": 1.4519, - "step": 358 - }, - { - "epoch": 0.3494484101232966, - "grad_norm": 0.443359375, - "learning_rate": 4.842353999521749e-06, - "loss": 1.4563, - "step": 359 - }, - { - "epoch": 0.3504218040233614, - "grad_norm": 0.453125, - "learning_rate": 4.841458975166137e-06, - "loss": 1.4553, - "step": 360 - }, - { - "epoch": 0.35139519792342633, - "grad_norm": 0.44921875, - "learning_rate": 4.840561500468958e-06, - "loss": 1.4512, - "step": 361 - }, - { - "epoch": 0.35236859182349123, - "grad_norm": 0.4375, - "learning_rate": 4.839661576369419e-06, - "loss": 1.4644, - "step": 362 - }, - { - "epoch": 0.35334198572355613, - "grad_norm": 0.443359375, - "learning_rate": 4.838759203809295e-06, - "loss": 1.466, - "step": 363 - }, - { - "epoch": 0.35431537962362103, - "grad_norm": 0.46875, - "learning_rate": 4.837854383732918e-06, - "loss": 1.4882, - "step": 364 - }, - { - "epoch": 0.35528877352368593, - "grad_norm": 0.458984375, - "learning_rate": 4.836947117087186e-06, - "loss": 1.4511, - "step": 365 - }, - { - "epoch": 0.35626216742375083, - "grad_norm": 0.515625, - "learning_rate": 4.836037404821554e-06, - "loss": 1.4536, - "step": 366 - }, - { - "epoch": 0.35723556132381573, - "grad_norm": 0.453125, - "learning_rate": 4.83512524788804e-06, - "loss": 1.4477, - "step": 367 - }, - { - "epoch": 0.3582089552238806, - "grad_norm": 0.462890625, - "learning_rate": 4.834210647241215e-06, - "loss": 1.4668, - "step": 368 - }, - { - "epoch": 0.3591823491239455, - "grad_norm": 0.4921875, - "learning_rate": 4.8332936038382125e-06, - "loss": 1.5002, - "step": 369 - }, - { - "epoch": 0.3601557430240104, - "grad_norm": 0.46875, - "learning_rate": 4.8323741186387205e-06, - "loss": 1.4618, - "step": 370 - }, - { - "epoch": 0.3611291369240753, - "grad_norm": 0.44921875, - "learning_rate": 4.831452192604981e-06, - "loss": 1.4647, - "step": 371 - }, - { - "epoch": 0.3621025308241402, - "grad_norm": 0.451171875, - "learning_rate": 4.830527826701791e-06, - "loss": 1.4363, - "step": 372 - }, - { - "epoch": 0.3630759247242051, - "grad_norm": 0.47265625, - "learning_rate": 4.829601021896503e-06, - "loss": 1.4425, - "step": 373 - }, - { - "epoch": 0.36404931862427, - "grad_norm": 0.455078125, - "learning_rate": 4.828671779159019e-06, - "loss": 1.4738, - "step": 374 - }, - { - "epoch": 0.3650227125243348, - "grad_norm": 0.451171875, - "learning_rate": 4.827740099461793e-06, - "loss": 1.4419, - "step": 375 - }, - { - "epoch": 0.3659961064243997, - "grad_norm": 0.466796875, - "learning_rate": 4.826805983779831e-06, - "loss": 1.4411, - "step": 376 - }, - { - "epoch": 0.3669695003244646, - "grad_norm": 0.443359375, - "learning_rate": 4.825869433090686e-06, - "loss": 1.442, - "step": 377 - }, - { - "epoch": 0.3679428942245295, - "grad_norm": 0.4453125, - "learning_rate": 4.824930448374462e-06, - "loss": 1.4477, - "step": 378 - }, - { - "epoch": 0.3689162881245944, - "grad_norm": 0.44921875, - "learning_rate": 4.823989030613805e-06, - "loss": 1.4556, - "step": 379 - }, - { - "epoch": 0.3698896820246593, - "grad_norm": 0.46875, - "learning_rate": 4.823045180793914e-06, - "loss": 1.4586, - "step": 380 - }, - { - "epoch": 0.37086307592472423, - "grad_norm": 0.4453125, - "learning_rate": 4.822098899902527e-06, - "loss": 1.463, - "step": 381 - }, - { - "epoch": 0.3718364698247891, - "grad_norm": 0.4453125, - "learning_rate": 4.82115018892993e-06, - "loss": 1.4751, - "step": 382 - }, - { - "epoch": 0.372809863724854, - "grad_norm": 0.439453125, - "learning_rate": 4.8201990488689524e-06, - "loss": 1.4391, - "step": 383 - }, - { - "epoch": 0.3737832576249189, - "grad_norm": 0.455078125, - "learning_rate": 4.819245480714962e-06, - "loss": 1.4352, - "step": 384 - }, - { - "epoch": 0.3747566515249838, - "grad_norm": 0.4453125, - "learning_rate": 4.8182894854658715e-06, - "loss": 1.4346, - "step": 385 - }, - { - "epoch": 0.3757300454250487, - "grad_norm": 0.453125, - "learning_rate": 4.81733106412213e-06, - "loss": 1.4661, - "step": 386 - }, - { - "epoch": 0.3767034393251136, - "grad_norm": 0.43359375, - "learning_rate": 4.816370217686729e-06, - "loss": 1.4604, - "step": 387 - }, - { - "epoch": 0.3776768332251785, - "grad_norm": 0.419921875, - "learning_rate": 4.8154069471651956e-06, - "loss": 1.443, - "step": 388 - }, - { - "epoch": 0.3786502271252433, - "grad_norm": 0.4453125, - "learning_rate": 4.814441253565594e-06, - "loss": 1.4584, - "step": 389 - }, - { - "epoch": 0.3796236210253082, - "grad_norm": 0.431640625, - "learning_rate": 4.813473137898526e-06, - "loss": 1.4461, - "step": 390 - }, - { - "epoch": 0.3805970149253731, - "grad_norm": 0.443359375, - "learning_rate": 4.8125026011771244e-06, - "loss": 1.4514, - "step": 391 - }, - { - "epoch": 0.381570408825438, - "grad_norm": 0.435546875, - "learning_rate": 4.81152964441706e-06, - "loss": 1.4393, - "step": 392 - }, - { - "epoch": 0.3825438027255029, - "grad_norm": 0.435546875, - "learning_rate": 4.810554268636532e-06, - "loss": 1.4668, - "step": 393 - }, - { - "epoch": 0.3835171966255678, - "grad_norm": 0.421875, - "learning_rate": 4.8095764748562754e-06, - "loss": 1.4354, - "step": 394 - }, - { - "epoch": 0.3844905905256327, - "grad_norm": 0.43359375, - "learning_rate": 4.808596264099552e-06, - "loss": 1.4275, - "step": 395 - }, - { - "epoch": 0.3854639844256976, - "grad_norm": 0.4453125, - "learning_rate": 4.807613637392153e-06, - "loss": 1.4668, - "step": 396 - }, - { - "epoch": 0.38643737832576247, - "grad_norm": 0.447265625, - "learning_rate": 4.806628595762403e-06, - "loss": 1.4209, - "step": 397 - }, - { - "epoch": 0.38741077222582737, - "grad_norm": 0.435546875, - "learning_rate": 4.805641140241146e-06, - "loss": 1.4311, - "step": 398 - }, - { - "epoch": 0.38838416612589227, - "grad_norm": 0.4453125, - "learning_rate": 4.8046512718617585e-06, - "loss": 1.4645, - "step": 399 - }, - { - "epoch": 0.3893575600259572, - "grad_norm": 0.447265625, - "learning_rate": 4.80365899166014e-06, - "loss": 1.4332, - "step": 400 - }, - { - "epoch": 0.3903309539260221, - "grad_norm": 0.44140625, - "learning_rate": 4.802664300674712e-06, - "loss": 1.4599, - "step": 401 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.451171875, - "learning_rate": 4.801667199946422e-06, - "loss": 1.45, - "step": 402 - }, - { - "epoch": 0.3922777417261519, - "grad_norm": 0.447265625, - "learning_rate": 4.800667690518737e-06, - "loss": 1.4397, - "step": 403 - }, - { - "epoch": 0.3932511356262167, - "grad_norm": 0.453125, - "learning_rate": 4.799665773437648e-06, - "loss": 1.4442, - "step": 404 - }, - { - "epoch": 0.3942245295262816, - "grad_norm": 0.439453125, - "learning_rate": 4.79866144975166e-06, - "loss": 1.4539, - "step": 405 - }, - { - "epoch": 0.3951979234263465, - "grad_norm": 0.470703125, - "learning_rate": 4.797654720511802e-06, - "loss": 1.4537, - "step": 406 - }, - { - "epoch": 0.3961713173264114, - "grad_norm": 0.458984375, - "learning_rate": 4.7966455867716165e-06, - "loss": 1.4368, - "step": 407 - }, - { - "epoch": 0.3971447112264763, - "grad_norm": 0.474609375, - "learning_rate": 4.795634049587165e-06, - "loss": 1.4435, - "step": 408 - }, - { - "epoch": 0.3981181051265412, - "grad_norm": 0.43359375, - "learning_rate": 4.794620110017025e-06, - "loss": 1.4547, - "step": 409 - }, - { - "epoch": 0.3990914990266061, - "grad_norm": 0.474609375, - "learning_rate": 4.793603769122283e-06, - "loss": 1.4532, - "step": 410 - }, - { - "epoch": 0.40006489292667097, - "grad_norm": 0.4453125, - "learning_rate": 4.792585027966544e-06, - "loss": 1.4506, - "step": 411 - }, - { - "epoch": 0.40103828682673587, - "grad_norm": 0.486328125, - "learning_rate": 4.791563887615921e-06, - "loss": 1.4642, - "step": 412 - }, - { - "epoch": 0.40201168072680077, - "grad_norm": 0.5078125, - "learning_rate": 4.790540349139041e-06, - "loss": 1.4293, - "step": 413 - }, - { - "epoch": 0.40298507462686567, - "grad_norm": 0.4375, - "learning_rate": 4.789514413607039e-06, - "loss": 1.4423, - "step": 414 - }, - { - "epoch": 0.40395846852693057, - "grad_norm": 0.4453125, - "learning_rate": 4.7884860820935574e-06, - "loss": 1.4504, - "step": 415 - }, - { - "epoch": 0.40493186242699547, - "grad_norm": 0.447265625, - "learning_rate": 4.787455355674748e-06, - "loss": 1.4428, - "step": 416 - }, - { - "epoch": 0.40590525632706037, - "grad_norm": 0.4765625, - "learning_rate": 4.786422235429269e-06, - "loss": 1.4195, - "step": 417 - }, - { - "epoch": 0.4068786502271252, - "grad_norm": 0.44921875, - "learning_rate": 4.785386722438281e-06, - "loss": 1.4364, - "step": 418 - }, - { - "epoch": 0.4078520441271901, - "grad_norm": 0.4375, - "learning_rate": 4.7843488177854516e-06, - "loss": 1.4277, - "step": 419 - }, - { - "epoch": 0.408825438027255, - "grad_norm": 0.44140625, - "learning_rate": 4.783308522556949e-06, - "loss": 1.452, - "step": 420 - }, - { - "epoch": 0.4097988319273199, - "grad_norm": 0.451171875, - "learning_rate": 4.782265837841446e-06, - "loss": 1.4367, - "step": 421 - }, - { - "epoch": 0.4107722258273848, - "grad_norm": 0.443359375, - "learning_rate": 4.781220764730113e-06, - "loss": 1.4328, - "step": 422 - }, - { - "epoch": 0.4117456197274497, - "grad_norm": 0.447265625, - "learning_rate": 4.780173304316622e-06, - "loss": 1.439, - "step": 423 - }, - { - "epoch": 0.4127190136275146, - "grad_norm": 0.447265625, - "learning_rate": 4.779123457697142e-06, - "loss": 1.4275, - "step": 424 - }, - { - "epoch": 0.4136924075275795, - "grad_norm": 0.470703125, - "learning_rate": 4.77807122597034e-06, - "loss": 1.3996, - "step": 425 - }, - { - "epoch": 0.41466580142764436, - "grad_norm": 0.462890625, - "learning_rate": 4.777016610237377e-06, - "loss": 1.4428, - "step": 426 - }, - { - "epoch": 0.41563919532770927, - "grad_norm": 0.49609375, - "learning_rate": 4.7759596116019124e-06, - "loss": 1.4351, - "step": 427 - }, - { - "epoch": 0.41661258922777417, - "grad_norm": 0.462890625, - "learning_rate": 4.774900231170096e-06, - "loss": 1.4532, - "step": 428 - }, - { - "epoch": 0.41758598312783907, - "grad_norm": 0.4453125, - "learning_rate": 4.773838470050574e-06, - "loss": 1.4287, - "step": 429 - }, - { - "epoch": 0.41855937702790397, - "grad_norm": 0.44921875, - "learning_rate": 4.772774329354479e-06, - "loss": 1.4431, - "step": 430 - }, - { - "epoch": 0.41953277092796887, - "grad_norm": 0.4609375, - "learning_rate": 4.771707810195437e-06, - "loss": 1.436, - "step": 431 - }, - { - "epoch": 0.42050616482803377, - "grad_norm": 0.490234375, - "learning_rate": 4.770638913689563e-06, - "loss": 1.4336, - "step": 432 - }, - { - "epoch": 0.4214795587280986, - "grad_norm": 0.4765625, - "learning_rate": 4.7695676409554595e-06, - "loss": 1.436, - "step": 433 - }, - { - "epoch": 0.4224529526281635, - "grad_norm": 0.458984375, - "learning_rate": 4.768493993114215e-06, - "loss": 1.4055, - "step": 434 - }, - { - "epoch": 0.4234263465282284, - "grad_norm": 0.4453125, - "learning_rate": 4.767417971289403e-06, - "loss": 1.4437, - "step": 435 - }, - { - "epoch": 0.4243997404282933, - "grad_norm": 0.4375, - "learning_rate": 4.7663395766070854e-06, - "loss": 1.4219, - "step": 436 - }, - { - "epoch": 0.4253731343283582, - "grad_norm": 0.447265625, - "learning_rate": 4.765258810195802e-06, - "loss": 1.4476, - "step": 437 - }, - { - "epoch": 0.4263465282284231, - "grad_norm": 0.453125, - "learning_rate": 4.764175673186579e-06, - "loss": 1.4434, - "step": 438 - }, - { - "epoch": 0.427319922128488, - "grad_norm": 0.431640625, - "learning_rate": 4.763090166712919e-06, - "loss": 1.4167, - "step": 439 - }, - { - "epoch": 0.42829331602855286, - "grad_norm": 0.4453125, - "learning_rate": 4.76200229191081e-06, - "loss": 1.4278, - "step": 440 - }, - { - "epoch": 0.42926670992861776, - "grad_norm": 0.443359375, - "learning_rate": 4.760912049918711e-06, - "loss": 1.4133, - "step": 441 - }, - { - "epoch": 0.43024010382868266, - "grad_norm": 0.44921875, - "learning_rate": 4.759819441877567e-06, - "loss": 1.4294, - "step": 442 - }, - { - "epoch": 0.43121349772874756, - "grad_norm": 0.443359375, - "learning_rate": 4.758724468930791e-06, - "loss": 1.4389, - "step": 443 - }, - { - "epoch": 0.43218689162881246, - "grad_norm": 0.427734375, - "learning_rate": 4.757627132224276e-06, - "loss": 1.4482, - "step": 444 - }, - { - "epoch": 0.43316028552887736, - "grad_norm": 0.42578125, - "learning_rate": 4.756527432906387e-06, - "loss": 1.4239, - "step": 445 - }, - { - "epoch": 0.43413367942894227, - "grad_norm": 0.462890625, - "learning_rate": 4.755425372127961e-06, - "loss": 1.4439, - "step": 446 - }, - { - "epoch": 0.43510707332900717, - "grad_norm": 0.451171875, - "learning_rate": 4.754320951042307e-06, - "loss": 1.4092, - "step": 447 - }, - { - "epoch": 0.436080467229072, - "grad_norm": 0.439453125, - "learning_rate": 4.753214170805205e-06, - "loss": 1.4312, - "step": 448 - }, - { - "epoch": 0.4370538611291369, - "grad_norm": 0.43359375, - "learning_rate": 4.752105032574902e-06, - "loss": 1.4296, - "step": 449 - }, - { - "epoch": 0.4380272550292018, - "grad_norm": 0.435546875, - "learning_rate": 4.7509935375121115e-06, - "loss": 1.4436, - "step": 450 - }, - { - "epoch": 0.4390006489292667, - "grad_norm": 0.46484375, - "learning_rate": 4.749879686780019e-06, - "loss": 1.4108, - "step": 451 - }, - { - "epoch": 0.4399740428293316, - "grad_norm": 0.466796875, - "learning_rate": 4.74876348154427e-06, - "loss": 1.4423, - "step": 452 - }, - { - "epoch": 0.4409474367293965, - "grad_norm": 0.4296875, - "learning_rate": 4.747644922972973e-06, - "loss": 1.4195, - "step": 453 - }, - { - "epoch": 0.4419208306294614, - "grad_norm": 0.43359375, - "learning_rate": 4.746524012236706e-06, - "loss": 1.4384, - "step": 454 - }, - { - "epoch": 0.44289422452952626, - "grad_norm": 0.43359375, - "learning_rate": 4.745400750508502e-06, - "loss": 1.4084, - "step": 455 - }, - { - "epoch": 0.44386761842959116, - "grad_norm": 0.4375, - "learning_rate": 4.7442751389638564e-06, - "loss": 1.4368, - "step": 456 - }, - { - "epoch": 0.44484101232965606, - "grad_norm": 0.447265625, - "learning_rate": 4.743147178780725e-06, - "loss": 1.4065, - "step": 457 - }, - { - "epoch": 0.44581440622972096, - "grad_norm": 0.4375, - "learning_rate": 4.7420168711395194e-06, - "loss": 1.4054, - "step": 458 - }, - { - "epoch": 0.44678780012978586, - "grad_norm": 0.421875, - "learning_rate": 4.7408842172231115e-06, - "loss": 1.3868, - "step": 459 - }, - { - "epoch": 0.44776119402985076, - "grad_norm": 0.43359375, - "learning_rate": 4.739749218216823e-06, - "loss": 1.4552, - "step": 460 - }, - { - "epoch": 0.44873458792991566, - "grad_norm": 0.451171875, - "learning_rate": 4.7386118753084325e-06, - "loss": 1.4349, - "step": 461 - }, - { - "epoch": 0.4497079818299805, - "grad_norm": 0.458984375, - "learning_rate": 4.737472189688173e-06, - "loss": 1.4277, - "step": 462 - }, - { - "epoch": 0.4506813757300454, - "grad_norm": 0.43359375, - "learning_rate": 4.736330162548729e-06, - "loss": 1.4151, - "step": 463 - }, - { - "epoch": 0.4516547696301103, - "grad_norm": 0.439453125, - "learning_rate": 4.735185795085231e-06, - "loss": 1.4178, - "step": 464 - }, - { - "epoch": 0.4526281635301752, - "grad_norm": 0.427734375, - "learning_rate": 4.734039088495265e-06, - "loss": 1.4401, - "step": 465 - }, - { - "epoch": 0.4536015574302401, - "grad_norm": 0.435546875, - "learning_rate": 4.73289004397886e-06, - "loss": 1.416, - "step": 466 - }, - { - "epoch": 0.454574951330305, - "grad_norm": 0.44140625, - "learning_rate": 4.731738662738494e-06, - "loss": 1.4224, - "step": 467 - }, - { - "epoch": 0.4555483452303699, - "grad_norm": 0.435546875, - "learning_rate": 4.73058494597909e-06, - "loss": 1.4286, - "step": 468 - }, - { - "epoch": 0.45652173913043476, - "grad_norm": 0.435546875, - "learning_rate": 4.729428894908013e-06, - "loss": 1.4023, - "step": 469 - }, - { - "epoch": 0.45749513303049966, - "grad_norm": 0.44921875, - "learning_rate": 4.728270510735076e-06, - "loss": 1.4043, - "step": 470 - }, - { - "epoch": 0.45846852693056456, - "grad_norm": 0.4296875, - "learning_rate": 4.727109794672528e-06, - "loss": 1.4357, - "step": 471 - }, - { - "epoch": 0.45944192083062946, - "grad_norm": 0.435546875, - "learning_rate": 4.725946747935062e-06, - "loss": 1.4129, - "step": 472 - }, - { - "epoch": 0.46041531473069436, - "grad_norm": 0.4609375, - "learning_rate": 4.724781371739807e-06, - "loss": 1.4292, - "step": 473 - }, - { - "epoch": 0.46138870863075926, - "grad_norm": 0.453125, - "learning_rate": 4.723613667306333e-06, - "loss": 1.4386, - "step": 474 - }, - { - "epoch": 0.46236210253082416, - "grad_norm": 0.4453125, - "learning_rate": 4.722443635856646e-06, - "loss": 1.4274, - "step": 475 - }, - { - "epoch": 0.46333549643088906, - "grad_norm": 0.421875, - "learning_rate": 4.721271278615185e-06, - "loss": 1.4315, - "step": 476 - }, - { - "epoch": 0.4643088903309539, - "grad_norm": 0.451171875, - "learning_rate": 4.720096596808824e-06, - "loss": 1.4202, - "step": 477 - }, - { - "epoch": 0.4652822842310188, - "grad_norm": 0.431640625, - "learning_rate": 4.718919591666871e-06, - "loss": 1.42, - "step": 478 - }, - { - "epoch": 0.4662556781310837, - "grad_norm": 0.44140625, - "learning_rate": 4.717740264421063e-06, - "loss": 1.4341, - "step": 479 - }, - { - "epoch": 0.4672290720311486, - "grad_norm": 0.44140625, - "learning_rate": 4.716558616305568e-06, - "loss": 1.4016, - "step": 480 - }, - { - "epoch": 0.4682024659312135, - "grad_norm": 0.455078125, - "learning_rate": 4.715374648556985e-06, - "loss": 1.4382, - "step": 481 - }, - { - "epoch": 0.4691758598312784, - "grad_norm": 0.431640625, - "learning_rate": 4.714188362414337e-06, - "loss": 1.4271, - "step": 482 - }, - { - "epoch": 0.4701492537313433, - "grad_norm": 0.43359375, - "learning_rate": 4.712999759119076e-06, - "loss": 1.43, - "step": 483 - }, - { - "epoch": 0.47112264763140815, - "grad_norm": 0.443359375, - "learning_rate": 4.7118088399150776e-06, - "loss": 1.4367, - "step": 484 - }, - { - "epoch": 0.47209604153147305, - "grad_norm": 0.44921875, - "learning_rate": 4.71061560604864e-06, - "loss": 1.4129, - "step": 485 - }, - { - "epoch": 0.47306943543153795, - "grad_norm": 0.44921875, - "learning_rate": 4.709420058768487e-06, - "loss": 1.3979, - "step": 486 - }, - { - "epoch": 0.47404282933160286, - "grad_norm": 0.44921875, - "learning_rate": 4.708222199325759e-06, - "loss": 1.418, - "step": 487 - }, - { - "epoch": 0.47501622323166776, - "grad_norm": 0.44140625, - "learning_rate": 4.70702202897402e-06, - "loss": 1.4164, - "step": 488 - }, - { - "epoch": 0.47598961713173266, - "grad_norm": 0.451171875, - "learning_rate": 4.70581954896925e-06, - "loss": 1.4043, - "step": 489 - }, - { - "epoch": 0.47696301103179756, - "grad_norm": 0.4375, - "learning_rate": 4.704614760569846e-06, - "loss": 1.4203, - "step": 490 - }, - { - "epoch": 0.4779364049318624, - "grad_norm": 0.435546875, - "learning_rate": 4.703407665036622e-06, - "loss": 1.4177, - "step": 491 - }, - { - "epoch": 0.4789097988319273, - "grad_norm": 0.462890625, - "learning_rate": 4.702198263632808e-06, - "loss": 1.4099, - "step": 492 - }, - { - "epoch": 0.4798831927319922, - "grad_norm": 0.447265625, - "learning_rate": 4.700986557624041e-06, - "loss": 1.4183, - "step": 493 - }, - { - "epoch": 0.4808565866320571, - "grad_norm": 0.42578125, - "learning_rate": 4.699772548278378e-06, - "loss": 1.4027, - "step": 494 - }, - { - "epoch": 0.481829980532122, - "grad_norm": 0.42578125, - "learning_rate": 4.6985562368662795e-06, - "loss": 1.3994, - "step": 495 - }, - { - "epoch": 0.4828033744321869, - "grad_norm": 0.4375, - "learning_rate": 4.697337624660619e-06, - "loss": 1.4274, - "step": 496 - }, - { - "epoch": 0.4837767683322518, - "grad_norm": 0.4375, - "learning_rate": 4.696116712936676e-06, - "loss": 1.4226, - "step": 497 - }, - { - "epoch": 0.48475016223231665, - "grad_norm": 0.4453125, - "learning_rate": 4.694893502972137e-06, - "loss": 1.43, - "step": 498 - }, - { - "epoch": 0.48572355613238155, - "grad_norm": 0.4296875, - "learning_rate": 4.693667996047094e-06, - "loss": 1.4295, - "step": 499 - }, - { - "epoch": 0.48669695003244645, - "grad_norm": 0.431640625, - "learning_rate": 4.692440193444043e-06, - "loss": 1.4063, - "step": 500 - }, - { - "epoch": 0.48767034393251135, - "grad_norm": 0.41796875, - "learning_rate": 4.6912100964478825e-06, - "loss": 1.4152, - "step": 501 - }, - { - "epoch": 0.48864373783257625, - "grad_norm": 0.44140625, - "learning_rate": 4.689977706345909e-06, - "loss": 1.4157, - "step": 502 - }, - { - "epoch": 0.48961713173264115, - "grad_norm": 0.451171875, - "learning_rate": 4.6887430244278235e-06, - "loss": 1.4212, - "step": 503 - }, - { - "epoch": 0.49059052563270605, - "grad_norm": 0.443359375, - "learning_rate": 4.6875060519857215e-06, - "loss": 1.4256, - "step": 504 - }, - { - "epoch": 0.49156391953277095, - "grad_norm": 0.431640625, - "learning_rate": 4.686266790314099e-06, - "loss": 1.4218, - "step": 505 - }, - { - "epoch": 0.4925373134328358, - "grad_norm": 0.431640625, - "learning_rate": 4.685025240709845e-06, - "loss": 1.4024, - "step": 506 - }, - { - "epoch": 0.4935107073329007, - "grad_norm": 0.453125, - "learning_rate": 4.683781404472243e-06, - "loss": 1.4169, - "step": 507 - }, - { - "epoch": 0.4944841012329656, - "grad_norm": 0.435546875, - "learning_rate": 4.6825352829029705e-06, - "loss": 1.4094, - "step": 508 - }, - { - "epoch": 0.4954574951330305, - "grad_norm": 0.431640625, - "learning_rate": 4.6812868773060975e-06, - "loss": 1.4059, - "step": 509 - }, - { - "epoch": 0.4964308890330954, - "grad_norm": 0.4296875, - "learning_rate": 4.6800361889880805e-06, - "loss": 1.4261, - "step": 510 - }, - { - "epoch": 0.4974042829331603, - "grad_norm": 0.455078125, - "learning_rate": 4.67878321925777e-06, - "loss": 1.4211, - "step": 511 - }, - { - "epoch": 0.4983776768332252, - "grad_norm": 0.439453125, - "learning_rate": 4.6775279694264e-06, - "loss": 1.3971, - "step": 512 - }, - { - "epoch": 0.49935107073329005, - "grad_norm": 0.439453125, - "learning_rate": 4.6762704408075925e-06, - "loss": 1.3995, - "step": 513 - }, - { - "epoch": 0.500324464633355, - "grad_norm": 0.44921875, - "learning_rate": 4.675010634717353e-06, - "loss": 1.4011, - "step": 514 - }, - { - "epoch": 0.5012978585334199, - "grad_norm": 0.44140625, - "learning_rate": 4.673748552474071e-06, - "loss": 1.4245, - "step": 515 - }, - { - "epoch": 0.5022712524334848, - "grad_norm": 0.451171875, - "learning_rate": 4.672484195398519e-06, - "loss": 1.4161, - "step": 516 - }, - { - "epoch": 0.5032446463335496, - "grad_norm": 0.427734375, - "learning_rate": 4.671217564813849e-06, - "loss": 1.4103, - "step": 517 - }, - { - "epoch": 0.5042180402336145, - "grad_norm": 0.4296875, - "learning_rate": 4.669948662045593e-06, - "loss": 1.407, - "step": 518 - }, - { - "epoch": 0.5051914341336794, - "grad_norm": 0.4453125, - "learning_rate": 4.668677488421659e-06, - "loss": 1.4174, - "step": 519 - }, - { - "epoch": 0.5061648280337443, - "grad_norm": 0.4296875, - "learning_rate": 4.667404045272334e-06, - "loss": 1.4044, - "step": 520 - }, - { - "epoch": 0.5071382219338092, - "grad_norm": 0.435546875, - "learning_rate": 4.666128333930278e-06, - "loss": 1.3874, - "step": 521 - }, - { - "epoch": 0.5081116158338741, - "grad_norm": 0.44140625, - "learning_rate": 4.664850355730526e-06, - "loss": 1.408, - "step": 522 - }, - { - "epoch": 0.509085009733939, - "grad_norm": 0.427734375, - "learning_rate": 4.663570112010485e-06, - "loss": 1.4181, - "step": 523 - }, - { - "epoch": 0.5100584036340039, - "grad_norm": 0.443359375, - "learning_rate": 4.662287604109932e-06, - "loss": 1.4218, - "step": 524 - }, - { - "epoch": 0.5110317975340688, - "grad_norm": 0.435546875, - "learning_rate": 4.661002833371014e-06, - "loss": 1.4302, - "step": 525 - }, - { - "epoch": 0.5120051914341337, - "grad_norm": 0.44140625, - "learning_rate": 4.659715801138247e-06, - "loss": 1.4138, - "step": 526 - }, - { - "epoch": 0.5129785853341986, - "grad_norm": 0.46875, - "learning_rate": 4.658426508758512e-06, - "loss": 1.3998, - "step": 527 - }, - { - "epoch": 0.5139519792342635, - "grad_norm": 0.45703125, - "learning_rate": 4.657134957581057e-06, - "loss": 1.3993, - "step": 528 - }, - { - "epoch": 0.5149253731343284, - "grad_norm": 0.4453125, - "learning_rate": 4.655841148957493e-06, - "loss": 1.4133, - "step": 529 - }, - { - "epoch": 0.5158987670343933, - "grad_norm": 0.458984375, - "learning_rate": 4.654545084241792e-06, - "loss": 1.4066, - "step": 530 - }, - { - "epoch": 0.5168721609344581, - "grad_norm": 0.462890625, - "learning_rate": 4.653246764790289e-06, - "loss": 1.3966, - "step": 531 - }, - { - "epoch": 0.517845554834523, - "grad_norm": 0.423828125, - "learning_rate": 4.6519461919616795e-06, - "loss": 1.3933, - "step": 532 - }, - { - "epoch": 0.5188189487345879, - "grad_norm": 0.423828125, - "learning_rate": 4.650643367117013e-06, - "loss": 1.4092, - "step": 533 - }, - { - "epoch": 0.5197923426346528, - "grad_norm": 0.427734375, - "learning_rate": 4.649338291619699e-06, - "loss": 1.3936, - "step": 534 - }, - { - "epoch": 0.5207657365347177, - "grad_norm": 0.44140625, - "learning_rate": 4.648030966835502e-06, - "loss": 1.3975, - "step": 535 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.4453125, - "learning_rate": 4.646721394132541e-06, - "loss": 1.4235, - "step": 536 - }, - { - "epoch": 0.5227125243348475, - "grad_norm": 0.4296875, - "learning_rate": 4.645409574881287e-06, - "loss": 1.3871, - "step": 537 - }, - { - "epoch": 0.5236859182349124, - "grad_norm": 0.431640625, - "learning_rate": 4.6440955104545595e-06, - "loss": 1.4192, - "step": 538 - }, - { - "epoch": 0.5246593121349773, - "grad_norm": 0.4375, - "learning_rate": 4.6427792022275296e-06, - "loss": 1.4187, - "step": 539 - }, - { - "epoch": 0.5256327060350422, - "grad_norm": 0.443359375, - "learning_rate": 4.641460651577717e-06, - "loss": 1.3986, - "step": 540 - }, - { - "epoch": 0.5266060999351071, - "grad_norm": 0.435546875, - "learning_rate": 4.640139859884989e-06, - "loss": 1.371, - "step": 541 - }, - { - "epoch": 0.527579493835172, - "grad_norm": 0.421875, - "learning_rate": 4.638816828531555e-06, - "loss": 1.4026, - "step": 542 - }, - { - "epoch": 0.5285528877352369, - "grad_norm": 0.447265625, - "learning_rate": 4.637491558901971e-06, - "loss": 1.4008, - "step": 543 - }, - { - "epoch": 0.5295262816353018, - "grad_norm": 0.423828125, - "learning_rate": 4.636164052383134e-06, - "loss": 1.3944, - "step": 544 - }, - { - "epoch": 0.5304996755353667, - "grad_norm": 0.44140625, - "learning_rate": 4.634834310364282e-06, - "loss": 1.421, - "step": 545 - }, - { - "epoch": 0.5314730694354315, - "grad_norm": 0.44140625, - "learning_rate": 4.633502334236993e-06, - "loss": 1.4209, - "step": 546 - }, - { - "epoch": 0.5324464633354964, - "grad_norm": 0.4140625, - "learning_rate": 4.632168125395183e-06, - "loss": 1.3769, - "step": 547 - }, - { - "epoch": 0.5334198572355613, - "grad_norm": 0.419921875, - "learning_rate": 4.6308316852351036e-06, - "loss": 1.4062, - "step": 548 - }, - { - "epoch": 0.5343932511356262, - "grad_norm": 0.43359375, - "learning_rate": 4.629493015155343e-06, - "loss": 1.4029, - "step": 549 - }, - { - "epoch": 0.5353666450356911, - "grad_norm": 0.435546875, - "learning_rate": 4.628152116556821e-06, - "loss": 1.4101, - "step": 550 - }, - { - "epoch": 0.536340038935756, - "grad_norm": 0.421875, - "learning_rate": 4.626808990842793e-06, - "loss": 1.4199, - "step": 551 - }, - { - "epoch": 0.5373134328358209, - "grad_norm": 0.431640625, - "learning_rate": 4.625463639418839e-06, - "loss": 1.4257, - "step": 552 - }, - { - "epoch": 0.5382868267358858, - "grad_norm": 0.419921875, - "learning_rate": 4.624116063692875e-06, - "loss": 1.3929, - "step": 553 - }, - { - "epoch": 0.5392602206359507, - "grad_norm": 0.431640625, - "learning_rate": 4.622766265075141e-06, - "loss": 1.4009, - "step": 554 - }, - { - "epoch": 0.5402336145360156, - "grad_norm": 0.427734375, - "learning_rate": 4.621414244978204e-06, - "loss": 1.3887, - "step": 555 - }, - { - "epoch": 0.5412070084360805, - "grad_norm": 0.43359375, - "learning_rate": 4.620060004816957e-06, - "loss": 1.4077, - "step": 556 - }, - { - "epoch": 0.5421804023361454, - "grad_norm": 0.43359375, - "learning_rate": 4.618703546008611e-06, - "loss": 1.4113, - "step": 557 - }, - { - "epoch": 0.5431537962362103, - "grad_norm": 0.44140625, - "learning_rate": 4.617344869972707e-06, - "loss": 1.4042, - "step": 558 - }, - { - "epoch": 0.5441271901362752, - "grad_norm": 0.439453125, - "learning_rate": 4.615983978131102e-06, - "loss": 1.4071, - "step": 559 - }, - { - "epoch": 0.54510058403634, - "grad_norm": 0.4296875, - "learning_rate": 4.614620871907969e-06, - "loss": 1.3912, - "step": 560 - }, - { - "epoch": 0.5460739779364049, - "grad_norm": 0.455078125, - "learning_rate": 4.613255552729805e-06, - "loss": 1.4088, - "step": 561 - }, - { - "epoch": 0.5470473718364698, - "grad_norm": 0.439453125, - "learning_rate": 4.611888022025417e-06, - "loss": 1.3976, - "step": 562 - }, - { - "epoch": 0.5480207657365347, - "grad_norm": 0.443359375, - "learning_rate": 4.610518281225929e-06, - "loss": 1.4268, - "step": 563 - }, - { - "epoch": 0.5489941596365996, - "grad_norm": 0.4375, - "learning_rate": 4.609146331764778e-06, - "loss": 1.3967, - "step": 564 - }, - { - "epoch": 0.5499675535366645, - "grad_norm": 0.435546875, - "learning_rate": 4.607772175077712e-06, - "loss": 1.4042, - "step": 565 - }, - { - "epoch": 0.5509409474367294, - "grad_norm": 0.482421875, - "learning_rate": 4.606395812602788e-06, - "loss": 1.4014, - "step": 566 - }, - { - "epoch": 0.5519143413367943, - "grad_norm": 0.482421875, - "learning_rate": 4.605017245780372e-06, - "loss": 1.417, - "step": 567 - }, - { - "epoch": 0.5528877352368592, - "grad_norm": 0.45703125, - "learning_rate": 4.603636476053139e-06, - "loss": 1.387, - "step": 568 - }, - { - "epoch": 0.5538611291369241, - "grad_norm": 0.443359375, - "learning_rate": 4.602253504866066e-06, - "loss": 1.4108, - "step": 569 - }, - { - "epoch": 0.554834523036989, - "grad_norm": 0.4453125, - "learning_rate": 4.600868333666434e-06, - "loss": 1.4, - "step": 570 - }, - { - "epoch": 0.5558079169370539, - "grad_norm": 0.462890625, - "learning_rate": 4.5994809639038285e-06, - "loss": 1.398, - "step": 571 - }, - { - "epoch": 0.5567813108371188, - "grad_norm": 0.4375, - "learning_rate": 4.598091397030136e-06, - "loss": 1.3986, - "step": 572 - }, - { - "epoch": 0.5577547047371837, - "grad_norm": 0.453125, - "learning_rate": 4.596699634499538e-06, - "loss": 1.3918, - "step": 573 - }, - { - "epoch": 0.5587280986372486, - "grad_norm": 0.439453125, - "learning_rate": 4.59530567776852e-06, - "loss": 1.3921, - "step": 574 - }, - { - "epoch": 0.5597014925373134, - "grad_norm": 0.453125, - "learning_rate": 4.593909528295859e-06, - "loss": 1.3768, - "step": 575 - }, - { - "epoch": 0.5606748864373783, - "grad_norm": 0.462890625, - "learning_rate": 4.5925111875426285e-06, - "loss": 1.4077, - "step": 576 - }, - { - "epoch": 0.5616482803374432, - "grad_norm": 0.462890625, - "learning_rate": 4.591110656972195e-06, - "loss": 1.411, - "step": 577 - }, - { - "epoch": 0.5626216742375081, - "grad_norm": 0.4296875, - "learning_rate": 4.589707938050216e-06, - "loss": 1.3958, - "step": 578 - }, - { - "epoch": 0.563595068137573, - "grad_norm": 0.431640625, - "learning_rate": 4.588303032244641e-06, - "loss": 1.4149, - "step": 579 - }, - { - "epoch": 0.5645684620376379, - "grad_norm": 0.427734375, - "learning_rate": 4.586895941025705e-06, - "loss": 1.3885, - "step": 580 - }, - { - "epoch": 0.5655418559377028, - "grad_norm": 0.4453125, - "learning_rate": 4.585486665865933e-06, - "loss": 1.3821, - "step": 581 - }, - { - "epoch": 0.5665152498377677, - "grad_norm": 0.4375, - "learning_rate": 4.584075208240135e-06, - "loss": 1.3888, - "step": 582 - }, - { - "epoch": 0.5674886437378326, - "grad_norm": 0.42578125, - "learning_rate": 4.5826615696254026e-06, - "loss": 1.374, - "step": 583 - }, - { - "epoch": 0.5684620376378975, - "grad_norm": 0.443359375, - "learning_rate": 4.581245751501113e-06, - "loss": 1.3988, - "step": 584 - }, - { - "epoch": 0.5694354315379624, - "grad_norm": 0.435546875, - "learning_rate": 4.579827755348921e-06, - "loss": 1.3786, - "step": 585 - }, - { - "epoch": 0.5704088254380273, - "grad_norm": 0.435546875, - "learning_rate": 4.578407582652764e-06, - "loss": 1.3826, - "step": 586 - }, - { - "epoch": 0.5713822193380922, - "grad_norm": 0.44140625, - "learning_rate": 4.576985234898855e-06, - "loss": 1.3793, - "step": 587 - }, - { - "epoch": 0.5723556132381571, - "grad_norm": 0.439453125, - "learning_rate": 4.575560713575684e-06, - "loss": 1.4009, - "step": 588 - }, - { - "epoch": 0.5733290071382219, - "grad_norm": 0.466796875, - "learning_rate": 4.5741340201740146e-06, - "loss": 1.4198, - "step": 589 - }, - { - "epoch": 0.5743024010382868, - "grad_norm": 0.4609375, - "learning_rate": 4.572705156186886e-06, - "loss": 1.4197, - "step": 590 - }, - { - "epoch": 0.5752757949383517, - "grad_norm": 0.447265625, - "learning_rate": 4.571274123109606e-06, - "loss": 1.3729, - "step": 591 - }, - { - "epoch": 0.5762491888384166, - "grad_norm": 0.421875, - "learning_rate": 4.569840922439753e-06, - "loss": 1.3909, - "step": 592 - }, - { - "epoch": 0.5772225827384815, - "grad_norm": 0.43359375, - "learning_rate": 4.568405555677177e-06, - "loss": 1.3993, - "step": 593 - }, - { - "epoch": 0.5781959766385464, - "grad_norm": 0.474609375, - "learning_rate": 4.566968024323989e-06, - "loss": 1.405, - "step": 594 - }, - { - "epoch": 0.5791693705386113, - "grad_norm": 0.439453125, - "learning_rate": 4.565528329884571e-06, - "loss": 1.4054, - "step": 595 - }, - { - "epoch": 0.5801427644386762, - "grad_norm": 0.4453125, - "learning_rate": 4.564086473865565e-06, - "loss": 1.3597, - "step": 596 - }, - { - "epoch": 0.5811161583387411, - "grad_norm": 0.4375, - "learning_rate": 4.562642457775876e-06, - "loss": 1.3909, - "step": 597 - }, - { - "epoch": 0.582089552238806, - "grad_norm": 0.453125, - "learning_rate": 4.56119628312667e-06, - "loss": 1.4039, - "step": 598 - }, - { - "epoch": 0.5830629461388709, - "grad_norm": 0.427734375, - "learning_rate": 4.559747951431372e-06, - "loss": 1.395, - "step": 599 - }, - { - "epoch": 0.5840363400389358, - "grad_norm": 0.44140625, - "learning_rate": 4.558297464205666e-06, - "loss": 1.3963, - "step": 600 - }, - { - "epoch": 0.5850097339390007, - "grad_norm": 0.44140625, - "learning_rate": 4.556844822967486e-06, - "loss": 1.4153, - "step": 601 - }, - { - "epoch": 0.5859831278390656, - "grad_norm": 0.453125, - "learning_rate": 4.555390029237026e-06, - "loss": 1.3682, - "step": 602 - }, - { - "epoch": 0.5869565217391305, - "grad_norm": 0.44140625, - "learning_rate": 4.55393308453673e-06, - "loss": 1.4046, - "step": 603 - }, - { - "epoch": 0.5879299156391953, - "grad_norm": 0.439453125, - "learning_rate": 4.552473990391294e-06, - "loss": 1.3929, - "step": 604 - }, - { - "epoch": 0.5889033095392602, - "grad_norm": 0.4140625, - "learning_rate": 4.551012748327663e-06, - "loss": 1.3667, - "step": 605 - }, - { - "epoch": 0.5898767034393251, - "grad_norm": 0.44921875, - "learning_rate": 4.549549359875031e-06, - "loss": 1.3875, - "step": 606 - }, - { - "epoch": 0.59085009733939, - "grad_norm": 0.427734375, - "learning_rate": 4.548083826564834e-06, - "loss": 1.3889, - "step": 607 - }, - { - "epoch": 0.5918234912394549, - "grad_norm": 0.431640625, - "learning_rate": 4.546616149930758e-06, - "loss": 1.3711, - "step": 608 - }, - { - "epoch": 0.5927968851395198, - "grad_norm": 0.431640625, - "learning_rate": 4.54514633150873e-06, - "loss": 1.3763, - "step": 609 - }, - { - "epoch": 0.5937702790395847, - "grad_norm": 0.421875, - "learning_rate": 4.543674372836915e-06, - "loss": 1.3942, - "step": 610 - }, - { - "epoch": 0.5947436729396496, - "grad_norm": 0.42578125, - "learning_rate": 4.542200275455724e-06, - "loss": 1.3893, - "step": 611 - }, - { - "epoch": 0.5957170668397145, - "grad_norm": 0.423828125, - "learning_rate": 4.5407240409078e-06, - "loss": 1.3826, - "step": 612 - }, - { - "epoch": 0.5966904607397794, - "grad_norm": 0.431640625, - "learning_rate": 4.539245670738029e-06, - "loss": 1.3885, - "step": 613 - }, - { - "epoch": 0.5976638546398443, - "grad_norm": 0.431640625, - "learning_rate": 4.537765166493524e-06, - "loss": 1.4051, - "step": 614 - }, - { - "epoch": 0.5986372485399092, - "grad_norm": 0.44140625, - "learning_rate": 4.5362825297236394e-06, - "loss": 1.3893, - "step": 615 - }, - { - "epoch": 0.5996106424399741, - "grad_norm": 0.4375, - "learning_rate": 4.534797761979955e-06, - "loss": 1.3847, - "step": 616 - }, - { - "epoch": 0.600584036340039, - "grad_norm": 0.423828125, - "learning_rate": 4.533310864816286e-06, - "loss": 1.3936, - "step": 617 - }, - { - "epoch": 0.6015574302401038, - "grad_norm": 0.42578125, - "learning_rate": 4.531821839788671e-06, - "loss": 1.383, - "step": 618 - }, - { - "epoch": 0.6025308241401687, - "grad_norm": 0.427734375, - "learning_rate": 4.5303306884553785e-06, - "loss": 1.3955, - "step": 619 - }, - { - "epoch": 0.6035042180402336, - "grad_norm": 0.43359375, - "learning_rate": 4.528837412376902e-06, - "loss": 1.3963, - "step": 620 - }, - { - "epoch": 0.6044776119402985, - "grad_norm": 0.431640625, - "learning_rate": 4.527342013115956e-06, - "loss": 1.3767, - "step": 621 - }, - { - "epoch": 0.6054510058403634, - "grad_norm": 0.4296875, - "learning_rate": 4.525844492237481e-06, - "loss": 1.3808, - "step": 622 - }, - { - "epoch": 0.6064243997404283, - "grad_norm": 0.435546875, - "learning_rate": 4.524344851308635e-06, - "loss": 1.3882, - "step": 623 - }, - { - "epoch": 0.6073977936404932, - "grad_norm": 0.423828125, - "learning_rate": 4.522843091898795e-06, - "loss": 1.3824, - "step": 624 - }, - { - "epoch": 0.6083711875405581, - "grad_norm": 0.42578125, - "learning_rate": 4.521339215579555e-06, - "loss": 1.3764, - "step": 625 - }, - { - "epoch": 0.609344581440623, - "grad_norm": 0.439453125, - "learning_rate": 4.519833223924725e-06, - "loss": 1.3805, - "step": 626 - }, - { - "epoch": 0.6103179753406879, - "grad_norm": 0.439453125, - "learning_rate": 4.518325118510328e-06, - "loss": 1.3689, - "step": 627 - }, - { - "epoch": 0.6112913692407528, - "grad_norm": 0.4375, - "learning_rate": 4.516814900914601e-06, - "loss": 1.386, - "step": 628 - }, - { - "epoch": 0.6122647631408177, - "grad_norm": 0.431640625, - "learning_rate": 4.515302572717987e-06, - "loss": 1.4075, - "step": 629 - }, - { - "epoch": 0.6132381570408826, - "grad_norm": 0.421875, - "learning_rate": 4.513788135503142e-06, - "loss": 1.3808, - "step": 630 - }, - { - "epoch": 0.6142115509409475, - "grad_norm": 0.43359375, - "learning_rate": 4.512271590854929e-06, - "loss": 1.3767, - "step": 631 - }, - { - "epoch": 0.6151849448410124, - "grad_norm": 0.431640625, - "learning_rate": 4.5107529403604126e-06, - "loss": 1.3818, - "step": 632 - }, - { - "epoch": 0.6161583387410772, - "grad_norm": 0.42578125, - "learning_rate": 4.509232185608864e-06, - "loss": 1.3886, - "step": 633 - }, - { - "epoch": 0.6171317326411421, - "grad_norm": 0.423828125, - "learning_rate": 4.507709328191758e-06, - "loss": 1.3751, - "step": 634 - }, - { - "epoch": 0.618105126541207, - "grad_norm": 0.435546875, - "learning_rate": 4.506184369702766e-06, - "loss": 1.3608, - "step": 635 - }, - { - "epoch": 0.6190785204412719, - "grad_norm": 0.4296875, - "learning_rate": 4.5046573117377616e-06, - "loss": 1.3771, - "step": 636 - }, - { - "epoch": 0.6200519143413368, - "grad_norm": 0.4375, - "learning_rate": 4.503128155894812e-06, - "loss": 1.3854, - "step": 637 - }, - { - "epoch": 0.6210253082414017, - "grad_norm": 0.439453125, - "learning_rate": 4.501596903774184e-06, - "loss": 1.3916, - "step": 638 - }, - { - "epoch": 0.6219987021414666, - "grad_norm": 0.470703125, - "learning_rate": 4.5000635569783365e-06, - "loss": 1.3768, - "step": 639 - }, - { - "epoch": 0.6229720960415315, - "grad_norm": 0.419921875, - "learning_rate": 4.498528117111918e-06, - "loss": 1.3629, - "step": 640 - }, - { - "epoch": 0.6239454899415964, - "grad_norm": 0.41796875, - "learning_rate": 4.49699058578177e-06, - "loss": 1.3698, - "step": 641 - }, - { - "epoch": 0.6249188838416613, - "grad_norm": 0.4375, - "learning_rate": 4.495450964596923e-06, - "loss": 1.3571, - "step": 642 - }, - { - "epoch": 0.6258922777417262, - "grad_norm": 0.4453125, - "learning_rate": 4.493909255168592e-06, - "loss": 1.3787, - "step": 643 - }, - { - "epoch": 0.6268656716417911, - "grad_norm": 0.44140625, - "learning_rate": 4.492365459110182e-06, - "loss": 1.3611, - "step": 644 - }, - { - "epoch": 0.627839065541856, - "grad_norm": 0.42578125, - "learning_rate": 4.490819578037275e-06, - "loss": 1.3921, - "step": 645 - }, - { - "epoch": 0.6288124594419209, - "grad_norm": 0.41796875, - "learning_rate": 4.4892716135676415e-06, - "loss": 1.4044, - "step": 646 - }, - { - "epoch": 0.6297858533419857, - "grad_norm": 0.431640625, - "learning_rate": 4.487721567321229e-06, - "loss": 1.3714, - "step": 647 - }, - { - "epoch": 0.6307592472420506, - "grad_norm": 0.431640625, - "learning_rate": 4.486169440920163e-06, - "loss": 1.3805, - "step": 648 - }, - { - "epoch": 0.6317326411421155, - "grad_norm": 0.443359375, - "learning_rate": 4.484615235988747e-06, - "loss": 1.3888, - "step": 649 - }, - { - "epoch": 0.6327060350421804, - "grad_norm": 0.412109375, - "learning_rate": 4.4830589541534615e-06, - "loss": 1.3453, - "step": 650 - }, - { - "epoch": 0.6336794289422453, - "grad_norm": 0.42578125, - "learning_rate": 4.481500597042956e-06, - "loss": 1.4119, - "step": 651 - }, - { - "epoch": 0.6346528228423102, - "grad_norm": 0.443359375, - "learning_rate": 4.479940166288056e-06, - "loss": 1.3965, - "step": 652 - }, - { - "epoch": 0.6356262167423751, - "grad_norm": 0.439453125, - "learning_rate": 4.4783776635217555e-06, - "loss": 1.4012, - "step": 653 - }, - { - "epoch": 0.63659961064244, - "grad_norm": 0.427734375, - "learning_rate": 4.476813090379216e-06, - "loss": 1.3946, - "step": 654 - }, - { - "epoch": 0.6375730045425049, - "grad_norm": 0.427734375, - "learning_rate": 4.475246448497766e-06, - "loss": 1.3893, - "step": 655 - }, - { - "epoch": 0.6385463984425698, - "grad_norm": 0.44921875, - "learning_rate": 4.473677739516901e-06, - "loss": 1.3874, - "step": 656 - }, - { - "epoch": 0.6395197923426347, - "grad_norm": 0.4296875, - "learning_rate": 4.472106965078277e-06, - "loss": 1.3767, - "step": 657 - }, - { - "epoch": 0.6404931862426996, - "grad_norm": 0.427734375, - "learning_rate": 4.470534126825714e-06, - "loss": 1.3864, - "step": 658 - }, - { - "epoch": 0.6414665801427645, - "grad_norm": 0.439453125, - "learning_rate": 4.468959226405188e-06, - "loss": 1.3849, - "step": 659 - }, - { - "epoch": 0.6424399740428294, - "grad_norm": 0.423828125, - "learning_rate": 4.467382265464838e-06, - "loss": 1.3789, - "step": 660 - }, - { - "epoch": 0.6434133679428943, - "grad_norm": 0.41796875, - "learning_rate": 4.465803245654955e-06, - "loss": 1.3473, - "step": 661 - }, - { - "epoch": 0.6443867618429591, - "grad_norm": 0.416015625, - "learning_rate": 4.464222168627987e-06, - "loss": 1.3614, - "step": 662 - }, - { - "epoch": 0.645360155743024, - "grad_norm": 0.4296875, - "learning_rate": 4.462639036038536e-06, - "loss": 1.368, - "step": 663 - }, - { - "epoch": 0.6463335496430889, - "grad_norm": 0.451171875, - "learning_rate": 4.461053849543351e-06, - "loss": 1.3965, - "step": 664 - }, - { - "epoch": 0.6473069435431538, - "grad_norm": 0.43359375, - "learning_rate": 4.459466610801333e-06, - "loss": 1.3626, - "step": 665 - }, - { - "epoch": 0.6482803374432187, - "grad_norm": 0.421875, - "learning_rate": 4.457877321473532e-06, - "loss": 1.3897, - "step": 666 - }, - { - "epoch": 0.6492537313432836, - "grad_norm": 0.431640625, - "learning_rate": 4.456285983223143e-06, - "loss": 1.3792, - "step": 667 - }, - { - "epoch": 0.6502271252433485, - "grad_norm": 0.431640625, - "learning_rate": 4.454692597715502e-06, - "loss": 1.3975, - "step": 668 - }, - { - "epoch": 0.6512005191434134, - "grad_norm": 0.443359375, - "learning_rate": 4.4530971666180925e-06, - "loss": 1.3743, - "step": 669 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.4296875, - "learning_rate": 4.451499691600536e-06, - "loss": 1.3572, - "step": 670 - }, - { - "epoch": 0.6531473069435432, - "grad_norm": 0.431640625, - "learning_rate": 4.449900174334592e-06, - "loss": 1.3806, - "step": 671 - }, - { - "epoch": 0.6541207008436081, - "grad_norm": 0.4375, - "learning_rate": 4.44829861649416e-06, - "loss": 1.3852, - "step": 672 - }, - { - "epoch": 0.655094094743673, - "grad_norm": 0.412109375, - "learning_rate": 4.446695019755274e-06, - "loss": 1.3605, - "step": 673 - }, - { - "epoch": 0.6560674886437379, - "grad_norm": 0.419921875, - "learning_rate": 4.445089385796099e-06, - "loss": 1.3732, - "step": 674 - }, - { - "epoch": 0.6570408825438028, - "grad_norm": 0.42578125, - "learning_rate": 4.443481716296936e-06, - "loss": 1.3804, - "step": 675 - }, - { - "epoch": 0.6580142764438677, - "grad_norm": 0.423828125, - "learning_rate": 4.4418720129402145e-06, - "loss": 1.3737, - "step": 676 - }, - { - "epoch": 0.6589876703439325, - "grad_norm": 0.43359375, - "learning_rate": 4.440260277410491e-06, - "loss": 1.3599, - "step": 677 - }, - { - "epoch": 0.6599610642439974, - "grad_norm": 0.435546875, - "learning_rate": 4.438646511394451e-06, - "loss": 1.3439, - "step": 678 - }, - { - "epoch": 0.6609344581440623, - "grad_norm": 0.423828125, - "learning_rate": 4.437030716580904e-06, - "loss": 1.3733, - "step": 679 - }, - { - "epoch": 0.6619078520441272, - "grad_norm": 0.41796875, - "learning_rate": 4.435412894660782e-06, - "loss": 1.365, - "step": 680 - }, - { - "epoch": 0.6628812459441921, - "grad_norm": 0.40625, - "learning_rate": 4.433793047327138e-06, - "loss": 1.3667, - "step": 681 - }, - { - "epoch": 0.663854639844257, - "grad_norm": 0.44140625, - "learning_rate": 4.432171176275149e-06, - "loss": 1.3674, - "step": 682 - }, - { - "epoch": 0.6648280337443219, - "grad_norm": 0.443359375, - "learning_rate": 4.430547283202103e-06, - "loss": 1.3543, - "step": 683 - }, - { - "epoch": 0.6658014276443868, - "grad_norm": 0.423828125, - "learning_rate": 4.428921369807407e-06, - "loss": 1.3757, - "step": 684 - }, - { - "epoch": 0.6667748215444517, - "grad_norm": 0.427734375, - "learning_rate": 4.427293437792585e-06, - "loss": 1.371, - "step": 685 - }, - { - "epoch": 0.6677482154445166, - "grad_norm": 0.421875, - "learning_rate": 4.42566348886127e-06, - "loss": 1.3876, - "step": 686 - }, - { - "epoch": 0.6687216093445815, - "grad_norm": 0.41796875, - "learning_rate": 4.424031524719208e-06, - "loss": 1.3651, - "step": 687 - }, - { - "epoch": 0.6696950032446464, - "grad_norm": 0.42578125, - "learning_rate": 4.422397547074252e-06, - "loss": 1.3677, - "step": 688 - }, - { - "epoch": 0.6706683971447113, - "grad_norm": 0.4296875, - "learning_rate": 4.420761557636362e-06, - "loss": 1.3676, - "step": 689 - }, - { - "epoch": 0.6716417910447762, - "grad_norm": 0.443359375, - "learning_rate": 4.419123558117605e-06, - "loss": 1.3757, - "step": 690 - }, - { - "epoch": 0.672615184944841, - "grad_norm": 0.41796875, - "learning_rate": 4.417483550232151e-06, - "loss": 1.3686, - "step": 691 - }, - { - "epoch": 0.6735885788449059, - "grad_norm": 0.42578125, - "learning_rate": 4.415841535696271e-06, - "loss": 1.3566, - "step": 692 - }, - { - "epoch": 0.6745619727449708, - "grad_norm": 0.412109375, - "learning_rate": 4.414197516228338e-06, - "loss": 1.3666, - "step": 693 - }, - { - "epoch": 0.6755353666450357, - "grad_norm": 0.4140625, - "learning_rate": 4.41255149354882e-06, - "loss": 1.3482, - "step": 694 - }, - { - "epoch": 0.6765087605451006, - "grad_norm": 0.423828125, - "learning_rate": 4.410903469380284e-06, - "loss": 1.3735, - "step": 695 - }, - { - "epoch": 0.6774821544451655, - "grad_norm": 0.416015625, - "learning_rate": 4.40925344544739e-06, - "loss": 1.3773, - "step": 696 - }, - { - "epoch": 0.6784555483452304, - "grad_norm": 0.419921875, - "learning_rate": 4.407601423476893e-06, - "loss": 1.3859, - "step": 697 - }, - { - "epoch": 0.6794289422452953, - "grad_norm": 0.41796875, - "learning_rate": 4.405947405197635e-06, - "loss": 1.3765, - "step": 698 - }, - { - "epoch": 0.6804023361453602, - "grad_norm": 0.4375, - "learning_rate": 4.404291392340551e-06, - "loss": 1.3512, - "step": 699 - }, - { - "epoch": 0.6813757300454251, - "grad_norm": 0.41796875, - "learning_rate": 4.402633386638662e-06, - "loss": 1.3713, - "step": 700 - }, - { - "epoch": 0.68234912394549, - "grad_norm": 0.431640625, - "learning_rate": 4.400973389827072e-06, - "loss": 1.3983, - "step": 701 - }, - { - "epoch": 0.6833225178455549, - "grad_norm": 0.42578125, - "learning_rate": 4.399311403642975e-06, - "loss": 1.3858, - "step": 702 - }, - { - "epoch": 0.6842959117456198, - "grad_norm": 0.453125, - "learning_rate": 4.3976474298256395e-06, - "loss": 1.3683, - "step": 703 - }, - { - "epoch": 0.6852693056456847, - "grad_norm": 0.447265625, - "learning_rate": 4.395981470116419e-06, - "loss": 1.3935, - "step": 704 - }, - { - "epoch": 0.6862426995457496, - "grad_norm": 0.423828125, - "learning_rate": 4.394313526258743e-06, - "loss": 1.373, - "step": 705 - }, - { - "epoch": 0.6872160934458144, - "grad_norm": 0.435546875, - "learning_rate": 4.3926435999981194e-06, - "loss": 1.3831, - "step": 706 - }, - { - "epoch": 0.6881894873458793, - "grad_norm": 0.435546875, - "learning_rate": 4.390971693082128e-06, - "loss": 1.3843, - "step": 707 - }, - { - "epoch": 0.6891628812459442, - "grad_norm": 0.443359375, - "learning_rate": 4.3892978072604235e-06, - "loss": 1.3679, - "step": 708 - }, - { - "epoch": 0.6901362751460091, - "grad_norm": 0.423828125, - "learning_rate": 4.38762194428473e-06, - "loss": 1.3644, - "step": 709 - }, - { - "epoch": 0.691109669046074, - "grad_norm": 0.41796875, - "learning_rate": 4.3859441059088435e-06, - "loss": 1.3766, - "step": 710 - }, - { - "epoch": 0.6920830629461389, - "grad_norm": 0.453125, - "learning_rate": 4.384264293888624e-06, - "loss": 1.3654, - "step": 711 - }, - { - "epoch": 0.6930564568462038, - "grad_norm": 0.427734375, - "learning_rate": 4.382582509981996e-06, - "loss": 1.3694, - "step": 712 - }, - { - "epoch": 0.6940298507462687, - "grad_norm": 0.42578125, - "learning_rate": 4.3808987559489536e-06, - "loss": 1.3698, - "step": 713 - }, - { - "epoch": 0.6950032446463336, - "grad_norm": 0.44140625, - "learning_rate": 4.379213033551547e-06, - "loss": 1.3681, - "step": 714 - }, - { - "epoch": 0.6959766385463985, - "grad_norm": 0.427734375, - "learning_rate": 4.377525344553888e-06, - "loss": 1.3757, - "step": 715 - }, - { - "epoch": 0.6969500324464634, - "grad_norm": 0.41796875, - "learning_rate": 4.375835690722147e-06, - "loss": 1.3604, - "step": 716 - }, - { - "epoch": 0.6979234263465283, - "grad_norm": 0.44140625, - "learning_rate": 4.374144073824549e-06, - "loss": 1.3715, - "step": 717 - }, - { - "epoch": 0.6988968202465932, - "grad_norm": 0.447265625, - "learning_rate": 4.372450495631376e-06, - "loss": 1.3486, - "step": 718 - }, - { - "epoch": 0.6998702141466581, - "grad_norm": 0.455078125, - "learning_rate": 4.3707549579149605e-06, - "loss": 1.3803, - "step": 719 - }, - { - "epoch": 0.7008436080467229, - "grad_norm": 0.427734375, - "learning_rate": 4.369057462449686e-06, - "loss": 1.3779, - "step": 720 - }, - { - "epoch": 0.7018170019467878, - "grad_norm": 0.4375, - "learning_rate": 4.367358011011985e-06, - "loss": 1.375, - "step": 721 - }, - { - "epoch": 0.7027903958468527, - "grad_norm": 0.42578125, - "learning_rate": 4.365656605380338e-06, - "loss": 1.351, - "step": 722 - }, - { - "epoch": 0.7037637897469176, - "grad_norm": 0.44921875, - "learning_rate": 4.363953247335267e-06, - "loss": 1.3634, - "step": 723 - }, - { - "epoch": 0.7047371836469825, - "grad_norm": 0.447265625, - "learning_rate": 4.362247938659342e-06, - "loss": 1.3717, - "step": 724 - }, - { - "epoch": 0.7057105775470474, - "grad_norm": 0.43359375, - "learning_rate": 4.36054068113717e-06, - "loss": 1.3568, - "step": 725 - }, - { - "epoch": 0.7066839714471123, - "grad_norm": 0.44921875, - "learning_rate": 4.358831476555401e-06, - "loss": 1.3884, - "step": 726 - }, - { - "epoch": 0.7076573653471772, - "grad_norm": 0.4453125, - "learning_rate": 4.357120326702721e-06, - "loss": 1.3781, - "step": 727 - }, - { - "epoch": 0.7086307592472421, - "grad_norm": 0.435546875, - "learning_rate": 4.35540723336985e-06, - "loss": 1.3788, - "step": 728 - }, - { - "epoch": 0.709604153147307, - "grad_norm": 0.43359375, - "learning_rate": 4.353692198349547e-06, - "loss": 1.3691, - "step": 729 - }, - { - "epoch": 0.7105775470473719, - "grad_norm": 0.431640625, - "learning_rate": 4.351975223436597e-06, - "loss": 1.3503, - "step": 730 - }, - { - "epoch": 0.7115509409474368, - "grad_norm": 0.443359375, - "learning_rate": 4.3502563104278175e-06, - "loss": 1.3927, - "step": 731 - }, - { - "epoch": 0.7125243348475017, - "grad_norm": 0.431640625, - "learning_rate": 4.3485354611220555e-06, - "loss": 1.366, - "step": 732 - }, - { - "epoch": 0.7134977287475666, - "grad_norm": 0.46484375, - "learning_rate": 4.346812677320183e-06, - "loss": 1.3639, - "step": 733 - }, - { - "epoch": 0.7144711226476315, - "grad_norm": 0.455078125, - "learning_rate": 4.345087960825098e-06, - "loss": 1.3415, - "step": 734 - }, - { - "epoch": 0.7154445165476963, - "grad_norm": 0.453125, - "learning_rate": 4.343361313441717e-06, - "loss": 1.3652, - "step": 735 - }, - { - "epoch": 0.7164179104477612, - "grad_norm": 0.4375, - "learning_rate": 4.3416327369769824e-06, - "loss": 1.3618, - "step": 736 - }, - { - "epoch": 0.717391304347826, - "grad_norm": 0.421875, - "learning_rate": 4.339902233239853e-06, - "loss": 1.366, - "step": 737 - }, - { - "epoch": 0.718364698247891, - "grad_norm": 0.435546875, - "learning_rate": 4.338169804041303e-06, - "loss": 1.3752, - "step": 738 - }, - { - "epoch": 0.7193380921479559, - "grad_norm": 0.431640625, - "learning_rate": 4.336435451194324e-06, - "loss": 1.337, - "step": 739 - }, - { - "epoch": 0.7203114860480208, - "grad_norm": 0.439453125, - "learning_rate": 4.334699176513919e-06, - "loss": 1.3579, - "step": 740 - }, - { - "epoch": 0.7212848799480857, - "grad_norm": 0.455078125, - "learning_rate": 4.3329609818171035e-06, - "loss": 1.3837, - "step": 741 - }, - { - "epoch": 0.7222582738481506, - "grad_norm": 0.419921875, - "learning_rate": 4.3312208689229026e-06, - "loss": 1.3677, - "step": 742 - }, - { - "epoch": 0.7232316677482155, - "grad_norm": 0.423828125, - "learning_rate": 4.3294788396523465e-06, - "loss": 1.3692, - "step": 743 - }, - { - "epoch": 0.7242050616482804, - "grad_norm": 0.4296875, - "learning_rate": 4.327734895828473e-06, - "loss": 1.3583, - "step": 744 - }, - { - "epoch": 0.7251784555483453, - "grad_norm": 0.42578125, - "learning_rate": 4.325989039276323e-06, - "loss": 1.353, - "step": 745 - }, - { - "epoch": 0.7261518494484102, - "grad_norm": 0.421875, - "learning_rate": 4.324241271822939e-06, - "loss": 1.3476, - "step": 746 - }, - { - "epoch": 0.7271252433484751, - "grad_norm": 0.45703125, - "learning_rate": 4.322491595297363e-06, - "loss": 1.3385, - "step": 747 - }, - { - "epoch": 0.72809863724854, - "grad_norm": 0.4296875, - "learning_rate": 4.320740011530634e-06, - "loss": 1.3517, - "step": 748 - }, - { - "epoch": 0.7290720311486047, - "grad_norm": 0.435546875, - "learning_rate": 4.31898652235579e-06, - "loss": 1.3732, - "step": 749 - }, - { - "epoch": 0.7300454250486696, - "grad_norm": 0.41796875, - "learning_rate": 4.317231129607859e-06, - "loss": 1.3647, - "step": 750 - }, - { - "epoch": 0.7310188189487346, - "grad_norm": 0.43359375, - "learning_rate": 4.3154738351238655e-06, - "loss": 1.3595, - "step": 751 - }, - { - "epoch": 0.7319922128487995, - "grad_norm": 0.423828125, - "learning_rate": 4.3137146407428196e-06, - "loss": 1.3575, - "step": 752 - }, - { - "epoch": 0.7329656067488644, - "grad_norm": 0.431640625, - "learning_rate": 4.311953548305722e-06, - "loss": 1.3698, - "step": 753 - }, - { - "epoch": 0.7339390006489293, - "grad_norm": 0.42578125, - "learning_rate": 4.31019055965556e-06, - "loss": 1.3651, - "step": 754 - }, - { - "epoch": 0.7349123945489942, - "grad_norm": 0.4375, - "learning_rate": 4.3084256766373056e-06, - "loss": 1.3609, - "step": 755 - }, - { - "epoch": 0.735885788449059, - "grad_norm": 0.431640625, - "learning_rate": 4.306658901097911e-06, - "loss": 1.3398, - "step": 756 - }, - { - "epoch": 0.736859182349124, - "grad_norm": 0.419921875, - "learning_rate": 4.3048902348863116e-06, - "loss": 1.3856, - "step": 757 - }, - { - "epoch": 0.7378325762491889, - "grad_norm": 0.421875, - "learning_rate": 4.303119679853419e-06, - "loss": 1.3818, - "step": 758 - }, - { - "epoch": 0.7388059701492538, - "grad_norm": 0.427734375, - "learning_rate": 4.3013472378521236e-06, - "loss": 1.3709, - "step": 759 - }, - { - "epoch": 0.7397793640493187, - "grad_norm": 0.4296875, - "learning_rate": 4.299572910737289e-06, - "loss": 1.3466, - "step": 760 - }, - { - "epoch": 0.7407527579493836, - "grad_norm": 0.427734375, - "learning_rate": 4.297796700365752e-06, - "loss": 1.3511, - "step": 761 - }, - { - "epoch": 0.7417261518494485, - "grad_norm": 0.408203125, - "learning_rate": 4.296018608596321e-06, - "loss": 1.3448, - "step": 762 - }, - { - "epoch": 0.7426995457495134, - "grad_norm": 0.41796875, - "learning_rate": 4.294238637289772e-06, - "loss": 1.3608, - "step": 763 - }, - { - "epoch": 0.7436729396495781, - "grad_norm": 0.43359375, - "learning_rate": 4.29245678830885e-06, - "loss": 1.3773, - "step": 764 - }, - { - "epoch": 0.744646333549643, - "grad_norm": 0.423828125, - "learning_rate": 4.290673063518261e-06, - "loss": 1.3641, - "step": 765 - }, - { - "epoch": 0.745619727449708, - "grad_norm": 0.42578125, - "learning_rate": 4.288887464784679e-06, - "loss": 1.3438, - "step": 766 - }, - { - "epoch": 0.7465931213497728, - "grad_norm": 0.4296875, - "learning_rate": 4.287099993976735e-06, - "loss": 1.3731, - "step": 767 - }, - { - "epoch": 0.7475665152498377, - "grad_norm": 0.42578125, - "learning_rate": 4.2853106529650225e-06, - "loss": 1.3327, - "step": 768 - }, - { - "epoch": 0.7485399091499026, - "grad_norm": 0.421875, - "learning_rate": 4.28351944362209e-06, - "loss": 1.3578, - "step": 769 - }, - { - "epoch": 0.7495133030499675, - "grad_norm": 0.42578125, - "learning_rate": 4.28172636782244e-06, - "loss": 1.3547, - "step": 770 - }, - { - "epoch": 0.7504866969500325, - "grad_norm": 0.435546875, - "learning_rate": 4.279931427442532e-06, - "loss": 1.3565, - "step": 771 - }, - { - "epoch": 0.7514600908500974, - "grad_norm": 0.419921875, - "learning_rate": 4.278134624360773e-06, - "loss": 1.357, - "step": 772 - }, - { - "epoch": 0.7524334847501623, - "grad_norm": 0.423828125, - "learning_rate": 4.276335960457522e-06, - "loss": 1.3664, - "step": 773 - }, - { - "epoch": 0.7534068786502272, - "grad_norm": 0.44140625, - "learning_rate": 4.2745354376150865e-06, - "loss": 1.3623, - "step": 774 - }, - { - "epoch": 0.754380272550292, - "grad_norm": 0.431640625, - "learning_rate": 4.272733057717714e-06, - "loss": 1.3567, - "step": 775 - }, - { - "epoch": 0.755353666450357, - "grad_norm": 0.423828125, - "learning_rate": 4.2709288226516e-06, - "loss": 1.3633, - "step": 776 - }, - { - "epoch": 0.7563270603504219, - "grad_norm": 0.421875, - "learning_rate": 4.269122734304881e-06, - "loss": 1.361, - "step": 777 - }, - { - "epoch": 0.7573004542504866, - "grad_norm": 0.44140625, - "learning_rate": 4.2673147945676305e-06, - "loss": 1.3854, - "step": 778 - }, - { - "epoch": 0.7582738481505515, - "grad_norm": 0.427734375, - "learning_rate": 4.265505005331863e-06, - "loss": 1.3854, - "step": 779 - }, - { - "epoch": 0.7592472420506164, - "grad_norm": 0.4140625, - "learning_rate": 4.263693368491524e-06, - "loss": 1.3353, - "step": 780 - }, - { - "epoch": 0.7602206359506813, - "grad_norm": 0.470703125, - "learning_rate": 4.261879885942497e-06, - "loss": 1.3398, - "step": 781 - }, - { - "epoch": 0.7611940298507462, - "grad_norm": 0.47265625, - "learning_rate": 4.260064559582596e-06, - "loss": 1.3785, - "step": 782 - }, - { - "epoch": 0.7621674237508111, - "grad_norm": 0.435546875, - "learning_rate": 4.258247391311562e-06, - "loss": 1.3685, - "step": 783 - }, - { - "epoch": 0.763140817650876, - "grad_norm": 0.431640625, - "learning_rate": 4.256428383031065e-06, - "loss": 1.3556, - "step": 784 - }, - { - "epoch": 0.764114211550941, - "grad_norm": 0.427734375, - "learning_rate": 4.254607536644702e-06, - "loss": 1.3519, - "step": 785 - }, - { - "epoch": 0.7650876054510058, - "grad_norm": 0.4765625, - "learning_rate": 4.252784854057993e-06, - "loss": 1.3622, - "step": 786 - }, - { - "epoch": 0.7660609993510707, - "grad_norm": 0.447265625, - "learning_rate": 4.2509603371783776e-06, - "loss": 1.3613, - "step": 787 - }, - { - "epoch": 0.7670343932511356, - "grad_norm": 0.443359375, - "learning_rate": 4.249133987915217e-06, - "loss": 1.3614, - "step": 788 - }, - { - "epoch": 0.7680077871512005, - "grad_norm": 0.4453125, - "learning_rate": 4.247305808179789e-06, - "loss": 1.3472, - "step": 789 - }, - { - "epoch": 0.7689811810512654, - "grad_norm": 0.451171875, - "learning_rate": 4.245475799885288e-06, - "loss": 1.342, - "step": 790 - }, - { - "epoch": 0.7699545749513304, - "grad_norm": 0.44921875, - "learning_rate": 4.243643964946821e-06, - "loss": 1.3809, - "step": 791 - }, - { - "epoch": 0.7709279688513953, - "grad_norm": 0.443359375, - "learning_rate": 4.241810305281407e-06, - "loss": 1.3536, - "step": 792 - }, - { - "epoch": 0.77190136275146, - "grad_norm": 0.447265625, - "learning_rate": 4.239974822807976e-06, - "loss": 1.3412, - "step": 793 - }, - { - "epoch": 0.7728747566515249, - "grad_norm": 0.458984375, - "learning_rate": 4.238137519447362e-06, - "loss": 1.3441, - "step": 794 - }, - { - "epoch": 0.7738481505515898, - "grad_norm": 0.451171875, - "learning_rate": 4.236298397122307e-06, - "loss": 1.3554, - "step": 795 - }, - { - "epoch": 0.7748215444516547, - "grad_norm": 0.44921875, - "learning_rate": 4.234457457757457e-06, - "loss": 1.3543, - "step": 796 - }, - { - "epoch": 0.7757949383517196, - "grad_norm": 0.44921875, - "learning_rate": 4.232614703279359e-06, - "loss": 1.3508, - "step": 797 - }, - { - "epoch": 0.7767683322517845, - "grad_norm": 0.439453125, - "learning_rate": 4.230770135616459e-06, - "loss": 1.3559, - "step": 798 - }, - { - "epoch": 0.7777417261518494, - "grad_norm": 0.4453125, - "learning_rate": 4.2289237566991e-06, - "loss": 1.3658, - "step": 799 - }, - { - "epoch": 0.7787151200519143, - "grad_norm": 0.431640625, - "learning_rate": 4.227075568459522e-06, - "loss": 1.367, - "step": 800 - }, - { - "epoch": 0.7796885139519792, - "grad_norm": 0.43359375, - "learning_rate": 4.225225572831858e-06, - "loss": 1.3504, - "step": 801 - }, - { - "epoch": 0.7806619078520441, - "grad_norm": 0.439453125, - "learning_rate": 4.223373771752131e-06, - "loss": 1.3512, - "step": 802 - }, - { - "epoch": 0.781635301752109, - "grad_norm": 0.44921875, - "learning_rate": 4.221520167158257e-06, - "loss": 1.3597, - "step": 803 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 0.435546875, - "learning_rate": 4.219664760990035e-06, - "loss": 1.3292, - "step": 804 - }, - { - "epoch": 0.7835820895522388, - "grad_norm": 0.41796875, - "learning_rate": 4.217807555189151e-06, - "loss": 1.3392, - "step": 805 - }, - { - "epoch": 0.7845554834523037, - "grad_norm": 0.41796875, - "learning_rate": 4.215948551699175e-06, - "loss": 1.3377, - "step": 806 - }, - { - "epoch": 0.7855288773523685, - "grad_norm": 0.4296875, - "learning_rate": 4.21408775246556e-06, - "loss": 1.3513, - "step": 807 - }, - { - "epoch": 0.7865022712524334, - "grad_norm": 0.453125, - "learning_rate": 4.212225159435633e-06, - "loss": 1.3877, - "step": 808 - }, - { - "epoch": 0.7874756651524983, - "grad_norm": 0.439453125, - "learning_rate": 4.210360774558604e-06, - "loss": 1.3519, - "step": 809 - }, - { - "epoch": 0.7884490590525632, - "grad_norm": 0.431640625, - "learning_rate": 4.208494599785553e-06, - "loss": 1.3441, - "step": 810 - }, - { - "epoch": 0.7894224529526281, - "grad_norm": 0.421875, - "learning_rate": 4.206626637069438e-06, - "loss": 1.3656, - "step": 811 - }, - { - "epoch": 0.790395846852693, - "grad_norm": 0.4140625, - "learning_rate": 4.204756888365085e-06, - "loss": 1.3504, - "step": 812 - }, - { - "epoch": 0.7913692407527579, - "grad_norm": 0.431640625, - "learning_rate": 4.202885355629189e-06, - "loss": 1.3429, - "step": 813 - }, - { - "epoch": 0.7923426346528228, - "grad_norm": 0.435546875, - "learning_rate": 4.201012040820314e-06, - "loss": 1.3432, - "step": 814 - }, - { - "epoch": 0.7933160285528877, - "grad_norm": 0.4375, - "learning_rate": 4.199136945898887e-06, - "loss": 1.3612, - "step": 815 - }, - { - "epoch": 0.7942894224529526, - "grad_norm": 0.431640625, - "learning_rate": 4.197260072827199e-06, - "loss": 1.3556, - "step": 816 - }, - { - "epoch": 0.7952628163530175, - "grad_norm": 0.4296875, - "learning_rate": 4.1953814235694015e-06, - "loss": 1.3423, - "step": 817 - }, - { - "epoch": 0.7962362102530824, - "grad_norm": 0.41796875, - "learning_rate": 4.193501000091504e-06, - "loss": 1.37, - "step": 818 - }, - { - "epoch": 0.7972096041531473, - "grad_norm": 0.421875, - "learning_rate": 4.1916188043613754e-06, - "loss": 1.3564, - "step": 819 - }, - { - "epoch": 0.7981829980532122, - "grad_norm": 0.431640625, - "learning_rate": 4.189734838348736e-06, - "loss": 1.3681, - "step": 820 - }, - { - "epoch": 0.7991563919532771, - "grad_norm": 0.421875, - "learning_rate": 4.187849104025159e-06, - "loss": 1.38, - "step": 821 - }, - { - "epoch": 0.8001297858533419, - "grad_norm": 0.421875, - "learning_rate": 4.18596160336407e-06, - "loss": 1.3596, - "step": 822 - }, - { - "epoch": 0.8011031797534068, - "grad_norm": 0.4140625, - "learning_rate": 4.184072338340743e-06, - "loss": 1.3484, - "step": 823 - }, - { - "epoch": 0.8020765736534717, - "grad_norm": 0.4296875, - "learning_rate": 4.1821813109322975e-06, - "loss": 1.3658, - "step": 824 - }, - { - "epoch": 0.8030499675535366, - "grad_norm": 0.4140625, - "learning_rate": 4.180288523117697e-06, - "loss": 1.3523, - "step": 825 - }, - { - "epoch": 0.8040233614536015, - "grad_norm": 0.416015625, - "learning_rate": 4.178393976877749e-06, - "loss": 1.3549, - "step": 826 - }, - { - "epoch": 0.8049967553536664, - "grad_norm": 0.40625, - "learning_rate": 4.176497674195098e-06, - "loss": 1.3393, - "step": 827 - }, - { - "epoch": 0.8059701492537313, - "grad_norm": 0.4296875, - "learning_rate": 4.17459961705423e-06, - "loss": 1.351, - "step": 828 - }, - { - "epoch": 0.8069435431537962, - "grad_norm": 0.439453125, - "learning_rate": 4.172699807441465e-06, - "loss": 1.3442, - "step": 829 - }, - { - "epoch": 0.8079169370538611, - "grad_norm": 0.42578125, - "learning_rate": 4.1707982473449584e-06, - "loss": 1.3427, - "step": 830 - }, - { - "epoch": 0.808890330953926, - "grad_norm": 0.416015625, - "learning_rate": 4.168894938754696e-06, - "loss": 1.3437, - "step": 831 - }, - { - "epoch": 0.8098637248539909, - "grad_norm": 0.419921875, - "learning_rate": 4.166989883662495e-06, - "loss": 1.3536, - "step": 832 - }, - { - "epoch": 0.8108371187540558, - "grad_norm": 0.408203125, - "learning_rate": 4.165083084061997e-06, - "loss": 1.3342, - "step": 833 - }, - { - "epoch": 0.8118105126541207, - "grad_norm": 0.45703125, - "learning_rate": 4.1631745419486744e-06, - "loss": 1.3379, - "step": 834 - }, - { - "epoch": 0.8127839065541856, - "grad_norm": 0.43359375, - "learning_rate": 4.16126425931982e-06, - "loss": 1.3536, - "step": 835 - }, - { - "epoch": 0.8137573004542504, - "grad_norm": 0.4453125, - "learning_rate": 4.1593522381745465e-06, - "loss": 1.3368, - "step": 836 - }, - { - "epoch": 0.8147306943543153, - "grad_norm": 0.4375, - "learning_rate": 4.15743848051379e-06, - "loss": 1.3444, - "step": 837 - }, - { - "epoch": 0.8157040882543802, - "grad_norm": 0.419921875, - "learning_rate": 4.155522988340301e-06, - "loss": 1.3621, - "step": 838 - }, - { - "epoch": 0.8166774821544451, - "grad_norm": 0.42578125, - "learning_rate": 4.153605763658645e-06, - "loss": 1.3682, - "step": 839 - }, - { - "epoch": 0.81765087605451, - "grad_norm": 0.42578125, - "learning_rate": 4.151686808475204e-06, - "loss": 1.3607, - "step": 840 - }, - { - "epoch": 0.8186242699545749, - "grad_norm": 0.427734375, - "learning_rate": 4.149766124798166e-06, - "loss": 1.3299, - "step": 841 - }, - { - "epoch": 0.8195976638546398, - "grad_norm": 0.431640625, - "learning_rate": 4.1478437146375315e-06, - "loss": 1.3649, - "step": 842 - }, - { - "epoch": 0.8205710577547047, - "grad_norm": 0.447265625, - "learning_rate": 4.145919580005107e-06, - "loss": 1.372, - "step": 843 - }, - { - "epoch": 0.8215444516547696, - "grad_norm": 0.423828125, - "learning_rate": 4.143993722914502e-06, - "loss": 1.3601, - "step": 844 - }, - { - "epoch": 0.8225178455548345, - "grad_norm": 0.41796875, - "learning_rate": 4.142066145381131e-06, - "loss": 1.3618, - "step": 845 - }, - { - "epoch": 0.8234912394548994, - "grad_norm": 0.4296875, - "learning_rate": 4.1401368494222075e-06, - "loss": 1.3613, - "step": 846 - }, - { - "epoch": 0.8244646333549643, - "grad_norm": 0.423828125, - "learning_rate": 4.138205837056743e-06, - "loss": 1.3517, - "step": 847 - }, - { - "epoch": 0.8254380272550292, - "grad_norm": 0.42578125, - "learning_rate": 4.136273110305547e-06, - "loss": 1.3322, - "step": 848 - }, - { - "epoch": 0.8264114211550941, - "grad_norm": 0.4375, - "learning_rate": 4.134338671191221e-06, - "loss": 1.359, - "step": 849 - }, - { - "epoch": 0.827384815055159, - "grad_norm": 0.412109375, - "learning_rate": 4.132402521738159e-06, - "loss": 1.3477, - "step": 850 - }, - { - "epoch": 0.8283582089552238, - "grad_norm": 0.4296875, - "learning_rate": 4.130464663972548e-06, - "loss": 1.3535, - "step": 851 - }, - { - "epoch": 0.8293316028552887, - "grad_norm": 0.41796875, - "learning_rate": 4.128525099922357e-06, - "loss": 1.3613, - "step": 852 - }, - { - "epoch": 0.8303049967553536, - "grad_norm": 0.435546875, - "learning_rate": 4.1265838316173455e-06, - "loss": 1.3557, - "step": 853 - }, - { - "epoch": 0.8312783906554185, - "grad_norm": 0.43359375, - "learning_rate": 4.124640861089055e-06, - "loss": 1.336, - "step": 854 - }, - { - "epoch": 0.8322517845554834, - "grad_norm": 0.421875, - "learning_rate": 4.122696190370805e-06, - "loss": 1.3498, - "step": 855 - }, - { - "epoch": 0.8332251784555483, - "grad_norm": 0.443359375, - "learning_rate": 4.1207498214977e-06, - "loss": 1.3572, - "step": 856 - }, - { - "epoch": 0.8341985723556132, - "grad_norm": 0.427734375, - "learning_rate": 4.11880175650662e-06, - "loss": 1.3538, - "step": 857 - }, - { - "epoch": 0.8351719662556781, - "grad_norm": 0.427734375, - "learning_rate": 4.1168519974362175e-06, - "loss": 1.3554, - "step": 858 - }, - { - "epoch": 0.836145360155743, - "grad_norm": 0.43359375, - "learning_rate": 4.1149005463269186e-06, - "loss": 1.324, - "step": 859 - }, - { - "epoch": 0.8371187540558079, - "grad_norm": 0.43359375, - "learning_rate": 4.112947405220921e-06, - "loss": 1.3603, - "step": 860 - }, - { - "epoch": 0.8380921479558728, - "grad_norm": 0.421875, - "learning_rate": 4.110992576162193e-06, - "loss": 1.3518, - "step": 861 - }, - { - "epoch": 0.8390655418559377, - "grad_norm": 0.41796875, - "learning_rate": 4.1090360611964644e-06, - "loss": 1.3701, - "step": 862 - }, - { - "epoch": 0.8400389357560026, - "grad_norm": 0.419921875, - "learning_rate": 4.107077862371235e-06, - "loss": 1.3422, - "step": 863 - }, - { - "epoch": 0.8410123296560675, - "grad_norm": 0.419921875, - "learning_rate": 4.1051179817357605e-06, - "loss": 1.3492, - "step": 864 - }, - { - "epoch": 0.8419857235561323, - "grad_norm": 0.427734375, - "learning_rate": 4.103156421341062e-06, - "loss": 1.3381, - "step": 865 - }, - { - "epoch": 0.8429591174561972, - "grad_norm": 0.4140625, - "learning_rate": 4.101193183239916e-06, - "loss": 1.3399, - "step": 866 - }, - { - "epoch": 0.8439325113562621, - "grad_norm": 0.4140625, - "learning_rate": 4.0992282694868555e-06, - "loss": 1.3459, - "step": 867 - }, - { - "epoch": 0.844905905256327, - "grad_norm": 0.41015625, - "learning_rate": 4.097261682138166e-06, - "loss": 1.331, - "step": 868 - }, - { - "epoch": 0.8458792991563919, - "grad_norm": 0.416015625, - "learning_rate": 4.095293423251886e-06, - "loss": 1.3371, - "step": 869 - }, - { - "epoch": 0.8468526930564568, - "grad_norm": 0.4140625, - "learning_rate": 4.0933234948878025e-06, - "loss": 1.3529, - "step": 870 - }, - { - "epoch": 0.8478260869565217, - "grad_norm": 0.419921875, - "learning_rate": 4.091351899107448e-06, - "loss": 1.3533, - "step": 871 - }, - { - "epoch": 0.8487994808565866, - "grad_norm": 0.416015625, - "learning_rate": 4.089378637974103e-06, - "loss": 1.3438, - "step": 872 - }, - { - "epoch": 0.8497728747566515, - "grad_norm": 0.4140625, - "learning_rate": 4.087403713552789e-06, - "loss": 1.3272, - "step": 873 - }, - { - "epoch": 0.8507462686567164, - "grad_norm": 0.4296875, - "learning_rate": 4.085427127910268e-06, - "loss": 1.3609, - "step": 874 - }, - { - "epoch": 0.8517196625567813, - "grad_norm": 0.42578125, - "learning_rate": 4.083448883115041e-06, - "loss": 1.371, - "step": 875 - }, - { - "epoch": 0.8526930564568462, - "grad_norm": 0.419921875, - "learning_rate": 4.081468981237345e-06, - "loss": 1.3452, - "step": 876 - }, - { - "epoch": 0.8536664503569111, - "grad_norm": 0.439453125, - "learning_rate": 4.079487424349152e-06, - "loss": 1.34, - "step": 877 - }, - { - "epoch": 0.854639844256976, - "grad_norm": 0.419921875, - "learning_rate": 4.077504214524166e-06, - "loss": 1.3307, - "step": 878 - }, - { - "epoch": 0.8556132381570409, - "grad_norm": 0.44921875, - "learning_rate": 4.075519353837818e-06, - "loss": 1.3515, - "step": 879 - }, - { - "epoch": 0.8565866320571057, - "grad_norm": 0.419921875, - "learning_rate": 4.0735328443672694e-06, - "loss": 1.3461, - "step": 880 - }, - { - "epoch": 0.8575600259571706, - "grad_norm": 0.43359375, - "learning_rate": 4.071544688191407e-06, - "loss": 1.3386, - "step": 881 - }, - { - "epoch": 0.8585334198572355, - "grad_norm": 0.455078125, - "learning_rate": 4.06955488739084e-06, - "loss": 1.3717, - "step": 882 - }, - { - "epoch": 0.8595068137573004, - "grad_norm": 0.4609375, - "learning_rate": 4.0675634440478995e-06, - "loss": 1.3352, - "step": 883 - }, - { - "epoch": 0.8604802076573653, - "grad_norm": 0.42578125, - "learning_rate": 4.065570360246634e-06, - "loss": 1.3568, - "step": 884 - }, - { - "epoch": 0.8614536015574302, - "grad_norm": 0.416015625, - "learning_rate": 4.0635756380728096e-06, - "loss": 1.3352, - "step": 885 - }, - { - "epoch": 0.8624269954574951, - "grad_norm": 0.42578125, - "learning_rate": 4.061579279613909e-06, - "loss": 1.3333, - "step": 886 - }, - { - "epoch": 0.86340038935756, - "grad_norm": 0.431640625, - "learning_rate": 4.0595812869591235e-06, - "loss": 1.3464, - "step": 887 - }, - { - "epoch": 0.8643737832576249, - "grad_norm": 0.447265625, - "learning_rate": 4.057581662199357e-06, - "loss": 1.3583, - "step": 888 - }, - { - "epoch": 0.8653471771576898, - "grad_norm": 0.44140625, - "learning_rate": 4.055580407427222e-06, - "loss": 1.363, - "step": 889 - }, - { - "epoch": 0.8663205710577547, - "grad_norm": 0.439453125, - "learning_rate": 4.053577524737034e-06, - "loss": 1.3543, - "step": 890 - }, - { - "epoch": 0.8672939649578196, - "grad_norm": 0.419921875, - "learning_rate": 4.051573016224813e-06, - "loss": 1.3434, - "step": 891 - }, - { - "epoch": 0.8682673588578845, - "grad_norm": 0.42578125, - "learning_rate": 4.0495668839882846e-06, - "loss": 1.3433, - "step": 892 - }, - { - "epoch": 0.8692407527579494, - "grad_norm": 0.435546875, - "learning_rate": 4.047559130126868e-06, - "loss": 1.3248, - "step": 893 - }, - { - "epoch": 0.8702141466580143, - "grad_norm": 0.447265625, - "learning_rate": 4.045549756741682e-06, - "loss": 1.3511, - "step": 894 - }, - { - "epoch": 0.8711875405580791, - "grad_norm": 0.44140625, - "learning_rate": 4.043538765935539e-06, - "loss": 1.3574, - "step": 895 - }, - { - "epoch": 0.872160934458144, - "grad_norm": 0.42578125, - "learning_rate": 4.0415261598129465e-06, - "loss": 1.3671, - "step": 896 - }, - { - "epoch": 0.8731343283582089, - "grad_norm": 0.423828125, - "learning_rate": 4.0395119404801e-06, - "loss": 1.3383, - "step": 897 - }, - { - "epoch": 0.8741077222582738, - "grad_norm": 0.42578125, - "learning_rate": 4.037496110044885e-06, - "loss": 1.3695, - "step": 898 - }, - { - "epoch": 0.8750811161583387, - "grad_norm": 0.44140625, - "learning_rate": 4.03547867061687e-06, - "loss": 1.3367, - "step": 899 - }, - { - "epoch": 0.8760545100584036, - "grad_norm": 0.419921875, - "learning_rate": 4.0334596243073125e-06, - "loss": 1.3357, - "step": 900 - }, - { - "epoch": 0.8770279039584685, - "grad_norm": 0.4140625, - "learning_rate": 4.031438973229147e-06, - "loss": 1.3463, - "step": 901 - }, - { - "epoch": 0.8780012978585334, - "grad_norm": 0.419921875, - "learning_rate": 4.029416719496988e-06, - "loss": 1.3597, - "step": 902 - }, - { - "epoch": 0.8789746917585983, - "grad_norm": 0.42578125, - "learning_rate": 4.027392865227131e-06, - "loss": 1.3586, - "step": 903 - }, - { - "epoch": 0.8799480856586632, - "grad_norm": 0.412109375, - "learning_rate": 4.025367412537539e-06, - "loss": 1.3201, - "step": 904 - }, - { - "epoch": 0.8809214795587281, - "grad_norm": 0.416015625, - "learning_rate": 4.023340363547858e-06, - "loss": 1.3621, - "step": 905 - }, - { - "epoch": 0.881894873458793, - "grad_norm": 0.419921875, - "learning_rate": 4.021311720379394e-06, - "loss": 1.372, - "step": 906 - }, - { - "epoch": 0.8828682673588579, - "grad_norm": 0.421875, - "learning_rate": 4.0192814851551284e-06, - "loss": 1.3457, - "step": 907 - }, - { - "epoch": 0.8838416612589228, - "grad_norm": 0.419921875, - "learning_rate": 4.017249659999707e-06, - "loss": 1.3675, - "step": 908 - }, - { - "epoch": 0.8848150551589876, - "grad_norm": 0.421875, - "learning_rate": 4.015216247039438e-06, - "loss": 1.3445, - "step": 909 - }, - { - "epoch": 0.8857884490590525, - "grad_norm": 0.4140625, - "learning_rate": 4.013181248402293e-06, - "loss": 1.3588, - "step": 910 - }, - { - "epoch": 0.8867618429591174, - "grad_norm": 0.41015625, - "learning_rate": 4.0111446662179e-06, - "loss": 1.3382, - "step": 911 - }, - { - "epoch": 0.8877352368591823, - "grad_norm": 0.41796875, - "learning_rate": 4.009106502617548e-06, - "loss": 1.3339, - "step": 912 - }, - { - "epoch": 0.8887086307592472, - "grad_norm": 0.4140625, - "learning_rate": 4.0070667597341806e-06, - "loss": 1.3351, - "step": 913 - }, - { - "epoch": 0.8896820246593121, - "grad_norm": 0.423828125, - "learning_rate": 4.005025439702391e-06, - "loss": 1.3449, - "step": 914 - }, - { - "epoch": 0.890655418559377, - "grad_norm": 0.435546875, - "learning_rate": 4.002982544658426e-06, - "loss": 1.3419, - "step": 915 - }, - { - "epoch": 0.8916288124594419, - "grad_norm": 0.423828125, - "learning_rate": 4.000938076740177e-06, - "loss": 1.3651, - "step": 916 - }, - { - "epoch": 0.8926022063595068, - "grad_norm": 0.4140625, - "learning_rate": 3.998892038087187e-06, - "loss": 1.3451, - "step": 917 - }, - { - "epoch": 0.8935756002595717, - "grad_norm": 0.4140625, - "learning_rate": 3.996844430840637e-06, - "loss": 1.3337, - "step": 918 - }, - { - "epoch": 0.8945489941596366, - "grad_norm": 0.41796875, - "learning_rate": 3.994795257143355e-06, - "loss": 1.3516, - "step": 919 - }, - { - "epoch": 0.8955223880597015, - "grad_norm": 0.416015625, - "learning_rate": 3.992744519139803e-06, - "loss": 1.3304, - "step": 920 - }, - { - "epoch": 0.8964957819597664, - "grad_norm": 0.419921875, - "learning_rate": 3.990692218976082e-06, - "loss": 1.3498, - "step": 921 - }, - { - "epoch": 0.8974691758598313, - "grad_norm": 0.416015625, - "learning_rate": 3.988638358799931e-06, - "loss": 1.3217, - "step": 922 - }, - { - "epoch": 0.8984425697598962, - "grad_norm": 0.421875, - "learning_rate": 3.986582940760717e-06, - "loss": 1.3579, - "step": 923 - }, - { - "epoch": 0.899415963659961, - "grad_norm": 0.42578125, - "learning_rate": 3.98452596700944e-06, - "loss": 1.3152, - "step": 924 - }, - { - "epoch": 0.9003893575600259, - "grad_norm": 0.431640625, - "learning_rate": 3.982467439698725e-06, - "loss": 1.3425, - "step": 925 - }, - { - "epoch": 0.9013627514600908, - "grad_norm": 0.412109375, - "learning_rate": 3.980407360982828e-06, - "loss": 1.3399, - "step": 926 - }, - { - "epoch": 0.9023361453601557, - "grad_norm": 0.41015625, - "learning_rate": 3.978345733017624e-06, - "loss": 1.3438, - "step": 927 - }, - { - "epoch": 0.9033095392602206, - "grad_norm": 0.421875, - "learning_rate": 3.976282557960611e-06, - "loss": 1.3449, - "step": 928 - }, - { - "epoch": 0.9042829331602855, - "grad_norm": 0.42578125, - "learning_rate": 3.974217837970906e-06, - "loss": 1.3385, - "step": 929 - }, - { - "epoch": 0.9052563270603504, - "grad_norm": 0.435546875, - "learning_rate": 3.9721515752092434e-06, - "loss": 1.3309, - "step": 930 - }, - { - "epoch": 0.9062297209604153, - "grad_norm": 0.421875, - "learning_rate": 3.970083771837969e-06, - "loss": 1.3521, - "step": 931 - }, - { - "epoch": 0.9072031148604802, - "grad_norm": 0.408203125, - "learning_rate": 3.968014430021046e-06, - "loss": 1.3293, - "step": 932 - }, - { - "epoch": 0.9081765087605451, - "grad_norm": 0.419921875, - "learning_rate": 3.965943551924043e-06, - "loss": 1.3597, - "step": 933 - }, - { - "epoch": 0.90914990266061, - "grad_norm": 0.43359375, - "learning_rate": 3.963871139714139e-06, - "loss": 1.3457, - "step": 934 - }, - { - "epoch": 0.9101232965606749, - "grad_norm": 0.431640625, - "learning_rate": 3.961797195560118e-06, - "loss": 1.3425, - "step": 935 - }, - { - "epoch": 0.9110966904607398, - "grad_norm": 0.41796875, - "learning_rate": 3.959721721632366e-06, - "loss": 1.3502, - "step": 936 - }, - { - "epoch": 0.9120700843608047, - "grad_norm": 0.3984375, - "learning_rate": 3.95764472010287e-06, - "loss": 1.3603, - "step": 937 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 0.41796875, - "learning_rate": 3.955566193145218e-06, - "loss": 1.3416, - "step": 938 - }, - { - "epoch": 0.9140168721609344, - "grad_norm": 0.416015625, - "learning_rate": 3.953486142934591e-06, - "loss": 1.3304, - "step": 939 - }, - { - "epoch": 0.9149902660609993, - "grad_norm": 0.421875, - "learning_rate": 3.9514045716477665e-06, - "loss": 1.3487, - "step": 940 - }, - { - "epoch": 0.9159636599610642, - "grad_norm": 0.4140625, - "learning_rate": 3.949321481463114e-06, - "loss": 1.3587, - "step": 941 - }, - { - "epoch": 0.9169370538611291, - "grad_norm": 0.408203125, - "learning_rate": 3.947236874560591e-06, - "loss": 1.3468, - "step": 942 - }, - { - "epoch": 0.917910447761194, - "grad_norm": 0.40625, - "learning_rate": 3.945150753121742e-06, - "loss": 1.3397, - "step": 943 - }, - { - "epoch": 0.9188838416612589, - "grad_norm": 0.412109375, - "learning_rate": 3.9430631193296976e-06, - "loss": 1.3269, - "step": 944 - }, - { - "epoch": 0.9198572355613238, - "grad_norm": 0.412109375, - "learning_rate": 3.940973975369171e-06, - "loss": 1.3579, - "step": 945 - }, - { - "epoch": 0.9208306294613887, - "grad_norm": 0.421875, - "learning_rate": 3.938883323426456e-06, - "loss": 1.3617, - "step": 946 - }, - { - "epoch": 0.9218040233614536, - "grad_norm": 0.431640625, - "learning_rate": 3.936791165689424e-06, - "loss": 1.3583, - "step": 947 - }, - { - "epoch": 0.9227774172615185, - "grad_norm": 0.4375, - "learning_rate": 3.9346975043475216e-06, - "loss": 1.3346, - "step": 948 - }, - { - "epoch": 0.9237508111615834, - "grad_norm": 0.42578125, - "learning_rate": 3.9326023415917704e-06, - "loss": 1.334, - "step": 949 - }, - { - "epoch": 0.9247242050616483, - "grad_norm": 0.419921875, - "learning_rate": 3.930505679614762e-06, - "loss": 1.3414, - "step": 950 - }, - { - "epoch": 0.9256975989617132, - "grad_norm": 0.408203125, - "learning_rate": 3.928407520610658e-06, - "loss": 1.3375, - "step": 951 - }, - { - "epoch": 0.9266709928617781, - "grad_norm": 0.439453125, - "learning_rate": 3.926307866775186e-06, - "loss": 1.3527, - "step": 952 - }, - { - "epoch": 0.9276443867618429, - "grad_norm": 0.439453125, - "learning_rate": 3.924206720305638e-06, - "loss": 1.3498, - "step": 953 - }, - { - "epoch": 0.9286177806619078, - "grad_norm": 0.423828125, - "learning_rate": 3.922104083400866e-06, - "loss": 1.3638, - "step": 954 - }, - { - "epoch": 0.9295911745619727, - "grad_norm": 0.400390625, - "learning_rate": 3.919999958261287e-06, - "loss": 1.334, - "step": 955 - }, - { - "epoch": 0.9305645684620376, - "grad_norm": 0.44921875, - "learning_rate": 3.91789434708887e-06, - "loss": 1.3619, - "step": 956 - }, - { - "epoch": 0.9315379623621025, - "grad_norm": 0.45703125, - "learning_rate": 3.915787252087143e-06, - "loss": 1.3265, - "step": 957 - }, - { - "epoch": 0.9325113562621674, - "grad_norm": 0.431640625, - "learning_rate": 3.913678675461184e-06, - "loss": 1.345, - "step": 958 - }, - { - "epoch": 0.9334847501622323, - "grad_norm": 0.3984375, - "learning_rate": 3.9115686194176225e-06, - "loss": 1.3243, - "step": 959 - }, - { - "epoch": 0.9344581440622972, - "grad_norm": 0.408203125, - "learning_rate": 3.909457086164638e-06, - "loss": 1.33, - "step": 960 - }, - { - "epoch": 0.9354315379623621, - "grad_norm": 0.435546875, - "learning_rate": 3.907344077911952e-06, - "loss": 1.3443, - "step": 961 - }, - { - "epoch": 0.936404931862427, - "grad_norm": 0.439453125, - "learning_rate": 3.905229596870833e-06, - "loss": 1.3452, - "step": 962 - }, - { - "epoch": 0.9373783257624919, - "grad_norm": 0.4140625, - "learning_rate": 3.9031136452540915e-06, - "loss": 1.3114, - "step": 963 - }, - { - "epoch": 0.9383517196625568, - "grad_norm": 0.4140625, - "learning_rate": 3.900996225276073e-06, - "loss": 1.3469, - "step": 964 - }, - { - "epoch": 0.9393251135626217, - "grad_norm": 0.412109375, - "learning_rate": 3.8988773391526626e-06, - "loss": 1.3281, - "step": 965 - }, - { - "epoch": 0.9402985074626866, - "grad_norm": 0.4375, - "learning_rate": 3.896756989101278e-06, - "loss": 1.3396, - "step": 966 - }, - { - "epoch": 0.9412719013627514, - "grad_norm": 0.44921875, - "learning_rate": 3.894635177340871e-06, - "loss": 1.3279, - "step": 967 - }, - { - "epoch": 0.9422452952628163, - "grad_norm": 0.439453125, - "learning_rate": 3.892511906091921e-06, - "loss": 1.3368, - "step": 968 - }, - { - "epoch": 0.9432186891628812, - "grad_norm": 0.431640625, - "learning_rate": 3.890387177576437e-06, - "loss": 1.3565, - "step": 969 - }, - { - "epoch": 0.9441920830629461, - "grad_norm": 0.466796875, - "learning_rate": 3.88826099401795e-06, - "loss": 1.3467, - "step": 970 - }, - { - "epoch": 0.945165476963011, - "grad_norm": 0.427734375, - "learning_rate": 3.886133357641516e-06, - "loss": 1.351, - "step": 971 - }, - { - "epoch": 0.9461388708630759, - "grad_norm": 0.4296875, - "learning_rate": 3.884004270673711e-06, - "loss": 1.312, - "step": 972 - }, - { - "epoch": 0.9471122647631408, - "grad_norm": 0.4375, - "learning_rate": 3.88187373534263e-06, - "loss": 1.334, - "step": 973 - }, - { - "epoch": 0.9480856586632057, - "grad_norm": 0.47265625, - "learning_rate": 3.879741753877881e-06, - "loss": 1.3522, - "step": 974 - }, - { - "epoch": 0.9490590525632706, - "grad_norm": 0.44140625, - "learning_rate": 3.877608328510587e-06, - "loss": 1.3301, - "step": 975 - }, - { - "epoch": 0.9500324464633355, - "grad_norm": 0.419921875, - "learning_rate": 3.875473461473383e-06, - "loss": 1.3216, - "step": 976 - }, - { - "epoch": 0.9510058403634004, - "grad_norm": 0.4375, - "learning_rate": 3.873337155000409e-06, - "loss": 1.3288, - "step": 977 - }, - { - "epoch": 0.9519792342634653, - "grad_norm": 0.44140625, - "learning_rate": 3.871199411327318e-06, - "loss": 1.3373, - "step": 978 - }, - { - "epoch": 0.9529526281635302, - "grad_norm": 0.455078125, - "learning_rate": 3.86906023269126e-06, - "loss": 1.3399, - "step": 979 - }, - { - "epoch": 0.9539260220635951, - "grad_norm": 0.44140625, - "learning_rate": 3.866919621330892e-06, - "loss": 1.329, - "step": 980 - }, - { - "epoch": 0.95489941596366, - "grad_norm": 0.466796875, - "learning_rate": 3.864777579486366e-06, - "loss": 1.3293, - "step": 981 - }, - { - "epoch": 0.9558728098637248, - "grad_norm": 0.443359375, - "learning_rate": 3.8626341093993346e-06, - "loss": 1.3483, - "step": 982 - }, - { - "epoch": 0.9568462037637897, - "grad_norm": 0.435546875, - "learning_rate": 3.860489213312943e-06, - "loss": 1.3413, - "step": 983 - }, - { - "epoch": 0.9578195976638546, - "grad_norm": 0.458984375, - "learning_rate": 3.85834289347183e-06, - "loss": 1.3715, - "step": 984 - }, - { - "epoch": 0.9587929915639195, - "grad_norm": 0.4453125, - "learning_rate": 3.856195152122123e-06, - "loss": 1.3392, - "step": 985 - }, - { - "epoch": 0.9597663854639844, - "grad_norm": 0.423828125, - "learning_rate": 3.854045991511438e-06, - "loss": 1.3204, - "step": 986 - }, - { - "epoch": 0.9607397793640493, - "grad_norm": 0.419921875, - "learning_rate": 3.851895413888875e-06, - "loss": 1.3382, - "step": 987 - }, - { - "epoch": 0.9617131732641142, - "grad_norm": 0.423828125, - "learning_rate": 3.849743421505019e-06, - "loss": 1.347, - "step": 988 - }, - { - "epoch": 0.9626865671641791, - "grad_norm": 0.43359375, - "learning_rate": 3.847590016611934e-06, - "loss": 1.3517, - "step": 989 - }, - { - "epoch": 0.963659961064244, - "grad_norm": 0.4296875, - "learning_rate": 3.8454352014631624e-06, - "loss": 1.3339, - "step": 990 - }, - { - "epoch": 0.9646333549643089, - "grad_norm": 0.427734375, - "learning_rate": 3.843278978313724e-06, - "loss": 1.3211, - "step": 991 - }, - { - "epoch": 0.9656067488643738, - "grad_norm": 0.4140625, - "learning_rate": 3.841121349420109e-06, - "loss": 1.3338, - "step": 992 - }, - { - "epoch": 0.9665801427644387, - "grad_norm": 0.439453125, - "learning_rate": 3.83896231704028e-06, - "loss": 1.3416, - "step": 993 - }, - { - "epoch": 0.9675535366645036, - "grad_norm": 0.455078125, - "learning_rate": 3.8368018834336694e-06, - "loss": 1.3275, - "step": 994 - }, - { - "epoch": 0.9685269305645685, - "grad_norm": 0.4453125, - "learning_rate": 3.834640050861177e-06, - "loss": 1.3369, - "step": 995 - }, - { - "epoch": 0.9695003244646333, - "grad_norm": 0.412109375, - "learning_rate": 3.832476821585164e-06, - "loss": 1.333, - "step": 996 - }, - { - "epoch": 0.9704737183646982, - "grad_norm": 0.41015625, - "learning_rate": 3.830312197869453e-06, - "loss": 1.3361, - "step": 997 - }, - { - "epoch": 0.9714471122647631, - "grad_norm": 0.41796875, - "learning_rate": 3.828146181979327e-06, - "loss": 1.3312, - "step": 998 - }, - { - "epoch": 0.972420506164828, - "grad_norm": 0.423828125, - "learning_rate": 3.825978776181528e-06, - "loss": 1.3344, - "step": 999 - }, - { - "epoch": 0.9733939000648929, - "grad_norm": 0.41796875, - "learning_rate": 3.8238099827442494e-06, - "loss": 1.3319, - "step": 1000 - }, - { - "epoch": 0.9743672939649578, - "grad_norm": 0.42578125, - "learning_rate": 3.821639803937138e-06, - "loss": 1.3529, - "step": 1001 - }, - { - "epoch": 0.9753406878650227, - "grad_norm": 0.41796875, - "learning_rate": 3.819468242031291e-06, - "loss": 1.3247, - "step": 1002 - }, - { - "epoch": 0.9763140817650876, - "grad_norm": 0.4140625, - "learning_rate": 3.8172952992992515e-06, - "loss": 1.3332, - "step": 1003 - }, - { - "epoch": 0.9772874756651525, - "grad_norm": 0.4140625, - "learning_rate": 3.815120978015008e-06, - "loss": 1.3292, - "step": 1004 - }, - { - "epoch": 0.9782608695652174, - "grad_norm": 0.408203125, - "learning_rate": 3.8129452804539934e-06, - "loss": 1.3356, - "step": 1005 - }, - { - "epoch": 0.9792342634652823, - "grad_norm": 0.4296875, - "learning_rate": 3.8107682088930797e-06, - "loss": 1.3534, - "step": 1006 - }, - { - "epoch": 0.9802076573653472, - "grad_norm": 0.416015625, - "learning_rate": 3.808589765610575e-06, - "loss": 1.3265, - "step": 1007 - }, - { - "epoch": 0.9811810512654121, - "grad_norm": 0.41015625, - "learning_rate": 3.806409952886226e-06, - "loss": 1.2973, - "step": 1008 - }, - { - "epoch": 0.982154445165477, - "grad_norm": 0.4140625, - "learning_rate": 3.8042287730012117e-06, - "loss": 1.3629, - "step": 1009 - }, - { - "epoch": 0.9831278390655419, - "grad_norm": 0.416015625, - "learning_rate": 3.8020462282381397e-06, - "loss": 1.3478, - "step": 1010 - }, - { - "epoch": 0.9841012329656067, - "grad_norm": 0.41015625, - "learning_rate": 3.799862320881048e-06, - "loss": 1.3204, - "step": 1011 - }, - { - "epoch": 0.9850746268656716, - "grad_norm": 0.419921875, - "learning_rate": 3.7976770532154006e-06, - "loss": 1.3189, - "step": 1012 - }, - { - "epoch": 0.9860480207657365, - "grad_norm": 0.419921875, - "learning_rate": 3.7954904275280844e-06, - "loss": 1.333, - "step": 1013 - }, - { - "epoch": 0.9870214146658014, - "grad_norm": 0.419921875, - "learning_rate": 3.7933024461074075e-06, - "loss": 1.3276, - "step": 1014 - }, - { - "epoch": 0.9879948085658663, - "grad_norm": 0.40625, - "learning_rate": 3.7911131112430966e-06, - "loss": 1.3269, - "step": 1015 - }, - { - "epoch": 0.9889682024659312, - "grad_norm": 0.400390625, - "learning_rate": 3.7889224252262956e-06, - "loss": 1.3074, - "step": 1016 - }, - { - "epoch": 0.9899415963659961, - "grad_norm": 0.412109375, - "learning_rate": 3.786730390349561e-06, - "loss": 1.3545, - "step": 1017 - }, - { - "epoch": 0.990914990266061, - "grad_norm": 0.431640625, - "learning_rate": 3.7845370089068626e-06, - "loss": 1.318, - "step": 1018 - }, - { - "epoch": 0.9918883841661259, - "grad_norm": 0.423828125, - "learning_rate": 3.7823422831935796e-06, - "loss": 1.3276, - "step": 1019 - }, - { - "epoch": 0.9928617780661908, - "grad_norm": 0.408203125, - "learning_rate": 3.780146215506494e-06, - "loss": 1.3135, - "step": 1020 - }, - { - "epoch": 0.9938351719662557, - "grad_norm": 0.408203125, - "learning_rate": 3.777948808143797e-06, - "loss": 1.3509, - "step": 1021 - }, - { - "epoch": 0.9948085658663206, - "grad_norm": 0.43359375, - "learning_rate": 3.7757500634050797e-06, - "loss": 1.3448, - "step": 1022 - }, - { - "epoch": 0.9957819597663855, - "grad_norm": 0.41796875, - "learning_rate": 3.7735499835913324e-06, - "loss": 1.3266, - "step": 1023 - }, - { - "epoch": 0.9967553536664504, - "grad_norm": 0.421875, - "learning_rate": 3.7713485710049445e-06, - "loss": 1.3315, - "step": 1024 - }, - { - "epoch": 0.9977287475665152, - "grad_norm": 0.44921875, - "learning_rate": 3.769145827949697e-06, - "loss": 1.3565, - "step": 1025 - }, - { - "epoch": 0.9987021414665801, - "grad_norm": 0.44140625, - "learning_rate": 3.766941756730766e-06, - "loss": 1.3316, - "step": 1026 - }, - { - "epoch": 0.999675535366645, - "grad_norm": 0.416015625, - "learning_rate": 3.764736359654716e-06, - "loss": 1.3254, - "step": 1027 - }, - { - "epoch": 1.00064892926671, - "grad_norm": 0.412109375, - "learning_rate": 3.7625296390294996e-06, - "loss": 1.3144, - "step": 1028 - }, - { - "epoch": 1.001622323166775, - "grad_norm": 0.41796875, - "learning_rate": 3.7603215971644545e-06, - "loss": 1.3256, - "step": 1029 - }, - { - "epoch": 1.001622323166775, - "eval_loss": 1.3576956987380981, - "eval_runtime": 1522.156, - "eval_samples_per_second": 27.431, - "eval_steps_per_second": 3.429, - "step": 1029 - }, - { - "epoch": 1.0005680204487362, - "grad_norm": 0.421875, - "learning_rate": 3.7581122363703016e-06, - "loss": 1.3287, - "step": 1030 - }, - { - "epoch": 1.0015417697894267, - "grad_norm": 0.423828125, - "learning_rate": 3.7559015589591397e-06, - "loss": 1.3425, - "step": 1031 - }, - { - "epoch": 1.0025155191301172, - "grad_norm": 0.419921875, - "learning_rate": 3.753689567244449e-06, - "loss": 1.3431, - "step": 1032 - }, - { - "epoch": 1.0034892684708079, - "grad_norm": 0.41015625, - "learning_rate": 3.751476263541083e-06, - "loss": 1.3238, - "step": 1033 - }, - { - "epoch": 1.0044630178114984, - "grad_norm": 0.41015625, - "learning_rate": 3.7492616501652674e-06, - "loss": 1.3389, - "step": 1034 - }, - { - "epoch": 1.0054367671521889, - "grad_norm": 0.412109375, - "learning_rate": 3.7470457294346012e-06, - "loss": 1.3284, - "step": 1035 - }, - { - "epoch": 1.0064105164928794, - "grad_norm": 0.416015625, - "learning_rate": 3.744828503668049e-06, - "loss": 1.3488, - "step": 1036 - }, - { - "epoch": 1.00738426583357, - "grad_norm": 0.412109375, - "learning_rate": 3.7426099751859413e-06, - "loss": 1.3519, - "step": 1037 - }, - { - "epoch": 1.0083580151742606, - "grad_norm": 0.421875, - "learning_rate": 3.7403901463099745e-06, - "loss": 1.3461, - "step": 1038 - }, - { - "epoch": 1.009331764514951, - "grad_norm": 0.4140625, - "learning_rate": 3.7381690193632027e-06, - "loss": 1.3512, - "step": 1039 - }, - { - "epoch": 1.0103055138556416, - "grad_norm": 0.421875, - "learning_rate": 3.7359465966700405e-06, - "loss": 1.3379, - "step": 1040 - }, - { - "epoch": 1.011279263196332, - "grad_norm": 0.3984375, - "learning_rate": 3.7337228805562566e-06, - "loss": 1.3377, - "step": 1041 - }, - { - "epoch": 1.0122530125370228, - "grad_norm": 0.423828125, - "learning_rate": 3.7314978733489754e-06, - "loss": 1.3243, - "step": 1042 - }, - { - "epoch": 1.0132267618777133, - "grad_norm": 0.416015625, - "learning_rate": 3.7292715773766715e-06, - "loss": 1.3242, - "step": 1043 - }, - { - "epoch": 1.0142005112184038, - "grad_norm": 0.42578125, - "learning_rate": 3.7270439949691677e-06, - "loss": 1.3469, - "step": 1044 - }, - { - "epoch": 1.0151742605590943, - "grad_norm": 0.419921875, - "learning_rate": 3.7248151284576347e-06, - "loss": 1.3463, - "step": 1045 - }, - { - "epoch": 1.016148009899785, - "grad_norm": 0.421875, - "learning_rate": 3.7225849801745835e-06, - "loss": 1.3616, - "step": 1046 - }, - { - "epoch": 1.0171217592404755, - "grad_norm": 0.41015625, - "learning_rate": 3.7203535524538704e-06, - "loss": 1.317, - "step": 1047 - }, - { - "epoch": 1.018095508581166, - "grad_norm": 0.3984375, - "learning_rate": 3.7181208476306892e-06, - "loss": 1.3045, - "step": 1048 - }, - { - "epoch": 1.0190692579218565, - "grad_norm": 0.41015625, - "learning_rate": 3.71588686804157e-06, - "loss": 1.3566, - "step": 1049 - }, - { - "epoch": 1.0200430072625473, - "grad_norm": 0.4296875, - "learning_rate": 3.713651616024376e-06, - "loss": 1.3379, - "step": 1050 - }, - { - "epoch": 1.0210167566032378, - "grad_norm": 0.427734375, - "learning_rate": 3.711415093918303e-06, - "loss": 1.3284, - "step": 1051 - }, - { - "epoch": 1.0219905059439283, - "grad_norm": 0.431640625, - "learning_rate": 3.709177304063877e-06, - "loss": 1.3292, - "step": 1052 - }, - { - "epoch": 1.0229642552846188, - "grad_norm": 0.412109375, - "learning_rate": 3.7069382488029494e-06, - "loss": 1.3282, - "step": 1053 - }, - { - "epoch": 1.0239380046253093, - "grad_norm": 0.412109375, - "learning_rate": 3.7046979304786958e-06, - "loss": 1.3306, - "step": 1054 - }, - { - "epoch": 1.024911753966, - "grad_norm": 0.421875, - "learning_rate": 3.7024563514356132e-06, - "loss": 1.3347, - "step": 1055 - }, - { - "epoch": 1.0258855033066905, - "grad_norm": 0.4296875, - "learning_rate": 3.7002135140195193e-06, - "loss": 1.3151, - "step": 1056 - }, - { - "epoch": 1.026859252647381, - "grad_norm": 0.40625, - "learning_rate": 3.6979694205775486e-06, - "loss": 1.3277, - "step": 1057 - }, - { - "epoch": 1.0278330019880715, - "grad_norm": 0.412109375, - "learning_rate": 3.695724073458149e-06, - "loss": 1.3234, - "step": 1058 - }, - { - "epoch": 1.0288067513287622, - "grad_norm": 0.42578125, - "learning_rate": 3.6934774750110796e-06, - "loss": 1.3497, - "step": 1059 - }, - { - "epoch": 1.0297805006694527, - "grad_norm": 0.41015625, - "learning_rate": 3.691229627587412e-06, - "loss": 1.3346, - "step": 1060 - }, - { - "epoch": 1.0307542500101432, - "grad_norm": 0.435546875, - "learning_rate": 3.6889805335395216e-06, - "loss": 1.3188, - "step": 1061 - }, - { - "epoch": 1.0317279993508337, - "grad_norm": 0.412109375, - "learning_rate": 3.686730195221091e-06, - "loss": 1.3469, - "step": 1062 - }, - { - "epoch": 1.0327017486915244, - "grad_norm": 0.408203125, - "learning_rate": 3.6844786149871044e-06, - "loss": 1.347, - "step": 1063 - }, - { - "epoch": 1.033675498032215, - "grad_norm": 0.41796875, - "learning_rate": 3.6822257951938432e-06, - "loss": 1.3236, - "step": 1064 - }, - { - "epoch": 1.0346492473729054, - "grad_norm": 0.408203125, - "learning_rate": 3.6799717381988885e-06, - "loss": 1.3313, - "step": 1065 - }, - { - "epoch": 1.035622996713596, - "grad_norm": 0.4140625, - "learning_rate": 3.677716446361116e-06, - "loss": 1.3147, - "step": 1066 - }, - { - "epoch": 1.0365967460542864, - "grad_norm": 0.42578125, - "learning_rate": 3.6754599220406935e-06, - "loss": 1.3147, - "step": 1067 - }, - { - "epoch": 1.0375704953949771, - "grad_norm": 0.421875, - "learning_rate": 3.673202167599078e-06, - "loss": 1.3392, - "step": 1068 - }, - { - "epoch": 1.0385442447356676, - "grad_norm": 0.408203125, - "learning_rate": 3.670943185399013e-06, - "loss": 1.3457, - "step": 1069 - }, - { - "epoch": 1.0395179940763581, - "grad_norm": 0.42578125, - "learning_rate": 3.668682977804529e-06, - "loss": 1.3177, - "step": 1070 - }, - { - "epoch": 1.0404917434170486, - "grad_norm": 0.416015625, - "learning_rate": 3.666421547180938e-06, - "loss": 1.3379, - "step": 1071 - }, - { - "epoch": 1.0414654927577394, - "grad_norm": 0.40234375, - "learning_rate": 3.6641588958948315e-06, - "loss": 1.3175, - "step": 1072 - }, - { - "epoch": 1.0424392420984299, - "grad_norm": 0.41796875, - "learning_rate": 3.6618950263140785e-06, - "loss": 1.3534, - "step": 1073 - }, - { - "epoch": 1.0434129914391204, - "grad_norm": 0.416015625, - "learning_rate": 3.6596299408078228e-06, - "loss": 1.349, - "step": 1074 - }, - { - "epoch": 1.0443867407798109, - "grad_norm": 0.41015625, - "learning_rate": 3.657363641746482e-06, - "loss": 1.3124, - "step": 1075 - }, - { - "epoch": 1.0453604901205016, - "grad_norm": 0.421875, - "learning_rate": 3.6550961315017412e-06, - "loss": 1.3346, - "step": 1076 - }, - { - "epoch": 1.046334239461192, - "grad_norm": 0.43359375, - "learning_rate": 3.652827412446556e-06, - "loss": 1.3266, - "step": 1077 - }, - { - "epoch": 1.0473079888018826, - "grad_norm": 0.423828125, - "learning_rate": 3.650557486955145e-06, - "loss": 1.3395, - "step": 1078 - }, - { - "epoch": 1.048281738142573, - "grad_norm": 0.4140625, - "learning_rate": 3.6482863574029893e-06, - "loss": 1.3218, - "step": 1079 - }, - { - "epoch": 1.0492554874832636, - "grad_norm": 0.41796875, - "learning_rate": 3.646014026166831e-06, - "loss": 1.3231, - "step": 1080 - }, - { - "epoch": 1.0502292368239543, - "grad_norm": 0.42578125, - "learning_rate": 3.6437404956246686e-06, - "loss": 1.3384, - "step": 1081 - }, - { - "epoch": 1.0512029861646448, - "grad_norm": 0.416015625, - "learning_rate": 3.6414657681557576e-06, - "loss": 1.3292, - "step": 1082 - }, - { - "epoch": 1.0521767355053353, - "grad_norm": 0.421875, - "learning_rate": 3.6391898461406045e-06, - "loss": 1.3263, - "step": 1083 - }, - { - "epoch": 1.0531504848460258, - "grad_norm": 0.4140625, - "learning_rate": 3.636912731960966e-06, - "loss": 1.3308, - "step": 1084 - }, - { - "epoch": 1.0541242341867165, - "grad_norm": 0.408203125, - "learning_rate": 3.634634427999847e-06, - "loss": 1.3353, - "step": 1085 - }, - { - "epoch": 1.055097983527407, - "grad_norm": 0.416015625, - "learning_rate": 3.632354936641497e-06, - "loss": 1.3165, - "step": 1086 - }, - { - "epoch": 1.0560717328680975, - "grad_norm": 0.42578125, - "learning_rate": 3.630074260271409e-06, - "loss": 1.3247, - "step": 1087 - }, - { - "epoch": 1.057045482208788, - "grad_norm": 0.439453125, - "learning_rate": 3.6277924012763145e-06, - "loss": 1.346, - "step": 1088 - }, - { - "epoch": 1.0580192315494787, - "grad_norm": 0.4296875, - "learning_rate": 3.6255093620441835e-06, - "loss": 1.3533, - "step": 1089 - }, - { - "epoch": 1.0589929808901692, - "grad_norm": 0.419921875, - "learning_rate": 3.6232251449642225e-06, - "loss": 1.3351, - "step": 1090 - }, - { - "epoch": 1.0599667302308597, - "grad_norm": 0.421875, - "learning_rate": 3.620939752426868e-06, - "loss": 1.3338, - "step": 1091 - }, - { - "epoch": 1.0609404795715502, - "grad_norm": 0.42578125, - "learning_rate": 3.6186531868237882e-06, - "loss": 1.3226, - "step": 1092 - }, - { - "epoch": 1.0619142289122407, - "grad_norm": 0.41015625, - "learning_rate": 3.6163654505478796e-06, - "loss": 1.3064, - "step": 1093 - }, - { - "epoch": 1.0628879782529315, - "grad_norm": 0.4296875, - "learning_rate": 3.6140765459932615e-06, - "loss": 1.3237, - "step": 1094 - }, - { - "epoch": 1.063861727593622, - "grad_norm": 0.447265625, - "learning_rate": 3.611786475555278e-06, - "loss": 1.3342, - "step": 1095 - }, - { - "epoch": 1.0648354769343125, - "grad_norm": 0.4296875, - "learning_rate": 3.6094952416304923e-06, - "loss": 1.3248, - "step": 1096 - }, - { - "epoch": 1.065809226275003, - "grad_norm": 0.421875, - "learning_rate": 3.607202846616685e-06, - "loss": 1.3255, - "step": 1097 - }, - { - "epoch": 1.0667829756156937, - "grad_norm": 0.431640625, - "learning_rate": 3.6049092929128527e-06, - "loss": 1.322, - "step": 1098 - }, - { - "epoch": 1.0677567249563842, - "grad_norm": 0.416015625, - "learning_rate": 3.6026145829192033e-06, - "loss": 1.3385, - "step": 1099 - }, - { - "epoch": 1.0687304742970747, - "grad_norm": 0.4140625, - "learning_rate": 3.600318719037156e-06, - "loss": 1.3149, - "step": 1100 - }, - { - "epoch": 1.0697042236377652, - "grad_norm": 0.3984375, - "learning_rate": 3.598021703669337e-06, - "loss": 1.3329, - "step": 1101 - }, - { - "epoch": 1.070677972978456, - "grad_norm": 0.4140625, - "learning_rate": 3.5957235392195777e-06, - "loss": 1.3234, - "step": 1102 - }, - { - "epoch": 1.0716517223191464, - "grad_norm": 0.421875, - "learning_rate": 3.593424228092911e-06, - "loss": 1.3348, - "step": 1103 - }, - { - "epoch": 1.072625471659837, - "grad_norm": 0.431640625, - "learning_rate": 3.5911237726955717e-06, - "loss": 1.3505, - "step": 1104 - }, - { - "epoch": 1.0735992210005274, - "grad_norm": 0.421875, - "learning_rate": 3.58882217543499e-06, - "loss": 1.3292, - "step": 1105 - }, - { - "epoch": 1.074572970341218, - "grad_norm": 0.435546875, - "learning_rate": 3.5865194387197932e-06, - "loss": 1.321, - "step": 1106 - }, - { - "epoch": 1.0755467196819086, - "grad_norm": 0.40625, - "learning_rate": 3.5842155649597993e-06, - "loss": 1.2894, - "step": 1107 - }, - { - "epoch": 1.0765204690225991, - "grad_norm": 0.421875, - "learning_rate": 3.581910556566017e-06, - "loss": 1.3462, - "step": 1108 - }, - { - "epoch": 1.0774942183632896, - "grad_norm": 0.40625, - "learning_rate": 3.579604415950642e-06, - "loss": 1.3344, - "step": 1109 - }, - { - "epoch": 1.0784679677039801, - "grad_norm": 0.40625, - "learning_rate": 3.5772971455270554e-06, - "loss": 1.3305, - "step": 1110 - }, - { - "epoch": 1.0794417170446708, - "grad_norm": 0.43359375, - "learning_rate": 3.5749887477098213e-06, - "loss": 1.3326, - "step": 1111 - }, - { - "epoch": 1.0804154663853613, - "grad_norm": 0.421875, - "learning_rate": 3.5726792249146814e-06, - "loss": 1.331, - "step": 1112 - }, - { - "epoch": 1.0813892157260518, - "grad_norm": 0.48828125, - "learning_rate": 3.5703685795585558e-06, - "loss": 1.3332, - "step": 1113 - }, - { - "epoch": 1.0823629650667423, - "grad_norm": 0.4375, - "learning_rate": 3.56805681405954e-06, - "loss": 1.3129, - "step": 1114 - }, - { - "epoch": 1.0833367144074328, - "grad_norm": 0.421875, - "learning_rate": 3.565743930836902e-06, - "loss": 1.323, - "step": 1115 - }, - { - "epoch": 1.0843104637481236, - "grad_norm": 0.41796875, - "learning_rate": 3.5634299323110777e-06, - "loss": 1.3322, - "step": 1116 - }, - { - "epoch": 1.085284213088814, - "grad_norm": 0.40625, - "learning_rate": 3.5611148209036716e-06, - "loss": 1.3294, - "step": 1117 - }, - { - "epoch": 1.0862579624295046, - "grad_norm": 0.421875, - "learning_rate": 3.5587985990374535e-06, - "loss": 1.3162, - "step": 1118 - }, - { - "epoch": 1.087231711770195, - "grad_norm": 0.4375, - "learning_rate": 3.5564812691363527e-06, - "loss": 1.3204, - "step": 1119 - }, - { - "epoch": 1.0882054611108858, - "grad_norm": 0.423828125, - "learning_rate": 3.55416283362546e-06, - "loss": 1.3289, - "step": 1120 - }, - { - "epoch": 1.0891792104515763, - "grad_norm": 0.42578125, - "learning_rate": 3.551843294931024e-06, - "loss": 1.3371, - "step": 1121 - }, - { - "epoch": 1.0901529597922668, - "grad_norm": 0.416015625, - "learning_rate": 3.5495226554804453e-06, - "loss": 1.3267, - "step": 1122 - }, - { - "epoch": 1.0911267091329573, - "grad_norm": 0.423828125, - "learning_rate": 3.547200917702279e-06, - "loss": 1.3404, - "step": 1123 - }, - { - "epoch": 1.092100458473648, - "grad_norm": 0.412109375, - "learning_rate": 3.5448780840262274e-06, - "loss": 1.3423, - "step": 1124 - }, - { - "epoch": 1.0930742078143385, - "grad_norm": 0.423828125, - "learning_rate": 3.5425541568831406e-06, - "loss": 1.3314, - "step": 1125 - }, - { - "epoch": 1.094047957155029, - "grad_norm": 0.41015625, - "learning_rate": 3.5402291387050135e-06, - "loss": 1.3427, - "step": 1126 - }, - { - "epoch": 1.0950217064957195, - "grad_norm": 0.408203125, - "learning_rate": 3.537903031924983e-06, - "loss": 1.3438, - "step": 1127 - }, - { - "epoch": 1.0959954558364102, - "grad_norm": 0.412109375, - "learning_rate": 3.535575838977323e-06, - "loss": 1.3431, - "step": 1128 - }, - { - "epoch": 1.0969692051771007, - "grad_norm": 0.416015625, - "learning_rate": 3.5332475622974477e-06, - "loss": 1.3289, - "step": 1129 - }, - { - "epoch": 1.0979429545177912, - "grad_norm": 0.42578125, - "learning_rate": 3.5309182043219016e-06, - "loss": 1.3165, - "step": 1130 - }, - { - "epoch": 1.0989167038584817, - "grad_norm": 0.4140625, - "learning_rate": 3.528587767488363e-06, - "loss": 1.3249, - "step": 1131 - }, - { - "epoch": 1.0998904531991722, - "grad_norm": 0.408203125, - "learning_rate": 3.526256254235638e-06, - "loss": 1.3293, - "step": 1132 - }, - { - "epoch": 1.100864202539863, - "grad_norm": 0.4140625, - "learning_rate": 3.523923667003662e-06, - "loss": 1.3224, - "step": 1133 - }, - { - "epoch": 1.1018379518805534, - "grad_norm": 0.421875, - "learning_rate": 3.521590008233491e-06, - "loss": 1.3258, - "step": 1134 - }, - { - "epoch": 1.102811701221244, - "grad_norm": 0.416015625, - "learning_rate": 3.5192552803673043e-06, - "loss": 1.3404, - "step": 1135 - }, - { - "epoch": 1.1037854505619344, - "grad_norm": 0.400390625, - "learning_rate": 3.516919485848398e-06, - "loss": 1.322, - "step": 1136 - }, - { - "epoch": 1.1047591999026252, - "grad_norm": 0.4140625, - "learning_rate": 3.5145826271211862e-06, - "loss": 1.3295, - "step": 1137 - }, - { - "epoch": 1.1057329492433157, - "grad_norm": 0.421875, - "learning_rate": 3.5122447066311973e-06, - "loss": 1.3475, - "step": 1138 - }, - { - "epoch": 1.1067066985840062, - "grad_norm": 0.4140625, - "learning_rate": 3.5099057268250692e-06, - "loss": 1.3253, - "step": 1139 - }, - { - "epoch": 1.1076804479246967, - "grad_norm": 0.419921875, - "learning_rate": 3.5075656901505495e-06, - "loss": 1.32, - "step": 1140 - }, - { - "epoch": 1.1086541972653872, - "grad_norm": 0.419921875, - "learning_rate": 3.5052245990564902e-06, - "loss": 1.3076, - "step": 1141 - }, - { - "epoch": 1.1096279466060779, - "grad_norm": 0.4140625, - "learning_rate": 3.5028824559928488e-06, - "loss": 1.3443, - "step": 1142 - }, - { - "epoch": 1.1106016959467684, - "grad_norm": 0.41015625, - "learning_rate": 3.5005392634106827e-06, - "loss": 1.3314, - "step": 1143 - }, - { - "epoch": 1.1115754452874589, - "grad_norm": 0.412109375, - "learning_rate": 3.4981950237621476e-06, - "loss": 1.3521, - "step": 1144 - }, - { - "epoch": 1.1125491946281494, - "grad_norm": 0.400390625, - "learning_rate": 3.4958497395004946e-06, - "loss": 1.3114, - "step": 1145 - }, - { - "epoch": 1.11352294396884, - "grad_norm": 0.408203125, - "learning_rate": 3.4935034130800693e-06, - "loss": 1.3359, - "step": 1146 - }, - { - "epoch": 1.1144966933095306, - "grad_norm": 0.41796875, - "learning_rate": 3.491156046956306e-06, - "loss": 1.3377, - "step": 1147 - }, - { - "epoch": 1.115470442650221, - "grad_norm": 0.42578125, - "learning_rate": 3.4888076435857286e-06, - "loss": 1.3571, - "step": 1148 - }, - { - "epoch": 1.1164441919909116, - "grad_norm": 0.412109375, - "learning_rate": 3.4864582054259474e-06, - "loss": 1.3289, - "step": 1149 - }, - { - "epoch": 1.1174179413316023, - "grad_norm": 0.4140625, - "learning_rate": 3.4841077349356518e-06, - "loss": 1.3388, - "step": 1150 - }, - { - "epoch": 1.1183916906722928, - "grad_norm": 0.419921875, - "learning_rate": 3.4817562345746146e-06, - "loss": 1.318, - "step": 1151 - }, - { - "epoch": 1.1193654400129833, - "grad_norm": 0.416015625, - "learning_rate": 3.4794037068036867e-06, - "loss": 1.3307, - "step": 1152 - }, - { - "epoch": 1.1203391893536738, - "grad_norm": 0.412109375, - "learning_rate": 3.4770501540847935e-06, - "loss": 1.3155, - "step": 1153 - }, - { - "epoch": 1.1213129386943643, - "grad_norm": 0.416015625, - "learning_rate": 3.4746955788809306e-06, - "loss": 1.3097, - "step": 1154 - }, - { - "epoch": 1.122286688035055, - "grad_norm": 0.3984375, - "learning_rate": 3.472339983656167e-06, - "loss": 1.3396, - "step": 1155 - }, - { - "epoch": 1.1232604373757455, - "grad_norm": 0.416015625, - "learning_rate": 3.469983370875638e-06, - "loss": 1.3431, - "step": 1156 - }, - { - "epoch": 1.124234186716436, - "grad_norm": 0.43359375, - "learning_rate": 3.4676257430055438e-06, - "loss": 1.3206, - "step": 1157 - }, - { - "epoch": 1.1252079360571265, - "grad_norm": 0.416015625, - "learning_rate": 3.4652671025131457e-06, - "loss": 1.3194, - "step": 1158 - }, - { - "epoch": 1.1261816853978173, - "grad_norm": 0.40234375, - "learning_rate": 3.4629074518667666e-06, - "loss": 1.3156, - "step": 1159 - }, - { - "epoch": 1.1271554347385078, - "grad_norm": 0.408203125, - "learning_rate": 3.4605467935357856e-06, - "loss": 1.3325, - "step": 1160 - }, - { - "epoch": 1.1281291840791983, - "grad_norm": 0.4140625, - "learning_rate": 3.4581851299906367e-06, - "loss": 1.326, - "step": 1161 - }, - { - "epoch": 1.1291029334198888, - "grad_norm": 0.421875, - "learning_rate": 3.4558224637028055e-06, - "loss": 1.3283, - "step": 1162 - }, - { - "epoch": 1.1300766827605795, - "grad_norm": 0.40625, - "learning_rate": 3.4534587971448265e-06, - "loss": 1.3237, - "step": 1163 - }, - { - "epoch": 1.13105043210127, - "grad_norm": 0.41796875, - "learning_rate": 3.4510941327902815e-06, - "loss": 1.327, - "step": 1164 - }, - { - "epoch": 1.1320241814419605, - "grad_norm": 0.412109375, - "learning_rate": 3.448728473113797e-06, - "loss": 1.3273, - "step": 1165 - }, - { - "epoch": 1.132997930782651, - "grad_norm": 0.41796875, - "learning_rate": 3.446361820591041e-06, - "loss": 1.3341, - "step": 1166 - }, - { - "epoch": 1.1339716801233415, - "grad_norm": 0.43359375, - "learning_rate": 3.4439941776987195e-06, - "loss": 1.3253, - "step": 1167 - }, - { - "epoch": 1.1349454294640322, - "grad_norm": 0.416015625, - "learning_rate": 3.4416255469145765e-06, - "loss": 1.3398, - "step": 1168 - }, - { - "epoch": 1.1359191788047227, - "grad_norm": 0.41796875, - "learning_rate": 3.4392559307173876e-06, - "loss": 1.3284, - "step": 1169 - }, - { - "epoch": 1.1368929281454132, - "grad_norm": 0.421875, - "learning_rate": 3.4368853315869616e-06, - "loss": 1.3433, - "step": 1170 - }, - { - "epoch": 1.1378666774861037, - "grad_norm": 0.431640625, - "learning_rate": 3.4345137520041354e-06, - "loss": 1.3196, - "step": 1171 - }, - { - "epoch": 1.1388404268267944, - "grad_norm": 0.427734375, - "learning_rate": 3.432141194450772e-06, - "loss": 1.3346, - "step": 1172 - }, - { - "epoch": 1.139814176167485, - "grad_norm": 0.419921875, - "learning_rate": 3.4297676614097573e-06, - "loss": 1.3232, - "step": 1173 - }, - { - "epoch": 1.1407879255081754, - "grad_norm": 0.41796875, - "learning_rate": 3.4273931553649986e-06, - "loss": 1.3498, - "step": 1174 - }, - { - "epoch": 1.141761674848866, - "grad_norm": 0.412109375, - "learning_rate": 3.4250176788014217e-06, - "loss": 1.349, - "step": 1175 - }, - { - "epoch": 1.1427354241895564, - "grad_norm": 0.423828125, - "learning_rate": 3.4226412342049662e-06, - "loss": 1.3685, - "step": 1176 - }, - { - "epoch": 1.1437091735302471, - "grad_norm": 0.462890625, - "learning_rate": 3.420263824062588e-06, - "loss": 1.3228, - "step": 1177 - }, - { - "epoch": 1.1446829228709376, - "grad_norm": 0.41796875, - "learning_rate": 3.4178854508622506e-06, - "loss": 1.3361, - "step": 1178 - }, - { - "epoch": 1.1456566722116281, - "grad_norm": 0.408203125, - "learning_rate": 3.415506117092927e-06, - "loss": 1.3417, - "step": 1179 - }, - { - "epoch": 1.1466304215523186, - "grad_norm": 0.4140625, - "learning_rate": 3.4131258252445947e-06, - "loss": 1.3278, - "step": 1180 - }, - { - "epoch": 1.1476041708930094, - "grad_norm": 0.412109375, - "learning_rate": 3.4107445778082334e-06, - "loss": 1.3197, - "step": 1181 - }, - { - "epoch": 1.1485779202336999, - "grad_norm": 0.41796875, - "learning_rate": 3.4083623772758236e-06, - "loss": 1.3394, - "step": 1182 - }, - { - "epoch": 1.1495516695743904, - "grad_norm": 0.4140625, - "learning_rate": 3.4059792261403422e-06, - "loss": 1.3305, - "step": 1183 - }, - { - "epoch": 1.1505254189150809, - "grad_norm": 0.4140625, - "learning_rate": 3.4035951268957647e-06, - "loss": 1.3281, - "step": 1184 - }, - { - "epoch": 1.1514991682557716, - "grad_norm": 0.41796875, - "learning_rate": 3.401210082037052e-06, - "loss": 1.3526, - "step": 1185 - }, - { - "epoch": 1.152472917596462, - "grad_norm": 0.40625, - "learning_rate": 3.3988240940601604e-06, - "loss": 1.3178, - "step": 1186 - }, - { - "epoch": 1.1534466669371526, - "grad_norm": 0.408203125, - "learning_rate": 3.3964371654620305e-06, - "loss": 1.3464, - "step": 1187 - }, - { - "epoch": 1.154420416277843, - "grad_norm": 0.408203125, - "learning_rate": 3.3940492987405888e-06, - "loss": 1.3251, - "step": 1188 - }, - { - "epoch": 1.1553941656185338, - "grad_norm": 0.416015625, - "learning_rate": 3.391660496394742e-06, - "loss": 1.3338, - "step": 1189 - }, - { - "epoch": 1.1563679149592243, - "grad_norm": 0.3984375, - "learning_rate": 3.389270760924377e-06, - "loss": 1.3217, - "step": 1190 - }, - { - "epoch": 1.1573416642999148, - "grad_norm": 0.416015625, - "learning_rate": 3.386880094830356e-06, - "loss": 1.3445, - "step": 1191 - }, - { - "epoch": 1.1583154136406053, - "grad_norm": 0.40625, - "learning_rate": 3.3844885006145162e-06, - "loss": 1.3222, - "step": 1192 - }, - { - "epoch": 1.1592891629812958, - "grad_norm": 0.412109375, - "learning_rate": 3.382095980779666e-06, - "loss": 1.3295, - "step": 1193 - }, - { - "epoch": 1.1602629123219865, - "grad_norm": 0.423828125, - "learning_rate": 3.3797025378295826e-06, - "loss": 1.3269, - "step": 1194 - }, - { - "epoch": 1.161236661662677, - "grad_norm": 0.4375, - "learning_rate": 3.3773081742690097e-06, - "loss": 1.3061, - "step": 1195 - }, - { - "epoch": 1.1622104110033675, - "grad_norm": 0.408203125, - "learning_rate": 3.374912892603651e-06, - "loss": 1.3263, - "step": 1196 - }, - { - "epoch": 1.163184160344058, - "grad_norm": 0.41015625, - "learning_rate": 3.372516695340176e-06, - "loss": 1.3231, - "step": 1197 - }, - { - "epoch": 1.1641579096847487, - "grad_norm": 0.4140625, - "learning_rate": 3.370119584986209e-06, - "loss": 1.3239, - "step": 1198 - }, - { - "epoch": 1.1651316590254392, - "grad_norm": 0.41015625, - "learning_rate": 3.36772156405033e-06, - "loss": 1.3341, - "step": 1199 - }, - { - "epoch": 1.1661054083661297, - "grad_norm": 0.416015625, - "learning_rate": 3.365322635042075e-06, - "loss": 1.338, - "step": 1200 - }, - { - "epoch": 1.1670791577068202, - "grad_norm": 0.423828125, - "learning_rate": 3.362922800471927e-06, - "loss": 1.3264, - "step": 1201 - }, - { - "epoch": 1.1680529070475107, - "grad_norm": 0.427734375, - "learning_rate": 3.360522062851317e-06, - "loss": 1.3335, - "step": 1202 - }, - { - "epoch": 1.1690266563882015, - "grad_norm": 0.408203125, - "learning_rate": 3.3581204246926223e-06, - "loss": 1.3147, - "step": 1203 - }, - { - "epoch": 1.170000405728892, - "grad_norm": 0.4140625, - "learning_rate": 3.3557178885091625e-06, - "loss": 1.3319, - "step": 1204 - }, - { - "epoch": 1.1709741550695825, - "grad_norm": 0.4296875, - "learning_rate": 3.3533144568151956e-06, - "loss": 1.3282, - "step": 1205 - }, - { - "epoch": 1.171947904410273, - "grad_norm": 0.4140625, - "learning_rate": 3.35091013212592e-06, - "loss": 1.325, - "step": 1206 - }, - { - "epoch": 1.1729216537509637, - "grad_norm": 0.427734375, - "learning_rate": 3.348504916957463e-06, - "loss": 1.3234, - "step": 1207 - }, - { - "epoch": 1.1738954030916542, - "grad_norm": 0.421875, - "learning_rate": 3.3460988138268895e-06, - "loss": 1.3274, - "step": 1208 - }, - { - "epoch": 1.1748691524323447, - "grad_norm": 0.431640625, - "learning_rate": 3.343691825252191e-06, - "loss": 1.3238, - "step": 1209 - }, - { - "epoch": 1.1758429017730352, - "grad_norm": 0.431640625, - "learning_rate": 3.3412839537522858e-06, - "loss": 1.3196, - "step": 1210 - }, - { - "epoch": 1.1768166511137257, - "grad_norm": 0.4140625, - "learning_rate": 3.3388752018470156e-06, - "loss": 1.3278, - "step": 1211 - }, - { - "epoch": 1.1777904004544164, - "grad_norm": 0.42578125, - "learning_rate": 3.3364655720571453e-06, - "loss": 1.3312, - "step": 1212 - }, - { - "epoch": 1.178764149795107, - "grad_norm": 0.42578125, - "learning_rate": 3.334055066904357e-06, - "loss": 1.3238, - "step": 1213 - }, - { - "epoch": 1.1797378991357974, - "grad_norm": 0.427734375, - "learning_rate": 3.331643688911248e-06, - "loss": 1.3206, - "step": 1214 - }, - { - "epoch": 1.1807116484764881, - "grad_norm": 0.412109375, - "learning_rate": 3.329231440601332e-06, - "loss": 1.311, - "step": 1215 - }, - { - "epoch": 1.1816853978171786, - "grad_norm": 0.412109375, - "learning_rate": 3.3268183244990308e-06, - "loss": 1.3446, - "step": 1216 - }, - { - "epoch": 1.1826591471578691, - "grad_norm": 0.41015625, - "learning_rate": 3.324404343129676e-06, - "loss": 1.3251, - "step": 1217 - }, - { - "epoch": 1.1836328964985596, - "grad_norm": 0.421875, - "learning_rate": 3.3219894990195036e-06, - "loss": 1.3382, - "step": 1218 - }, - { - "epoch": 1.1846066458392501, - "grad_norm": 0.4296875, - "learning_rate": 3.3195737946956525e-06, - "loss": 1.3156, - "step": 1219 - }, - { - "epoch": 1.1855803951799408, - "grad_norm": 0.427734375, - "learning_rate": 3.3171572326861624e-06, - "loss": 1.3352, - "step": 1220 - }, - { - "epoch": 1.1865541445206313, - "grad_norm": 0.419921875, - "learning_rate": 3.3147398155199706e-06, - "loss": 1.2905, - "step": 1221 - }, - { - "epoch": 1.1875278938613218, - "grad_norm": 0.408203125, - "learning_rate": 3.3123215457269086e-06, - "loss": 1.3017, - "step": 1222 - }, - { - "epoch": 1.1885016432020123, - "grad_norm": 0.4375, - "learning_rate": 3.3099024258377017e-06, - "loss": 1.3375, - "step": 1223 - }, - { - "epoch": 1.189475392542703, - "grad_norm": 0.412109375, - "learning_rate": 3.307482458383962e-06, - "loss": 1.3315, - "step": 1224 - }, - { - "epoch": 1.1904491418833936, - "grad_norm": 0.3984375, - "learning_rate": 3.305061645898192e-06, - "loss": 1.3343, - "step": 1225 - }, - { - "epoch": 1.191422891224084, - "grad_norm": 0.427734375, - "learning_rate": 3.3026399909137757e-06, - "loss": 1.3336, - "step": 1226 - }, - { - "epoch": 1.1923966405647746, - "grad_norm": 0.431640625, - "learning_rate": 3.300217495964981e-06, - "loss": 1.3156, - "step": 1227 - }, - { - "epoch": 1.193370389905465, - "grad_norm": 0.43359375, - "learning_rate": 3.2977941635869537e-06, - "loss": 1.3331, - "step": 1228 - }, - { - "epoch": 1.1943441392461558, - "grad_norm": 0.443359375, - "learning_rate": 3.295369996315715e-06, - "loss": 1.3356, - "step": 1229 - }, - { - "epoch": 1.1953178885868463, - "grad_norm": 0.41796875, - "learning_rate": 3.292944996688161e-06, - "loss": 1.3177, - "step": 1230 - }, - { - "epoch": 1.1962916379275368, - "grad_norm": 0.416015625, - "learning_rate": 3.29051916724206e-06, - "loss": 1.3256, - "step": 1231 - }, - { - "epoch": 1.1972653872682273, - "grad_norm": 0.4140625, - "learning_rate": 3.2880925105160464e-06, - "loss": 1.3083, - "step": 1232 - }, - { - "epoch": 1.198239136608918, - "grad_norm": 0.4140625, - "learning_rate": 3.2856650290496216e-06, - "loss": 1.3297, - "step": 1233 - }, - { - "epoch": 1.1992128859496085, - "grad_norm": 0.41015625, - "learning_rate": 3.2832367253831506e-06, - "loss": 1.3139, - "step": 1234 - }, - { - "epoch": 1.200186635290299, - "grad_norm": 0.421875, - "learning_rate": 3.280807602057857e-06, - "loss": 1.3298, - "step": 1235 - }, - { - "epoch": 1.2011603846309895, - "grad_norm": 0.4296875, - "learning_rate": 3.278377661615823e-06, - "loss": 1.3233, - "step": 1236 - }, - { - "epoch": 1.20213413397168, - "grad_norm": 0.41796875, - "learning_rate": 3.2759469065999877e-06, - "loss": 1.3144, - "step": 1237 - }, - { - "epoch": 1.2031078833123707, - "grad_norm": 0.421875, - "learning_rate": 3.2735153395541403e-06, - "loss": 1.3305, - "step": 1238 - }, - { - "epoch": 1.2040816326530612, - "grad_norm": 0.404296875, - "learning_rate": 3.2710829630229208e-06, - "loss": 1.3223, - "step": 1239 - }, - { - "epoch": 1.2050553819937517, - "grad_norm": 0.408203125, - "learning_rate": 3.268649779551816e-06, - "loss": 1.3294, - "step": 1240 - }, - { - "epoch": 1.2060291313344425, - "grad_norm": 0.41015625, - "learning_rate": 3.266215791687157e-06, - "loss": 1.3111, - "step": 1241 - }, - { - "epoch": 1.207002880675133, - "grad_norm": 0.43359375, - "learning_rate": 3.263781001976118e-06, - "loss": 1.3286, - "step": 1242 - }, - { - "epoch": 1.2079766300158234, - "grad_norm": 0.423828125, - "learning_rate": 3.261345412966711e-06, - "loss": 1.3251, - "step": 1243 - }, - { - "epoch": 1.208950379356514, - "grad_norm": 0.439453125, - "learning_rate": 3.2589090272077845e-06, - "loss": 1.3358, - "step": 1244 - }, - { - "epoch": 1.2099241286972044, - "grad_norm": 0.416015625, - "learning_rate": 3.2564718472490215e-06, - "loss": 1.3325, - "step": 1245 - }, - { - "epoch": 1.2108978780378952, - "grad_norm": 0.40625, - "learning_rate": 3.2540338756409353e-06, - "loss": 1.3274, - "step": 1246 - }, - { - "epoch": 1.2118716273785857, - "grad_norm": 0.423828125, - "learning_rate": 3.2515951149348683e-06, - "loss": 1.3338, - "step": 1247 - }, - { - "epoch": 1.2128453767192762, - "grad_norm": 0.423828125, - "learning_rate": 3.2491555676829894e-06, - "loss": 1.3228, - "step": 1248 - }, - { - "epoch": 1.2138191260599667, - "grad_norm": 0.4296875, - "learning_rate": 3.2467152364382883e-06, - "loss": 1.328, - "step": 1249 - }, - { - "epoch": 1.2147928754006574, - "grad_norm": 0.41015625, - "learning_rate": 3.244274123754578e-06, - "loss": 1.3115, - "step": 1250 - }, - { - "epoch": 1.215766624741348, - "grad_norm": 0.40625, - "learning_rate": 3.2418322321864866e-06, - "loss": 1.3481, - "step": 1251 - }, - { - "epoch": 1.2167403740820384, - "grad_norm": 0.423828125, - "learning_rate": 3.2393895642894596e-06, - "loss": 1.3045, - "step": 1252 - }, - { - "epoch": 1.217714123422729, - "grad_norm": 0.4140625, - "learning_rate": 3.236946122619753e-06, - "loss": 1.304, - "step": 1253 - }, - { - "epoch": 1.2186878727634194, - "grad_norm": 0.421875, - "learning_rate": 3.2345019097344336e-06, - "loss": 1.3004, - "step": 1254 - }, - { - "epoch": 1.21966162210411, - "grad_norm": 0.427734375, - "learning_rate": 3.232056928191376e-06, - "loss": 1.3315, - "step": 1255 - }, - { - "epoch": 1.2206353714448006, - "grad_norm": 0.4140625, - "learning_rate": 3.229611180549258e-06, - "loss": 1.303, - "step": 1256 - }, - { - "epoch": 1.221609120785491, - "grad_norm": 0.42578125, - "learning_rate": 3.2271646693675596e-06, - "loss": 1.3242, - "step": 1257 - }, - { - "epoch": 1.2225828701261816, - "grad_norm": 0.40234375, - "learning_rate": 3.2247173972065593e-06, - "loss": 1.2989, - "step": 1258 - }, - { - "epoch": 1.2235566194668723, - "grad_norm": 0.42578125, - "learning_rate": 3.2222693666273318e-06, - "loss": 1.336, - "step": 1259 - }, - { - "epoch": 1.2245303688075628, - "grad_norm": 0.3984375, - "learning_rate": 3.219820580191747e-06, - "loss": 1.336, - "step": 1260 - }, - { - "epoch": 1.2255041181482533, - "grad_norm": 0.423828125, - "learning_rate": 3.2173710404624646e-06, - "loss": 1.3578, - "step": 1261 - }, - { - "epoch": 1.2264778674889438, - "grad_norm": 0.42578125, - "learning_rate": 3.2149207500029337e-06, - "loss": 1.3048, - "step": 1262 - }, - { - "epoch": 1.2274516168296343, - "grad_norm": 0.400390625, - "learning_rate": 3.212469711377387e-06, - "loss": 1.3316, - "step": 1263 - }, - { - "epoch": 1.228425366170325, - "grad_norm": 0.400390625, - "learning_rate": 3.210017927150842e-06, - "loss": 1.3255, - "step": 1264 - }, - { - "epoch": 1.2293991155110155, - "grad_norm": 0.421875, - "learning_rate": 3.2075653998890953e-06, - "loss": 1.3153, - "step": 1265 - }, - { - "epoch": 1.230372864851706, - "grad_norm": 0.419921875, - "learning_rate": 3.205112132158722e-06, - "loss": 1.3091, - "step": 1266 - }, - { - "epoch": 1.2313466141923965, - "grad_norm": 0.4296875, - "learning_rate": 3.202658126527073e-06, - "loss": 1.3172, - "step": 1267 - }, - { - "epoch": 1.2323203635330873, - "grad_norm": 0.419921875, - "learning_rate": 3.2002033855622683e-06, - "loss": 1.3196, - "step": 1268 - }, - { - "epoch": 1.2332941128737778, - "grad_norm": 0.427734375, - "learning_rate": 3.1977479118331994e-06, - "loss": 1.3345, - "step": 1269 - }, - { - "epoch": 1.2342678622144683, - "grad_norm": 0.42578125, - "learning_rate": 3.195291707909526e-06, - "loss": 1.3345, - "step": 1270 - }, - { - "epoch": 1.2352416115551588, - "grad_norm": 0.40625, - "learning_rate": 3.192834776361669e-06, - "loss": 1.3376, - "step": 1271 - }, - { - "epoch": 1.2362153608958493, - "grad_norm": 0.419921875, - "learning_rate": 3.190377119760813e-06, - "loss": 1.332, - "step": 1272 - }, - { - "epoch": 1.23718911023654, - "grad_norm": 0.42578125, - "learning_rate": 3.1879187406789014e-06, - "loss": 1.3407, - "step": 1273 - }, - { - "epoch": 1.2381628595772305, - "grad_norm": 0.41796875, - "learning_rate": 3.1854596416886313e-06, - "loss": 1.3402, - "step": 1274 - }, - { - "epoch": 1.239136608917921, - "grad_norm": 0.42578125, - "learning_rate": 3.1829998253634554e-06, - "loss": 1.3287, - "step": 1275 - }, - { - "epoch": 1.2401103582586117, - "grad_norm": 0.396484375, - "learning_rate": 3.180539294277577e-06, - "loss": 1.3168, - "step": 1276 - }, - { - "epoch": 1.2410841075993022, - "grad_norm": 0.40625, - "learning_rate": 3.178078051005946e-06, - "loss": 1.3151, - "step": 1277 - }, - { - "epoch": 1.2420578569399927, - "grad_norm": 0.416015625, - "learning_rate": 3.1756160981242596e-06, - "loss": 1.3262, - "step": 1278 - }, - { - "epoch": 1.2430316062806832, - "grad_norm": 0.41015625, - "learning_rate": 3.1731534382089552e-06, - "loss": 1.3139, - "step": 1279 - }, - { - "epoch": 1.2440053556213737, - "grad_norm": 0.443359375, - "learning_rate": 3.170690073837212e-06, - "loss": 1.3259, - "step": 1280 - }, - { - "epoch": 1.2449791049620644, - "grad_norm": 0.419921875, - "learning_rate": 3.168226007586946e-06, - "loss": 1.3313, - "step": 1281 - }, - { - "epoch": 1.245952854302755, - "grad_norm": 0.41796875, - "learning_rate": 3.1657612420368074e-06, - "loss": 1.3243, - "step": 1282 - }, - { - "epoch": 1.2469266036434454, - "grad_norm": 0.412109375, - "learning_rate": 3.163295779766178e-06, - "loss": 1.3218, - "step": 1283 - }, - { - "epoch": 1.247900352984136, - "grad_norm": 0.416015625, - "learning_rate": 3.16082962335517e-06, - "loss": 1.3141, - "step": 1284 - }, - { - "epoch": 1.2488741023248267, - "grad_norm": 0.412109375, - "learning_rate": 3.15836277538462e-06, - "loss": 1.3284, - "step": 1285 - }, - { - "epoch": 1.2498478516655172, - "grad_norm": 0.423828125, - "learning_rate": 3.15589523843609e-06, - "loss": 1.3417, - "step": 1286 - }, - { - "epoch": 1.2508216010062077, - "grad_norm": 0.416015625, - "learning_rate": 3.1534270150918616e-06, - "loss": 1.3158, - "step": 1287 - }, - { - "epoch": 1.2517953503468982, - "grad_norm": 0.419921875, - "learning_rate": 3.1509581079349373e-06, - "loss": 1.3368, - "step": 1288 - }, - { - "epoch": 1.2527690996875886, - "grad_norm": 0.427734375, - "learning_rate": 3.1484885195490323e-06, - "loss": 1.3687, - "step": 1289 - }, - { - "epoch": 1.2537428490282794, - "grad_norm": 0.416015625, - "learning_rate": 3.146018252518575e-06, - "loss": 1.3235, - "step": 1290 - }, - { - "epoch": 1.2547165983689699, - "grad_norm": 0.4140625, - "learning_rate": 3.1435473094287063e-06, - "loss": 1.3594, - "step": 1291 - }, - { - "epoch": 1.2556903477096604, - "grad_norm": 0.4296875, - "learning_rate": 3.1410756928652723e-06, - "loss": 1.3163, - "step": 1292 - }, - { - "epoch": 1.256664097050351, - "grad_norm": 0.41015625, - "learning_rate": 3.1386034054148258e-06, - "loss": 1.324, - "step": 1293 - }, - { - "epoch": 1.2576378463910416, - "grad_norm": 0.419921875, - "learning_rate": 3.1361304496646195e-06, - "loss": 1.3288, - "step": 1294 - }, - { - "epoch": 1.258611595731732, - "grad_norm": 0.416015625, - "learning_rate": 3.133656828202607e-06, - "loss": 1.3164, - "step": 1295 - }, - { - "epoch": 1.2595853450724226, - "grad_norm": 0.419921875, - "learning_rate": 3.131182543617439e-06, - "loss": 1.3116, - "step": 1296 - }, - { - "epoch": 1.260559094413113, - "grad_norm": 0.40625, - "learning_rate": 3.1287075984984573e-06, - "loss": 1.3235, - "step": 1297 - }, - { - "epoch": 1.2615328437538036, - "grad_norm": 0.412109375, - "learning_rate": 3.126231995435699e-06, - "loss": 1.3236, - "step": 1298 - }, - { - "epoch": 1.2625065930944943, - "grad_norm": 0.427734375, - "learning_rate": 3.123755737019886e-06, - "loss": 1.3198, - "step": 1299 - }, - { - "epoch": 1.2634803424351848, - "grad_norm": 0.416015625, - "learning_rate": 3.1212788258424304e-06, - "loss": 1.3232, - "step": 1300 - }, - { - "epoch": 1.2644540917758753, - "grad_norm": 0.41796875, - "learning_rate": 3.118801264495422e-06, - "loss": 1.3241, - "step": 1301 - }, - { - "epoch": 1.265427841116566, - "grad_norm": 0.423828125, - "learning_rate": 3.116323055571635e-06, - "loss": 1.3101, - "step": 1302 - }, - { - "epoch": 1.2664015904572565, - "grad_norm": 0.416015625, - "learning_rate": 3.1138442016645197e-06, - "loss": 1.3365, - "step": 1303 - }, - { - "epoch": 1.267375339797947, - "grad_norm": 0.43359375, - "learning_rate": 3.1113647053682024e-06, - "loss": 1.3186, - "step": 1304 - }, - { - "epoch": 1.2683490891386375, - "grad_norm": 0.419921875, - "learning_rate": 3.1088845692774798e-06, - "loss": 1.3203, - "step": 1305 - }, - { - "epoch": 1.269322838479328, - "grad_norm": 0.421875, - "learning_rate": 3.106403795987821e-06, - "loss": 1.3171, - "step": 1306 - }, - { - "epoch": 1.2702965878200185, - "grad_norm": 0.45703125, - "learning_rate": 3.1039223880953593e-06, - "loss": 1.3087, - "step": 1307 - }, - { - "epoch": 1.2712703371607093, - "grad_norm": 0.412109375, - "learning_rate": 3.1014403481968936e-06, - "loss": 1.3124, - "step": 1308 - }, - { - "epoch": 1.2722440865013998, - "grad_norm": 0.423828125, - "learning_rate": 3.098957678889882e-06, - "loss": 1.3306, - "step": 1309 - }, - { - "epoch": 1.2732178358420903, - "grad_norm": 0.439453125, - "learning_rate": 3.0964743827724453e-06, - "loss": 1.3571, - "step": 1310 - }, - { - "epoch": 1.274191585182781, - "grad_norm": 0.4296875, - "learning_rate": 3.093990462443357e-06, - "loss": 1.3433, - "step": 1311 - }, - { - "epoch": 1.2751653345234715, - "grad_norm": 0.408203125, - "learning_rate": 3.0915059205020443e-06, - "loss": 1.3189, - "step": 1312 - }, - { - "epoch": 1.276139083864162, - "grad_norm": 0.41015625, - "learning_rate": 3.0890207595485856e-06, - "loss": 1.3024, - "step": 1313 - }, - { - "epoch": 1.2771128332048525, - "grad_norm": 0.416015625, - "learning_rate": 3.086534982183707e-06, - "loss": 1.3039, - "step": 1314 - }, - { - "epoch": 1.278086582545543, - "grad_norm": 0.416015625, - "learning_rate": 3.0840485910087797e-06, - "loss": 1.3412, - "step": 1315 - }, - { - "epoch": 1.2790603318862337, - "grad_norm": 0.423828125, - "learning_rate": 3.081561588625817e-06, - "loss": 1.3334, - "step": 1316 - }, - { - "epoch": 1.2800340812269242, - "grad_norm": 0.41015625, - "learning_rate": 3.079073977637472e-06, - "loss": 1.33, - "step": 1317 - }, - { - "epoch": 1.2810078305676147, - "grad_norm": 0.416015625, - "learning_rate": 3.0765857606470353e-06, - "loss": 1.329, - "step": 1318 - }, - { - "epoch": 1.2819815799083052, - "grad_norm": 0.4375, - "learning_rate": 3.0740969402584287e-06, - "loss": 1.3007, - "step": 1319 - }, - { - "epoch": 1.282955329248996, - "grad_norm": 0.41796875, - "learning_rate": 3.0716075190762103e-06, - "loss": 1.3283, - "step": 1320 - }, - { - "epoch": 1.2839290785896864, - "grad_norm": 0.41015625, - "learning_rate": 3.0691174997055632e-06, - "loss": 1.3142, - "step": 1321 - }, - { - "epoch": 1.284902827930377, - "grad_norm": 0.431640625, - "learning_rate": 3.0666268847522984e-06, - "loss": 1.3155, - "step": 1322 - }, - { - "epoch": 1.2858765772710674, - "grad_norm": 0.416015625, - "learning_rate": 3.064135676822849e-06, - "loss": 1.3017, - "step": 1323 - }, - { - "epoch": 1.286850326611758, - "grad_norm": 0.40625, - "learning_rate": 3.061643878524269e-06, - "loss": 1.3107, - "step": 1324 - }, - { - "epoch": 1.2878240759524486, - "grad_norm": 0.4140625, - "learning_rate": 3.059151492464231e-06, - "loss": 1.3271, - "step": 1325 - }, - { - "epoch": 1.2887978252931391, - "grad_norm": 0.435546875, - "learning_rate": 3.0566585212510213e-06, - "loss": 1.32, - "step": 1326 - }, - { - "epoch": 1.2897715746338296, - "grad_norm": 0.41796875, - "learning_rate": 3.05416496749354e-06, - "loss": 1.3155, - "step": 1327 - }, - { - "epoch": 1.2907453239745204, - "grad_norm": 0.431640625, - "learning_rate": 3.051670833801296e-06, - "loss": 1.3437, - "step": 1328 - }, - { - "epoch": 1.2917190733152109, - "grad_norm": 0.412109375, - "learning_rate": 3.0491761227844054e-06, - "loss": 1.313, - "step": 1329 - }, - { - "epoch": 1.2926928226559014, - "grad_norm": 0.42578125, - "learning_rate": 3.0466808370535878e-06, - "loss": 1.3255, - "step": 1330 - }, - { - "epoch": 1.2936665719965919, - "grad_norm": 0.4296875, - "learning_rate": 3.044184979220165e-06, - "loss": 1.3475, - "step": 1331 - }, - { - "epoch": 1.2946403213372824, - "grad_norm": 0.421875, - "learning_rate": 3.041688551896057e-06, - "loss": 1.3305, - "step": 1332 - }, - { - "epoch": 1.2956140706779729, - "grad_norm": 0.408203125, - "learning_rate": 3.039191557693782e-06, - "loss": 1.2907, - "step": 1333 - }, - { - "epoch": 1.2965878200186636, - "grad_norm": 0.404296875, - "learning_rate": 3.0366939992264467e-06, - "loss": 1.3218, - "step": 1334 - }, - { - "epoch": 1.297561569359354, - "grad_norm": 0.435546875, - "learning_rate": 3.0341958791077524e-06, - "loss": 1.3171, - "step": 1335 - }, - { - "epoch": 1.2985353187000446, - "grad_norm": 0.439453125, - "learning_rate": 3.031697199951987e-06, - "loss": 1.3191, - "step": 1336 - }, - { - "epoch": 1.2995090680407353, - "grad_norm": 0.43359375, - "learning_rate": 3.0291979643740235e-06, - "loss": 1.3216, - "step": 1337 - }, - { - "epoch": 1.3004828173814258, - "grad_norm": 0.43359375, - "learning_rate": 3.026698174989316e-06, - "loss": 1.3069, - "step": 1338 - }, - { - "epoch": 1.3014565667221163, - "grad_norm": 0.419921875, - "learning_rate": 3.0241978344139e-06, - "loss": 1.3342, - "step": 1339 - }, - { - "epoch": 1.3024303160628068, - "grad_norm": 0.41015625, - "learning_rate": 3.021696945264387e-06, - "loss": 1.3129, - "step": 1340 - }, - { - "epoch": 1.3034040654034973, - "grad_norm": 0.423828125, - "learning_rate": 3.019195510157962e-06, - "loss": 1.3072, - "step": 1341 - }, - { - "epoch": 1.304377814744188, - "grad_norm": 0.423828125, - "learning_rate": 3.0166935317123824e-06, - "loss": 1.3239, - "step": 1342 - }, - { - "epoch": 1.3053515640848785, - "grad_norm": 0.423828125, - "learning_rate": 3.0141910125459744e-06, - "loss": 1.3281, - "step": 1343 - }, - { - "epoch": 1.306325313425569, - "grad_norm": 0.421875, - "learning_rate": 3.011687955277628e-06, - "loss": 1.3332, - "step": 1344 - }, - { - "epoch": 1.3072990627662595, - "grad_norm": 0.41796875, - "learning_rate": 3.0091843625267975e-06, - "loss": 1.3286, - "step": 1345 - }, - { - "epoch": 1.3082728121069502, - "grad_norm": 0.41015625, - "learning_rate": 3.0066802369134994e-06, - "loss": 1.3131, - "step": 1346 - }, - { - "epoch": 1.3092465614476407, - "grad_norm": 0.431640625, - "learning_rate": 3.0041755810583057e-06, - "loss": 1.3039, - "step": 1347 - }, - { - "epoch": 1.3102203107883312, - "grad_norm": 0.443359375, - "learning_rate": 3.0016703975823424e-06, - "loss": 1.3248, - "step": 1348 - }, - { - "epoch": 1.3111940601290217, - "grad_norm": 0.41796875, - "learning_rate": 2.9991646891072913e-06, - "loss": 1.2892, - "step": 1349 - }, - { - "epoch": 1.3121678094697122, - "grad_norm": 0.41015625, - "learning_rate": 2.99665845825538e-06, - "loss": 1.3322, - "step": 1350 - }, - { - "epoch": 1.313141558810403, - "grad_norm": 0.41796875, - "learning_rate": 2.994151707649384e-06, - "loss": 1.3231, - "step": 1351 - }, - { - "epoch": 1.3141153081510935, - "grad_norm": 0.41015625, - "learning_rate": 2.9916444399126245e-06, - "loss": 1.3194, - "step": 1352 - }, - { - "epoch": 1.315089057491784, - "grad_norm": 0.431640625, - "learning_rate": 2.98913665766896e-06, - "loss": 1.3136, - "step": 1353 - }, - { - "epoch": 1.3160628068324747, - "grad_norm": 0.41796875, - "learning_rate": 2.986628363542792e-06, - "loss": 1.3007, - "step": 1354 - }, - { - "epoch": 1.3170365561731652, - "grad_norm": 0.423828125, - "learning_rate": 2.9841195601590526e-06, - "loss": 1.3282, - "step": 1355 - }, - { - "epoch": 1.3180103055138557, - "grad_norm": 0.427734375, - "learning_rate": 2.981610250143213e-06, - "loss": 1.3295, - "step": 1356 - }, - { - "epoch": 1.3189840548545462, - "grad_norm": 0.41796875, - "learning_rate": 2.9791004361212687e-06, - "loss": 1.3109, - "step": 1357 - }, - { - "epoch": 1.3199578041952367, - "grad_norm": 0.431640625, - "learning_rate": 2.976590120719746e-06, - "loss": 1.3077, - "step": 1358 - }, - { - "epoch": 1.3209315535359272, - "grad_norm": 0.421875, - "learning_rate": 2.9740793065656953e-06, - "loss": 1.3384, - "step": 1359 - }, - { - "epoch": 1.321905302876618, - "grad_norm": 0.41796875, - "learning_rate": 2.9715679962866885e-06, - "loss": 1.2938, - "step": 1360 - }, - { - "epoch": 1.3228790522173084, - "grad_norm": 0.42578125, - "learning_rate": 2.9690561925108168e-06, - "loss": 1.3282, - "step": 1361 - }, - { - "epoch": 1.323852801557999, - "grad_norm": 0.431640625, - "learning_rate": 2.966543897866688e-06, - "loss": 1.3424, - "step": 1362 - }, - { - "epoch": 1.3248265508986896, - "grad_norm": 0.4296875, - "learning_rate": 2.9640311149834234e-06, - "loss": 1.3368, - "step": 1363 - }, - { - "epoch": 1.3258003002393801, - "grad_norm": 0.42578125, - "learning_rate": 2.9615178464906558e-06, - "loss": 1.318, - "step": 1364 - }, - { - "epoch": 1.3267740495800706, - "grad_norm": 0.43359375, - "learning_rate": 2.959004095018525e-06, - "loss": 1.3023, - "step": 1365 - }, - { - "epoch": 1.3277477989207611, - "grad_norm": 0.447265625, - "learning_rate": 2.9564898631976786e-06, - "loss": 1.306, - "step": 1366 - }, - { - "epoch": 1.3287215482614516, - "grad_norm": 0.421875, - "learning_rate": 2.953975153659263e-06, - "loss": 1.3101, - "step": 1367 - }, - { - "epoch": 1.3296952976021421, - "grad_norm": 0.421875, - "learning_rate": 2.9514599690349287e-06, - "loss": 1.3358, - "step": 1368 - }, - { - "epoch": 1.3306690469428328, - "grad_norm": 0.416015625, - "learning_rate": 2.9489443119568204e-06, - "loss": 1.3106, - "step": 1369 - }, - { - "epoch": 1.3316427962835233, - "grad_norm": 0.404296875, - "learning_rate": 2.9464281850575795e-06, - "loss": 1.3143, - "step": 1370 - }, - { - "epoch": 1.3326165456242138, - "grad_norm": 0.408203125, - "learning_rate": 2.9439115909703366e-06, - "loss": 1.3247, - "step": 1371 - }, - { - "epoch": 1.3335902949649046, - "grad_norm": 0.43359375, - "learning_rate": 2.9413945323287136e-06, - "loss": 1.3274, - "step": 1372 - }, - { - "epoch": 1.334564044305595, - "grad_norm": 0.447265625, - "learning_rate": 2.938877011766817e-06, - "loss": 1.3221, - "step": 1373 - }, - { - "epoch": 1.3355377936462856, - "grad_norm": 0.435546875, - "learning_rate": 2.936359031919237e-06, - "loss": 1.3119, - "step": 1374 - }, - { - "epoch": 1.336511542986976, - "grad_norm": 0.42578125, - "learning_rate": 2.9338405954210446e-06, - "loss": 1.3163, - "step": 1375 - }, - { - "epoch": 1.3374852923276666, - "grad_norm": 0.41796875, - "learning_rate": 2.9313217049077887e-06, - "loss": 1.3262, - "step": 1376 - }, - { - "epoch": 1.3384590416683573, - "grad_norm": 0.408203125, - "learning_rate": 2.928802363015494e-06, - "loss": 1.3322, - "step": 1377 - }, - { - "epoch": 1.3394327910090478, - "grad_norm": 0.4140625, - "learning_rate": 2.9262825723806563e-06, - "loss": 1.3207, - "step": 1378 - }, - { - "epoch": 1.3404065403497383, - "grad_norm": 0.43359375, - "learning_rate": 2.9237623356402423e-06, - "loss": 1.33, - "step": 1379 - }, - { - "epoch": 1.3413802896904288, - "grad_norm": 0.44140625, - "learning_rate": 2.9212416554316837e-06, - "loss": 1.3286, - "step": 1380 - }, - { - "epoch": 1.3423540390311195, - "grad_norm": 0.412109375, - "learning_rate": 2.9187205343928783e-06, - "loss": 1.3208, - "step": 1381 - }, - { - "epoch": 1.34332778837181, - "grad_norm": 0.44140625, - "learning_rate": 2.916198975162184e-06, - "loss": 1.3396, - "step": 1382 - }, - { - "epoch": 1.3443015377125005, - "grad_norm": 0.412109375, - "learning_rate": 2.913676980378418e-06, - "loss": 1.3267, - "step": 1383 - }, - { - "epoch": 1.345275287053191, - "grad_norm": 0.4140625, - "learning_rate": 2.9111545526808528e-06, - "loss": 1.3116, - "step": 1384 - }, - { - "epoch": 1.3462490363938815, - "grad_norm": 0.4296875, - "learning_rate": 2.9086316947092135e-06, - "loss": 1.3048, - "step": 1385 - }, - { - "epoch": 1.3472227857345722, - "grad_norm": 0.447265625, - "learning_rate": 2.9061084091036767e-06, - "loss": 1.3175, - "step": 1386 - }, - { - "epoch": 1.3481965350752627, - "grad_norm": 0.4453125, - "learning_rate": 2.9035846985048666e-06, - "loss": 1.3457, - "step": 1387 - }, - { - "epoch": 1.3491702844159532, - "grad_norm": 0.423828125, - "learning_rate": 2.90106056555385e-06, - "loss": 1.3214, - "step": 1388 - }, - { - "epoch": 1.350144033756644, - "grad_norm": 0.41796875, - "learning_rate": 2.8985360128921388e-06, - "loss": 1.316, - "step": 1389 - }, - { - "epoch": 1.3511177830973344, - "grad_norm": 0.4140625, - "learning_rate": 2.8960110431616806e-06, - "loss": 1.3004, - "step": 1390 - }, - { - "epoch": 1.352091532438025, - "grad_norm": 0.427734375, - "learning_rate": 2.8934856590048625e-06, - "loss": 1.3256, - "step": 1391 - }, - { - "epoch": 1.3530652817787154, - "grad_norm": 0.4609375, - "learning_rate": 2.8909598630645042e-06, - "loss": 1.3187, - "step": 1392 - }, - { - "epoch": 1.354039031119406, - "grad_norm": 0.423828125, - "learning_rate": 2.8884336579838556e-06, - "loss": 1.3232, - "step": 1393 - }, - { - "epoch": 1.3550127804600964, - "grad_norm": 0.4375, - "learning_rate": 2.8859070464065963e-06, - "loss": 1.3082, - "step": 1394 - }, - { - "epoch": 1.3559865298007872, - "grad_norm": 0.45703125, - "learning_rate": 2.88338003097683e-06, - "loss": 1.3194, - "step": 1395 - }, - { - "epoch": 1.3569602791414777, - "grad_norm": 0.4375, - "learning_rate": 2.880852614339083e-06, - "loss": 1.3119, - "step": 1396 - }, - { - "epoch": 1.3579340284821682, - "grad_norm": 0.404296875, - "learning_rate": 2.878324799138303e-06, - "loss": 1.2957, - "step": 1397 - }, - { - "epoch": 1.3589077778228589, - "grad_norm": 0.408203125, - "learning_rate": 2.8757965880198523e-06, - "loss": 1.3294, - "step": 1398 - }, - { - "epoch": 1.3598815271635494, - "grad_norm": 0.4296875, - "learning_rate": 2.8732679836295103e-06, - "loss": 1.3227, - "step": 1399 - }, - { - "epoch": 1.3608552765042399, - "grad_norm": 0.4375, - "learning_rate": 2.870738988613467e-06, - "loss": 1.3249, - "step": 1400 - }, - { - "epoch": 1.3618290258449304, - "grad_norm": 0.423828125, - "learning_rate": 2.868209605618319e-06, - "loss": 1.3452, - "step": 1401 - }, - { - "epoch": 1.3628027751856209, - "grad_norm": 0.4140625, - "learning_rate": 2.8656798372910714e-06, - "loss": 1.3414, - "step": 1402 - }, - { - "epoch": 1.3637765245263116, - "grad_norm": 0.427734375, - "learning_rate": 2.863149686279133e-06, - "loss": 1.3234, - "step": 1403 - }, - { - "epoch": 1.364750273867002, - "grad_norm": 0.43359375, - "learning_rate": 2.8606191552303103e-06, - "loss": 1.3342, - "step": 1404 - }, - { - "epoch": 1.3657240232076926, - "grad_norm": 0.41015625, - "learning_rate": 2.858088246792811e-06, - "loss": 1.3379, - "step": 1405 - }, - { - "epoch": 1.366697772548383, - "grad_norm": 0.4296875, - "learning_rate": 2.855556963615234e-06, - "loss": 1.3388, - "step": 1406 - }, - { - "epoch": 1.3676715218890738, - "grad_norm": 0.439453125, - "learning_rate": 2.853025308346574e-06, - "loss": 1.331, - "step": 1407 - }, - { - "epoch": 1.3686452712297643, - "grad_norm": 0.462890625, - "learning_rate": 2.850493283636212e-06, - "loss": 1.3115, - "step": 1408 - }, - { - "epoch": 1.3696190205704548, - "grad_norm": 0.40625, - "learning_rate": 2.847960892133918e-06, - "loss": 1.3168, - "step": 1409 - }, - { - "epoch": 1.3705927699111453, - "grad_norm": 0.419921875, - "learning_rate": 2.845428136489844e-06, - "loss": 1.319, - "step": 1410 - }, - { - "epoch": 1.3715665192518358, - "grad_norm": 0.419921875, - "learning_rate": 2.842895019354525e-06, - "loss": 1.347, - "step": 1411 - }, - { - "epoch": 1.3725402685925265, - "grad_norm": 0.416015625, - "learning_rate": 2.8403615433788722e-06, - "loss": 1.3205, - "step": 1412 - }, - { - "epoch": 1.373514017933217, - "grad_norm": 0.427734375, - "learning_rate": 2.837827711214173e-06, - "loss": 1.306, - "step": 1413 - }, - { - "epoch": 1.3744877672739075, - "grad_norm": 0.412109375, - "learning_rate": 2.8352935255120893e-06, - "loss": 1.3281, - "step": 1414 - }, - { - "epoch": 1.3754615166145983, - "grad_norm": 0.416015625, - "learning_rate": 2.8327589889246513e-06, - "loss": 1.3142, - "step": 1415 - }, - { - "epoch": 1.3764352659552888, - "grad_norm": 0.41796875, - "learning_rate": 2.8302241041042564e-06, - "loss": 1.3356, - "step": 1416 - }, - { - "epoch": 1.3774090152959793, - "grad_norm": 0.419921875, - "learning_rate": 2.8276888737036657e-06, - "loss": 1.3273, - "step": 1417 - }, - { - "epoch": 1.3783827646366698, - "grad_norm": 0.41015625, - "learning_rate": 2.8251533003760044e-06, - "loss": 1.3069, - "step": 1418 - }, - { - "epoch": 1.3793565139773603, - "grad_norm": 0.419921875, - "learning_rate": 2.822617386774754e-06, - "loss": 1.3231, - "step": 1419 - }, - { - "epoch": 1.3803302633180508, - "grad_norm": 0.43359375, - "learning_rate": 2.8200811355537543e-06, - "loss": 1.3291, - "step": 1420 - }, - { - "epoch": 1.3813040126587415, - "grad_norm": 0.408203125, - "learning_rate": 2.817544549367197e-06, - "loss": 1.3474, - "step": 1421 - }, - { - "epoch": 1.382277761999432, - "grad_norm": 0.408203125, - "learning_rate": 2.8150076308696247e-06, - "loss": 1.3157, - "step": 1422 - }, - { - "epoch": 1.3832515113401225, - "grad_norm": 0.412109375, - "learning_rate": 2.812470382715927e-06, - "loss": 1.3027, - "step": 1423 - }, - { - "epoch": 1.3842252606808132, - "grad_norm": 0.41015625, - "learning_rate": 2.8099328075613403e-06, - "loss": 1.3373, - "step": 1424 - }, - { - "epoch": 1.3851990100215037, - "grad_norm": 0.419921875, - "learning_rate": 2.807394908061441e-06, - "loss": 1.3396, - "step": 1425 - }, - { - "epoch": 1.3861727593621942, - "grad_norm": 0.4140625, - "learning_rate": 2.8048566868721473e-06, - "loss": 1.3278, - "step": 1426 - }, - { - "epoch": 1.3871465087028847, - "grad_norm": 0.416015625, - "learning_rate": 2.802318146649713e-06, - "loss": 1.2906, - "step": 1427 - }, - { - "epoch": 1.3881202580435752, - "grad_norm": 0.416015625, - "learning_rate": 2.7997792900507236e-06, - "loss": 1.3173, - "step": 1428 - }, - { - "epoch": 1.389094007384266, - "grad_norm": 0.419921875, - "learning_rate": 2.7972401197321e-06, - "loss": 1.3236, - "step": 1429 - }, - { - "epoch": 1.3900677567249564, - "grad_norm": 0.42578125, - "learning_rate": 2.7947006383510868e-06, - "loss": 1.3112, - "step": 1430 - }, - { - "epoch": 1.391041506065647, - "grad_norm": 0.41015625, - "learning_rate": 2.7921608485652585e-06, - "loss": 1.3085, - "step": 1431 - }, - { - "epoch": 1.3920152554063374, - "grad_norm": 0.40234375, - "learning_rate": 2.7896207530325093e-06, - "loss": 1.3015, - "step": 1432 - }, - { - "epoch": 1.3929890047470281, - "grad_norm": 0.412109375, - "learning_rate": 2.7870803544110546e-06, - "loss": 1.3219, - "step": 1433 - }, - { - "epoch": 1.3939627540877186, - "grad_norm": 0.4296875, - "learning_rate": 2.7845396553594267e-06, - "loss": 1.3122, - "step": 1434 - }, - { - "epoch": 1.3949365034284091, - "grad_norm": 0.42578125, - "learning_rate": 2.7819986585364715e-06, - "loss": 1.2978, - "step": 1435 - }, - { - "epoch": 1.3959102527690996, - "grad_norm": 0.47265625, - "learning_rate": 2.7794573666013485e-06, - "loss": 1.3255, - "step": 1436 - }, - { - "epoch": 1.3968840021097901, - "grad_norm": 0.4140625, - "learning_rate": 2.7769157822135246e-06, - "loss": 1.3289, - "step": 1437 - }, - { - "epoch": 1.3978577514504809, - "grad_norm": 0.40625, - "learning_rate": 2.7743739080327724e-06, - "loss": 1.2938, - "step": 1438 - }, - { - "epoch": 1.3988315007911714, - "grad_norm": 0.421875, - "learning_rate": 2.771831746719169e-06, - "loss": 1.3264, - "step": 1439 - }, - { - "epoch": 1.3998052501318619, - "grad_norm": 0.404296875, - "learning_rate": 2.769289300933091e-06, - "loss": 1.3336, - "step": 1440 - }, - { - "epoch": 1.4007789994725526, - "grad_norm": 0.404296875, - "learning_rate": 2.7667465733352133e-06, - "loss": 1.317, - "step": 1441 - }, - { - "epoch": 1.401752748813243, - "grad_norm": 0.408203125, - "learning_rate": 2.7642035665865065e-06, - "loss": 1.3164, - "step": 1442 - }, - { - "epoch": 1.4027264981539336, - "grad_norm": 0.42578125, - "learning_rate": 2.76166028334823e-06, - "loss": 1.3167, - "step": 1443 - }, - { - "epoch": 1.403700247494624, - "grad_norm": 0.427734375, - "learning_rate": 2.7591167262819356e-06, - "loss": 1.3123, - "step": 1444 - }, - { - "epoch": 1.4046739968353146, - "grad_norm": 0.404296875, - "learning_rate": 2.7565728980494622e-06, - "loss": 1.3292, - "step": 1445 - }, - { - "epoch": 1.405647746176005, - "grad_norm": 0.42578125, - "learning_rate": 2.754028801312931e-06, - "loss": 1.325, - "step": 1446 - }, - { - "epoch": 1.4066214955166958, - "grad_norm": 0.416015625, - "learning_rate": 2.7514844387347434e-06, - "loss": 1.3307, - "step": 1447 - }, - { - "epoch": 1.4075952448573863, - "grad_norm": 0.400390625, - "learning_rate": 2.7489398129775803e-06, - "loss": 1.3054, - "step": 1448 - }, - { - "epoch": 1.4085689941980768, - "grad_norm": 0.404296875, - "learning_rate": 2.7463949267043978e-06, - "loss": 1.3235, - "step": 1449 - }, - { - "epoch": 1.4095427435387675, - "grad_norm": 0.40625, - "learning_rate": 2.7438497825784242e-06, - "loss": 1.3009, - "step": 1450 - }, - { - "epoch": 1.410516492879458, - "grad_norm": 0.408203125, - "learning_rate": 2.74130438326316e-06, - "loss": 1.3134, - "step": 1451 - }, - { - "epoch": 1.4114902422201485, - "grad_norm": 0.41015625, - "learning_rate": 2.7387587314223673e-06, - "loss": 1.3411, - "step": 1452 - }, - { - "epoch": 1.412463991560839, - "grad_norm": 0.408203125, - "learning_rate": 2.7362128297200784e-06, - "loss": 1.3121, - "step": 1453 - }, - { - "epoch": 1.4134377409015295, - "grad_norm": 0.3984375, - "learning_rate": 2.7336666808205844e-06, - "loss": 1.3225, - "step": 1454 - }, - { - "epoch": 1.41441149024222, - "grad_norm": 0.404296875, - "learning_rate": 2.7311202873884345e-06, - "loss": 1.31, - "step": 1455 - }, - { - "epoch": 1.4153852395829107, - "grad_norm": 0.41015625, - "learning_rate": 2.7285736520884355e-06, - "loss": 1.3111, - "step": 1456 - }, - { - "epoch": 1.4163589889236012, - "grad_norm": 0.41015625, - "learning_rate": 2.726026777585645e-06, - "loss": 1.3287, - "step": 1457 - }, - { - "epoch": 1.4173327382642917, - "grad_norm": 0.4140625, - "learning_rate": 2.723479666545373e-06, - "loss": 1.3063, - "step": 1458 - }, - { - "epoch": 1.4183064876049825, - "grad_norm": 0.40625, - "learning_rate": 2.7209323216331774e-06, - "loss": 1.3414, - "step": 1459 - }, - { - "epoch": 1.419280236945673, - "grad_norm": 0.4296875, - "learning_rate": 2.7183847455148594e-06, - "loss": 1.3401, - "step": 1460 - }, - { - "epoch": 1.4202539862863635, - "grad_norm": 0.40625, - "learning_rate": 2.715836940856461e-06, - "loss": 1.3144, - "step": 1461 - }, - { - "epoch": 1.421227735627054, - "grad_norm": 0.400390625, - "learning_rate": 2.7132889103242664e-06, - "loss": 1.322, - "step": 1462 - }, - { - "epoch": 1.4222014849677445, - "grad_norm": 0.4140625, - "learning_rate": 2.7107406565847943e-06, - "loss": 1.3164, - "step": 1463 - }, - { - "epoch": 1.4231752343084352, - "grad_norm": 0.419921875, - "learning_rate": 2.708192182304798e-06, - "loss": 1.333, - "step": 1464 - }, - { - "epoch": 1.4241489836491257, - "grad_norm": 0.412109375, - "learning_rate": 2.70564349015126e-06, - "loss": 1.3385, - "step": 1465 - }, - { - "epoch": 1.4251227329898162, - "grad_norm": 0.400390625, - "learning_rate": 2.7030945827913935e-06, - "loss": 1.312, - "step": 1466 - }, - { - "epoch": 1.4260964823305067, - "grad_norm": 0.3984375, - "learning_rate": 2.700545462892633e-06, - "loss": 1.3395, - "step": 1467 - }, - { - "epoch": 1.4270702316711974, - "grad_norm": 0.4140625, - "learning_rate": 2.6979961331226386e-06, - "loss": 1.3315, - "step": 1468 - }, - { - "epoch": 1.428043981011888, - "grad_norm": 0.404296875, - "learning_rate": 2.6954465961492892e-06, - "loss": 1.306, - "step": 1469 - }, - { - "epoch": 1.4290177303525784, - "grad_norm": 0.408203125, - "learning_rate": 2.692896854640681e-06, - "loss": 1.3272, - "step": 1470 - }, - { - "epoch": 1.429991479693269, - "grad_norm": 0.416015625, - "learning_rate": 2.6903469112651233e-06, - "loss": 1.3382, - "step": 1471 - }, - { - "epoch": 1.4309652290339594, - "grad_norm": 0.416015625, - "learning_rate": 2.687796768691136e-06, - "loss": 1.3116, - "step": 1472 - }, - { - "epoch": 1.4319389783746501, - "grad_norm": 0.412109375, - "learning_rate": 2.685246429587449e-06, - "loss": 1.3214, - "step": 1473 - }, - { - "epoch": 1.4329127277153406, - "grad_norm": 0.40234375, - "learning_rate": 2.6826958966229988e-06, - "loss": 1.3184, - "step": 1474 - }, - { - "epoch": 1.4338864770560311, - "grad_norm": 0.40625, - "learning_rate": 2.680145172466921e-06, - "loss": 1.3083, - "step": 1475 - }, - { - "epoch": 1.4348602263967218, - "grad_norm": 0.392578125, - "learning_rate": 2.677594259788555e-06, - "loss": 1.3113, - "step": 1476 - }, - { - "epoch": 1.4358339757374123, - "grad_norm": 0.41796875, - "learning_rate": 2.6750431612574366e-06, - "loss": 1.335, - "step": 1477 - }, - { - "epoch": 1.4368077250781028, - "grad_norm": 0.423828125, - "learning_rate": 2.6724918795432942e-06, - "loss": 1.3067, - "step": 1478 - }, - { - "epoch": 1.4377814744187933, - "grad_norm": 0.3984375, - "learning_rate": 2.6699404173160496e-06, - "loss": 1.3199, - "step": 1479 - }, - { - "epoch": 1.4387552237594838, - "grad_norm": 0.40234375, - "learning_rate": 2.667388777245814e-06, - "loss": 1.2809, - "step": 1480 - }, - { - "epoch": 1.4397289731001743, - "grad_norm": 0.4140625, - "learning_rate": 2.664836962002883e-06, - "loss": 1.3153, - "step": 1481 - }, - { - "epoch": 1.440702722440865, - "grad_norm": 0.40234375, - "learning_rate": 2.662284974257738e-06, - "loss": 1.3068, - "step": 1482 - }, - { - "epoch": 1.4416764717815556, - "grad_norm": 0.40234375, - "learning_rate": 2.659732816681037e-06, - "loss": 1.3182, - "step": 1483 - }, - { - "epoch": 1.442650221122246, - "grad_norm": 0.41796875, - "learning_rate": 2.6571804919436195e-06, - "loss": 1.3285, - "step": 1484 - }, - { - "epoch": 1.4436239704629368, - "grad_norm": 0.4140625, - "learning_rate": 2.6546280027164985e-06, - "loss": 1.3073, - "step": 1485 - }, - { - "epoch": 1.4445977198036273, - "grad_norm": 0.400390625, - "learning_rate": 2.652075351670858e-06, - "loss": 1.3153, - "step": 1486 - }, - { - "epoch": 1.4455714691443178, - "grad_norm": 0.404296875, - "learning_rate": 2.6495225414780545e-06, - "loss": 1.3037, - "step": 1487 - }, - { - "epoch": 1.4465452184850083, - "grad_norm": 0.40625, - "learning_rate": 2.646969574809608e-06, - "loss": 1.326, - "step": 1488 - }, - { - "epoch": 1.4475189678256988, - "grad_norm": 0.40625, - "learning_rate": 2.644416454337203e-06, - "loss": 1.3135, - "step": 1489 - }, - { - "epoch": 1.4484927171663895, - "grad_norm": 0.41796875, - "learning_rate": 2.6418631827326857e-06, - "loss": 1.3028, - "step": 1490 - }, - { - "epoch": 1.44946646650708, - "grad_norm": 0.4296875, - "learning_rate": 2.639309762668059e-06, - "loss": 1.3188, - "step": 1491 - }, - { - "epoch": 1.4504402158477705, - "grad_norm": 0.4140625, - "learning_rate": 2.636756196815484e-06, - "loss": 1.3191, - "step": 1492 - }, - { - "epoch": 1.451413965188461, - "grad_norm": 0.404296875, - "learning_rate": 2.634202487847271e-06, - "loss": 1.3261, - "step": 1493 - }, - { - "epoch": 1.4523877145291517, - "grad_norm": 0.40234375, - "learning_rate": 2.631648638435883e-06, - "loss": 1.3176, - "step": 1494 - }, - { - "epoch": 1.4533614638698422, - "grad_norm": 0.4140625, - "learning_rate": 2.629094651253927e-06, - "loss": 1.3236, - "step": 1495 - }, - { - "epoch": 1.4543352132105327, - "grad_norm": 0.400390625, - "learning_rate": 2.6265405289741567e-06, - "loss": 1.3197, - "step": 1496 - }, - { - "epoch": 1.4553089625512232, - "grad_norm": 0.404296875, - "learning_rate": 2.6239862742694668e-06, - "loss": 1.3219, - "step": 1497 - }, - { - "epoch": 1.4562827118919137, - "grad_norm": 0.41796875, - "learning_rate": 2.621431889812888e-06, - "loss": 1.3157, - "step": 1498 - }, - { - "epoch": 1.4572564612326044, - "grad_norm": 0.416015625, - "learning_rate": 2.6188773782775917e-06, - "loss": 1.318, - "step": 1499 - }, - { - "epoch": 1.458230210573295, - "grad_norm": 0.40625, - "learning_rate": 2.616322742336877e-06, - "loss": 1.3216, - "step": 1500 - }, - { - "epoch": 1.4592039599139854, - "grad_norm": 0.416015625, - "learning_rate": 2.6137679846641765e-06, - "loss": 1.3314, - "step": 1501 - }, - { - "epoch": 1.4601777092546762, - "grad_norm": 0.412109375, - "learning_rate": 2.6112131079330494e-06, - "loss": 1.3215, - "step": 1502 - }, - { - "epoch": 1.4611514585953667, - "grad_norm": 0.39453125, - "learning_rate": 2.6086581148171784e-06, - "loss": 1.3238, - "step": 1503 - }, - { - "epoch": 1.4621252079360572, - "grad_norm": 0.427734375, - "learning_rate": 2.606103007990371e-06, - "loss": 1.3354, - "step": 1504 - }, - { - "epoch": 1.4630989572767477, - "grad_norm": 0.41015625, - "learning_rate": 2.6035477901265506e-06, - "loss": 1.325, - "step": 1505 - }, - { - "epoch": 1.4640727066174382, - "grad_norm": 0.4140625, - "learning_rate": 2.6009924638997566e-06, - "loss": 1.3116, - "step": 1506 - }, - { - "epoch": 1.4650464559581287, - "grad_norm": 0.41015625, - "learning_rate": 2.5984370319841444e-06, - "loss": 1.3227, - "step": 1507 - }, - { - "epoch": 1.4660202052988194, - "grad_norm": 0.4140625, - "learning_rate": 2.595881497053979e-06, - "loss": 1.3134, - "step": 1508 - }, - { - "epoch": 1.4669939546395099, - "grad_norm": 0.40234375, - "learning_rate": 2.593325861783632e-06, - "loss": 1.3098, - "step": 1509 - }, - { - "epoch": 1.4679677039802004, - "grad_norm": 0.40234375, - "learning_rate": 2.590770128847582e-06, - "loss": 1.3104, - "step": 1510 - }, - { - "epoch": 1.468941453320891, - "grad_norm": 0.412109375, - "learning_rate": 2.5882143009204074e-06, - "loss": 1.3373, - "step": 1511 - }, - { - "epoch": 1.4699152026615816, - "grad_norm": 0.404296875, - "learning_rate": 2.585658380676788e-06, - "loss": 1.3148, - "step": 1512 - }, - { - "epoch": 1.470888952002272, - "grad_norm": 0.408203125, - "learning_rate": 2.5831023707915e-06, - "loss": 1.299, - "step": 1513 - }, - { - "epoch": 1.4718627013429626, - "grad_norm": 0.41015625, - "learning_rate": 2.5805462739394118e-06, - "loss": 1.3312, - "step": 1514 - }, - { - "epoch": 1.472836450683653, - "grad_norm": 0.412109375, - "learning_rate": 2.5779900927954844e-06, - "loss": 1.2993, - "step": 1515 - }, - { - "epoch": 1.4738102000243436, - "grad_norm": 0.41796875, - "learning_rate": 2.575433830034767e-06, - "loss": 1.3354, - "step": 1516 - }, - { - "epoch": 1.4747839493650343, - "grad_norm": 0.408203125, - "learning_rate": 2.572877488332393e-06, - "loss": 1.3343, - "step": 1517 - }, - { - "epoch": 1.4757576987057248, - "grad_norm": 0.3984375, - "learning_rate": 2.57032107036358e-06, - "loss": 1.317, - "step": 1518 - }, - { - "epoch": 1.4767314480464153, - "grad_norm": 0.40625, - "learning_rate": 2.5677645788036245e-06, - "loss": 1.3155, - "step": 1519 - }, - { - "epoch": 1.477705197387106, - "grad_norm": 0.41796875, - "learning_rate": 2.5652080163278986e-06, - "loss": 1.3329, - "step": 1520 - }, - { - "epoch": 1.4786789467277965, - "grad_norm": 0.427734375, - "learning_rate": 2.5626513856118527e-06, - "loss": 1.3123, - "step": 1521 - }, - { - "epoch": 1.479652696068487, - "grad_norm": 0.40234375, - "learning_rate": 2.5600946893310043e-06, - "loss": 1.329, - "step": 1522 - }, - { - "epoch": 1.4806264454091775, - "grad_norm": 0.40234375, - "learning_rate": 2.557537930160941e-06, - "loss": 1.3235, - "step": 1523 - }, - { - "epoch": 1.481600194749868, - "grad_norm": 0.40625, - "learning_rate": 2.5549811107773175e-06, - "loss": 1.314, - "step": 1524 - }, - { - "epoch": 1.4825739440905588, - "grad_norm": 0.400390625, - "learning_rate": 2.5524242338558498e-06, - "loss": 1.3204, - "step": 1525 - }, - { - "epoch": 1.4835476934312493, - "grad_norm": 0.404296875, - "learning_rate": 2.5498673020723147e-06, - "loss": 1.31, - "step": 1526 - }, - { - "epoch": 1.4845214427719398, - "grad_norm": 0.435546875, - "learning_rate": 2.547310318102548e-06, - "loss": 1.3296, - "step": 1527 - }, - { - "epoch": 1.4854951921126303, - "grad_norm": 0.416015625, - "learning_rate": 2.5447532846224364e-06, - "loss": 1.3187, - "step": 1528 - }, - { - "epoch": 1.486468941453321, - "grad_norm": 0.41796875, - "learning_rate": 2.5421962043079217e-06, - "loss": 1.3136, - "step": 1529 - }, - { - "epoch": 1.4874426907940115, - "grad_norm": 0.416015625, - "learning_rate": 2.539639079834994e-06, - "loss": 1.3299, - "step": 1530 - }, - { - "epoch": 1.488416440134702, - "grad_norm": 0.41015625, - "learning_rate": 2.5370819138796884e-06, - "loss": 1.3279, - "step": 1531 - }, - { - "epoch": 1.4893901894753925, - "grad_norm": 0.4140625, - "learning_rate": 2.534524709118085e-06, - "loss": 1.3395, - "step": 1532 - }, - { - "epoch": 1.490363938816083, - "grad_norm": 0.4375, - "learning_rate": 2.5319674682263033e-06, - "loss": 1.3349, - "step": 1533 - }, - { - "epoch": 1.4913376881567737, - "grad_norm": 0.423828125, - "learning_rate": 2.5294101938805017e-06, - "loss": 1.3386, - "step": 1534 - }, - { - "epoch": 1.4923114374974642, - "grad_norm": 0.42578125, - "learning_rate": 2.5268528887568723e-06, - "loss": 1.3233, - "step": 1535 - }, - { - "epoch": 1.4932851868381547, - "grad_norm": 0.427734375, - "learning_rate": 2.5242955555316396e-06, - "loss": 1.3073, - "step": 1536 - }, - { - "epoch": 1.4942589361788454, - "grad_norm": 0.408203125, - "learning_rate": 2.52173819688106e-06, - "loss": 1.3273, - "step": 1537 - }, - { - "epoch": 1.495232685519536, - "grad_norm": 0.40234375, - "learning_rate": 2.5191808154814124e-06, - "loss": 1.3105, - "step": 1538 - }, - { - "epoch": 1.4962064348602264, - "grad_norm": 0.423828125, - "learning_rate": 2.516623414009003e-06, - "loss": 1.309, - "step": 1539 - }, - { - "epoch": 1.497180184200917, - "grad_norm": 0.40234375, - "learning_rate": 2.514065995140156e-06, - "loss": 1.316, - "step": 1540 - }, - { - "epoch": 1.4981539335416074, - "grad_norm": 0.419921875, - "learning_rate": 2.5115085615512173e-06, - "loss": 1.3237, - "step": 1541 - }, - { - "epoch": 1.499127682882298, - "grad_norm": 0.404296875, - "learning_rate": 2.5089511159185465e-06, - "loss": 1.309, - "step": 1542 - }, - { - "epoch": 1.5001014322229886, - "grad_norm": 0.404296875, - "learning_rate": 2.5063936609185133e-06, - "loss": 1.3211, - "step": 1543 - }, - { - "epoch": 1.5010751815636791, - "grad_norm": 0.4140625, - "learning_rate": 2.503836199227502e-06, - "loss": 1.3328, - "step": 1544 - }, - { - "epoch": 1.5020489309043699, - "grad_norm": 0.419921875, - "learning_rate": 2.5012787335218997e-06, - "loss": 1.3355, - "step": 1545 - }, - { - "epoch": 1.5030226802450604, - "grad_norm": 0.412109375, - "learning_rate": 2.498721266478101e-06, - "loss": 1.3007, - "step": 1546 - }, - { - "epoch": 1.5039964295857509, - "grad_norm": 0.41015625, - "learning_rate": 2.496163800772499e-06, - "loss": 1.308, - "step": 1547 - }, - { - "epoch": 1.5049701789264414, - "grad_norm": 0.40625, - "learning_rate": 2.493606339081487e-06, - "loss": 1.326, - "step": 1548 - }, - { - "epoch": 1.5059439282671319, - "grad_norm": 0.423828125, - "learning_rate": 2.4910488840814543e-06, - "loss": 1.3158, - "step": 1549 - }, - { - "epoch": 1.5069176776078224, - "grad_norm": 0.41015625, - "learning_rate": 2.4884914384487822e-06, - "loss": 1.3075, - "step": 1550 - }, - { - "epoch": 1.5078914269485129, - "grad_norm": 0.408203125, - "learning_rate": 2.4859340048598438e-06, - "loss": 1.3076, - "step": 1551 - }, - { - "epoch": 1.5088651762892036, - "grad_norm": 0.44140625, - "learning_rate": 2.483376585990998e-06, - "loss": 1.317, - "step": 1552 - }, - { - "epoch": 1.509838925629894, - "grad_norm": 0.396484375, - "learning_rate": 2.4808191845185885e-06, - "loss": 1.3139, - "step": 1553 - }, - { - "epoch": 1.5108126749705848, - "grad_norm": 0.408203125, - "learning_rate": 2.4782618031189405e-06, - "loss": 1.3104, - "step": 1554 - }, - { - "epoch": 1.5117864243112753, - "grad_norm": 0.404296875, - "learning_rate": 2.475704444468361e-06, - "loss": 1.3422, - "step": 1555 - }, - { - "epoch": 1.5127601736519658, - "grad_norm": 0.4140625, - "learning_rate": 2.473147111243129e-06, - "loss": 1.3094, - "step": 1556 - }, - { - "epoch": 1.5137339229926563, - "grad_norm": 0.431640625, - "learning_rate": 2.470589806119499e-06, - "loss": 1.3046, - "step": 1557 - }, - { - "epoch": 1.5147076723333468, - "grad_norm": 0.416015625, - "learning_rate": 2.468032531773697e-06, - "loss": 1.3303, - "step": 1558 - }, - { - "epoch": 1.5156814216740373, - "grad_norm": 0.41796875, - "learning_rate": 2.4654752908819153e-06, - "loss": 1.319, - "step": 1559 - }, - { - "epoch": 1.5166551710147278, - "grad_norm": 0.408203125, - "learning_rate": 2.4629180861203116e-06, - "loss": 1.3279, - "step": 1560 - }, - { - "epoch": 1.5176289203554185, - "grad_norm": 0.3984375, - "learning_rate": 2.4603609201650066e-06, - "loss": 1.3235, - "step": 1561 - }, - { - "epoch": 1.518602669696109, - "grad_norm": 0.3984375, - "learning_rate": 2.4578037956920783e-06, - "loss": 1.3155, - "step": 1562 - }, - { - "epoch": 1.5195764190367997, - "grad_norm": 0.419921875, - "learning_rate": 2.455246715377564e-06, - "loss": 1.3035, - "step": 1563 - }, - { - "epoch": 1.5205501683774902, - "grad_norm": 0.421875, - "learning_rate": 2.4526896818974534e-06, - "loss": 1.312, - "step": 1564 - }, - { - "epoch": 1.5215239177181807, - "grad_norm": 0.435546875, - "learning_rate": 2.4501326979276857e-06, - "loss": 1.3306, - "step": 1565 - }, - { - "epoch": 1.5224976670588712, - "grad_norm": 0.419921875, - "learning_rate": 2.447575766144151e-06, - "loss": 1.3141, - "step": 1566 - }, - { - "epoch": 1.5234714163995617, - "grad_norm": 0.41796875, - "learning_rate": 2.4450188892226834e-06, - "loss": 1.3198, - "step": 1567 - }, - { - "epoch": 1.5244451657402522, - "grad_norm": 0.412109375, - "learning_rate": 2.4424620698390597e-06, - "loss": 1.3094, - "step": 1568 - }, - { - "epoch": 1.525418915080943, - "grad_norm": 0.400390625, - "learning_rate": 2.4399053106689965e-06, - "loss": 1.3242, - "step": 1569 - }, - { - "epoch": 1.5263926644216335, - "grad_norm": 0.40625, - "learning_rate": 2.4373486143881477e-06, - "loss": 1.3158, - "step": 1570 - }, - { - "epoch": 1.527366413762324, - "grad_norm": 0.42578125, - "learning_rate": 2.4347919836721014e-06, - "loss": 1.3152, - "step": 1571 - }, - { - "epoch": 1.5283401631030147, - "grad_norm": 0.419921875, - "learning_rate": 2.432235421196376e-06, - "loss": 1.3307, - "step": 1572 - }, - { - "epoch": 1.5293139124437052, - "grad_norm": 0.416015625, - "learning_rate": 2.4296789296364202e-06, - "loss": 1.3083, - "step": 1573 - }, - { - "epoch": 1.5302876617843957, - "grad_norm": 0.4140625, - "learning_rate": 2.427122511667608e-06, - "loss": 1.3176, - "step": 1574 - }, - { - "epoch": 1.5312614111250862, - "grad_norm": 0.40234375, - "learning_rate": 2.4245661699652343e-06, - "loss": 1.3168, - "step": 1575 - }, - { - "epoch": 1.5322351604657767, - "grad_norm": 0.40234375, - "learning_rate": 2.4220099072045164e-06, - "loss": 1.327, - "step": 1576 - }, - { - "epoch": 1.5332089098064672, - "grad_norm": 0.408203125, - "learning_rate": 2.4194537260605895e-06, - "loss": 1.3198, - "step": 1577 - }, - { - "epoch": 1.534182659147158, - "grad_norm": 0.40625, - "learning_rate": 2.4168976292085013e-06, - "loss": 1.3137, - "step": 1578 - }, - { - "epoch": 1.5351564084878484, - "grad_norm": 0.412109375, - "learning_rate": 2.4143416193232123e-06, - "loss": 1.3177, - "step": 1579 - }, - { - "epoch": 1.5361301578285391, - "grad_norm": 0.40234375, - "learning_rate": 2.411785699079593e-06, - "loss": 1.3057, - "step": 1580 - }, - { - "epoch": 1.5371039071692296, - "grad_norm": 0.42578125, - "learning_rate": 2.4092298711524183e-06, - "loss": 1.3536, - "step": 1581 - }, - { - "epoch": 1.5380776565099201, - "grad_norm": 0.40234375, - "learning_rate": 2.406674138216368e-06, - "loss": 1.303, - "step": 1582 - }, - { - "epoch": 1.5390514058506106, - "grad_norm": 0.40234375, - "learning_rate": 2.4041185029460214e-06, - "loss": 1.3182, - "step": 1583 - }, - { - "epoch": 1.5400251551913011, - "grad_norm": 0.3984375, - "learning_rate": 2.4015629680158555e-06, - "loss": 1.3098, - "step": 1584 - }, - { - "epoch": 1.5409989045319916, - "grad_norm": 0.40234375, - "learning_rate": 2.3990075361002447e-06, - "loss": 1.3131, - "step": 1585 - }, - { - "epoch": 1.5419726538726821, - "grad_norm": 0.40625, - "learning_rate": 2.396452209873451e-06, - "loss": 1.3019, - "step": 1586 - }, - { - "epoch": 1.5429464032133728, - "grad_norm": 0.416015625, - "learning_rate": 2.39389699200963e-06, - "loss": 1.3238, - "step": 1587 - }, - { - "epoch": 1.5439201525540633, - "grad_norm": 0.404296875, - "learning_rate": 2.391341885182822e-06, - "loss": 1.3219, - "step": 1588 - }, - { - "epoch": 1.544893901894754, - "grad_norm": 0.40625, - "learning_rate": 2.3887868920669514e-06, - "loss": 1.3393, - "step": 1589 - }, - { - "epoch": 1.5458676512354446, - "grad_norm": 0.404296875, - "learning_rate": 2.3862320153358243e-06, - "loss": 1.3254, - "step": 1590 - }, - { - "epoch": 1.546841400576135, - "grad_norm": 0.419921875, - "learning_rate": 2.3836772576631236e-06, - "loss": 1.3146, - "step": 1591 - }, - { - "epoch": 1.5478151499168256, - "grad_norm": 0.41796875, - "learning_rate": 2.3811226217224087e-06, - "loss": 1.3222, - "step": 1592 - }, - { - "epoch": 1.548788899257516, - "grad_norm": 0.416015625, - "learning_rate": 2.378568110187112e-06, - "loss": 1.3232, - "step": 1593 - }, - { - "epoch": 1.5497626485982066, - "grad_norm": 0.41796875, - "learning_rate": 2.3760137257305336e-06, - "loss": 1.3076, - "step": 1594 - }, - { - "epoch": 1.5507363979388973, - "grad_norm": 0.404296875, - "learning_rate": 2.373459471025843e-06, - "loss": 1.3447, - "step": 1595 - }, - { - "epoch": 1.5517101472795878, - "grad_norm": 0.412109375, - "learning_rate": 2.3709053487460738e-06, - "loss": 1.3181, - "step": 1596 - }, - { - "epoch": 1.5526838966202783, - "grad_norm": 0.41796875, - "learning_rate": 2.368351361564118e-06, - "loss": 1.3174, - "step": 1597 - }, - { - "epoch": 1.553657645960969, - "grad_norm": 0.41796875, - "learning_rate": 2.3657975121527295e-06, - "loss": 1.3236, - "step": 1598 - }, - { - "epoch": 1.5546313953016595, - "grad_norm": 0.41015625, - "learning_rate": 2.3632438031845167e-06, - "loss": 1.3348, - "step": 1599 - }, - { - "epoch": 1.55560514464235, - "grad_norm": 0.408203125, - "learning_rate": 2.3606902373319414e-06, - "loss": 1.3159, - "step": 1600 - }, - { - "epoch": 1.5565788939830405, - "grad_norm": 0.40625, - "learning_rate": 2.358136817267315e-06, - "loss": 1.2833, - "step": 1601 - }, - { - "epoch": 1.557552643323731, - "grad_norm": 0.412109375, - "learning_rate": 2.3555835456627976e-06, - "loss": 1.3318, - "step": 1602 - }, - { - "epoch": 1.5585263926644215, - "grad_norm": 0.408203125, - "learning_rate": 2.3530304251903926e-06, - "loss": 1.2971, - "step": 1603 - }, - { - "epoch": 1.5595001420051122, - "grad_norm": 0.408203125, - "learning_rate": 2.350477458521946e-06, - "loss": 1.3297, - "step": 1604 - }, - { - "epoch": 1.5604738913458027, - "grad_norm": 0.40625, - "learning_rate": 2.347924648329142e-06, - "loss": 1.3151, - "step": 1605 - }, - { - "epoch": 1.5614476406864934, - "grad_norm": 0.421875, - "learning_rate": 2.345371997283502e-06, - "loss": 1.3315, - "step": 1606 - }, - { - "epoch": 1.562421390027184, - "grad_norm": 0.40234375, - "learning_rate": 2.342819508056382e-06, - "loss": 1.3173, - "step": 1607 - }, - { - "epoch": 1.5633951393678744, - "grad_norm": 0.416015625, - "learning_rate": 2.3402671833189646e-06, - "loss": 1.3461, - "step": 1608 - }, - { - "epoch": 1.564368888708565, - "grad_norm": 0.3984375, - "learning_rate": 2.3377150257422637e-06, - "loss": 1.312, - "step": 1609 - }, - { - "epoch": 1.5653426380492554, - "grad_norm": 0.40625, - "learning_rate": 2.3351630379971176e-06, - "loss": 1.3165, - "step": 1610 - }, - { - "epoch": 1.566316387389946, - "grad_norm": 0.400390625, - "learning_rate": 2.332611222754187e-06, - "loss": 1.3003, - "step": 1611 - }, - { - "epoch": 1.5672901367306364, - "grad_norm": 0.392578125, - "learning_rate": 2.330059582683951e-06, - "loss": 1.315, - "step": 1612 - }, - { - "epoch": 1.5682638860713272, - "grad_norm": 0.416015625, - "learning_rate": 2.3275081204567066e-06, - "loss": 1.3279, - "step": 1613 - }, - { - "epoch": 1.5692376354120177, - "grad_norm": 0.4140625, - "learning_rate": 2.3249568387425642e-06, - "loss": 1.306, - "step": 1614 - }, - { - "epoch": 1.5702113847527084, - "grad_norm": 0.40234375, - "learning_rate": 2.322405740211445e-06, - "loss": 1.3177, - "step": 1615 - }, - { - "epoch": 1.5711851340933989, - "grad_norm": 0.400390625, - "learning_rate": 2.319854827533079e-06, - "loss": 1.339, - "step": 1616 - }, - { - "epoch": 1.5721588834340894, - "grad_norm": 0.408203125, - "learning_rate": 2.3173041033770016e-06, - "loss": 1.3263, - "step": 1617 - }, - { - "epoch": 1.5731326327747799, - "grad_norm": 0.40234375, - "learning_rate": 2.314753570412552e-06, - "loss": 1.3069, - "step": 1618 - }, - { - "epoch": 1.5741063821154704, - "grad_norm": 0.396484375, - "learning_rate": 2.312203231308865e-06, - "loss": 1.3169, - "step": 1619 - }, - { - "epoch": 1.5750801314561609, - "grad_norm": 0.40625, - "learning_rate": 2.309653088734878e-06, - "loss": 1.2937, - "step": 1620 - }, - { - "epoch": 1.5760538807968514, - "grad_norm": 0.39453125, - "learning_rate": 2.30710314535932e-06, - "loss": 1.3169, - "step": 1621 - }, - { - "epoch": 1.577027630137542, - "grad_norm": 0.40234375, - "learning_rate": 2.304553403850711e-06, - "loss": 1.2974, - "step": 1622 - }, - { - "epoch": 1.5780013794782326, - "grad_norm": 0.41015625, - "learning_rate": 2.302003866877362e-06, - "loss": 1.3381, - "step": 1623 - }, - { - "epoch": 1.5789751288189233, - "grad_norm": 0.4140625, - "learning_rate": 2.2994545371073677e-06, - "loss": 1.3296, - "step": 1624 - }, - { - "epoch": 1.5799488781596138, - "grad_norm": 0.396484375, - "learning_rate": 2.2969054172086073e-06, - "loss": 1.3238, - "step": 1625 - }, - { - "epoch": 1.5809226275003043, - "grad_norm": 0.412109375, - "learning_rate": 2.29435650984874e-06, - "loss": 1.318, - "step": 1626 - }, - { - "epoch": 1.5818963768409948, - "grad_norm": 0.435546875, - "learning_rate": 2.291807817695202e-06, - "loss": 1.3192, - "step": 1627 - }, - { - "epoch": 1.5828701261816853, - "grad_norm": 0.404296875, - "learning_rate": 2.289259343415206e-06, - "loss": 1.3055, - "step": 1628 - }, - { - "epoch": 1.5838438755223758, - "grad_norm": 0.3984375, - "learning_rate": 2.286711089675735e-06, - "loss": 1.3111, - "step": 1629 - }, - { - "epoch": 1.5848176248630665, - "grad_norm": 0.421875, - "learning_rate": 2.2841630591435403e-06, - "loss": 1.3275, - "step": 1630 - }, - { - "epoch": 1.585791374203757, - "grad_norm": 0.40625, - "learning_rate": 2.2816152544851423e-06, - "loss": 1.3026, - "step": 1631 - }, - { - "epoch": 1.5867651235444478, - "grad_norm": 0.40234375, - "learning_rate": 2.2790676783668235e-06, - "loss": 1.3393, - "step": 1632 - }, - { - "epoch": 1.5877388728851383, - "grad_norm": 0.40234375, - "learning_rate": 2.2765203334546272e-06, - "loss": 1.3204, - "step": 1633 - }, - { - "epoch": 1.5887126222258288, - "grad_norm": 0.41796875, - "learning_rate": 2.2739732224143556e-06, - "loss": 1.3085, - "step": 1634 - }, - { - "epoch": 1.5896863715665193, - "grad_norm": 0.419921875, - "learning_rate": 2.2714263479115658e-06, - "loss": 1.3125, - "step": 1635 - }, - { - "epoch": 1.5906601209072098, - "grad_norm": 0.3984375, - "learning_rate": 2.268879712611566e-06, - "loss": 1.3001, - "step": 1636 - }, - { - "epoch": 1.5916338702479003, - "grad_norm": 0.40625, - "learning_rate": 2.2663333191794165e-06, - "loss": 1.3272, - "step": 1637 - }, - { - "epoch": 1.5926076195885908, - "grad_norm": 0.40234375, - "learning_rate": 2.263787170279922e-06, - "loss": 1.3116, - "step": 1638 - }, - { - "epoch": 1.5935813689292815, - "grad_norm": 0.404296875, - "learning_rate": 2.2612412685776335e-06, - "loss": 1.308, - "step": 1639 - }, - { - "epoch": 1.594555118269972, - "grad_norm": 0.404296875, - "learning_rate": 2.258695616736842e-06, - "loss": 1.3227, - "step": 1640 - }, - { - "epoch": 1.5955288676106627, - "grad_norm": 0.421875, - "learning_rate": 2.256150217421576e-06, - "loss": 1.3295, - "step": 1641 - }, - { - "epoch": 1.5965026169513532, - "grad_norm": 0.41015625, - "learning_rate": 2.2536050732956035e-06, - "loss": 1.3188, - "step": 1642 - }, - { - "epoch": 1.5974763662920437, - "grad_norm": 0.40234375, - "learning_rate": 2.2510601870224206e-06, - "loss": 1.3081, - "step": 1643 - }, - { - "epoch": 1.5984501156327342, - "grad_norm": 0.40234375, - "learning_rate": 2.2485155612652574e-06, - "loss": 1.3342, - "step": 1644 - }, - { - "epoch": 1.5994238649734247, - "grad_norm": 0.421875, - "learning_rate": 2.2459711986870704e-06, - "loss": 1.3203, - "step": 1645 - }, - { - "epoch": 1.6003976143141152, - "grad_norm": 0.404296875, - "learning_rate": 2.243427101950538e-06, - "loss": 1.3271, - "step": 1646 - }, - { - "epoch": 1.6013713636548057, - "grad_norm": 0.404296875, - "learning_rate": 2.2408832737180648e-06, - "loss": 1.3244, - "step": 1647 - }, - { - "epoch": 1.6023451129954964, - "grad_norm": 0.42578125, - "learning_rate": 2.238339716651771e-06, - "loss": 1.3197, - "step": 1648 - }, - { - "epoch": 1.603318862336187, - "grad_norm": 0.39453125, - "learning_rate": 2.2357964334134947e-06, - "loss": 1.3157, - "step": 1649 - }, - { - "epoch": 1.6042926116768776, - "grad_norm": 0.412109375, - "learning_rate": 2.2332534266647867e-06, - "loss": 1.3465, - "step": 1650 - }, - { - "epoch": 1.6052663610175681, - "grad_norm": 0.40625, - "learning_rate": 2.2307106990669093e-06, - "loss": 1.33, - "step": 1651 - }, - { - "epoch": 1.6062401103582586, - "grad_norm": 0.408203125, - "learning_rate": 2.2281682532808317e-06, - "loss": 1.3204, - "step": 1652 - }, - { - "epoch": 1.6072138596989491, - "grad_norm": 0.41796875, - "learning_rate": 2.225626091967228e-06, - "loss": 1.3131, - "step": 1653 - }, - { - "epoch": 1.6081876090396396, - "grad_norm": 0.4296875, - "learning_rate": 2.2230842177864766e-06, - "loss": 1.3227, - "step": 1654 - }, - { - "epoch": 1.6091613583803301, - "grad_norm": 0.400390625, - "learning_rate": 2.220542633398652e-06, - "loss": 1.3009, - "step": 1655 - }, - { - "epoch": 1.6101351077210209, - "grad_norm": 0.41015625, - "learning_rate": 2.218001341463529e-06, - "loss": 1.3153, - "step": 1656 - }, - { - "epoch": 1.6111088570617114, - "grad_norm": 0.4140625, - "learning_rate": 2.215460344640574e-06, - "loss": 1.3216, - "step": 1657 - }, - { - "epoch": 1.6120826064024019, - "grad_norm": 0.408203125, - "learning_rate": 2.212919645588946e-06, - "loss": 1.3238, - "step": 1658 - }, - { - "epoch": 1.6130563557430926, - "grad_norm": 0.41796875, - "learning_rate": 2.210379246967491e-06, - "loss": 1.3253, - "step": 1659 - }, - { - "epoch": 1.614030105083783, - "grad_norm": 0.416015625, - "learning_rate": 2.207839151434742e-06, - "loss": 1.3132, - "step": 1660 - }, - { - "epoch": 1.6150038544244736, - "grad_norm": 0.40234375, - "learning_rate": 2.205299361648913e-06, - "loss": 1.3274, - "step": 1661 - }, - { - "epoch": 1.615977603765164, - "grad_norm": 0.408203125, - "learning_rate": 2.2027598802679013e-06, - "loss": 1.3096, - "step": 1662 - }, - { - "epoch": 1.6169513531058546, - "grad_norm": 0.41015625, - "learning_rate": 2.2002207099492776e-06, - "loss": 1.319, - "step": 1663 - }, - { - "epoch": 1.617925102446545, - "grad_norm": 0.404296875, - "learning_rate": 2.1976818533502886e-06, - "loss": 1.3269, - "step": 1664 - }, - { - "epoch": 1.6188988517872358, - "grad_norm": 0.41796875, - "learning_rate": 2.1951433131278535e-06, - "loss": 1.3117, - "step": 1665 - }, - { - "epoch": 1.6198726011279263, - "grad_norm": 0.396484375, - "learning_rate": 2.1926050919385594e-06, - "loss": 1.3188, - "step": 1666 - }, - { - "epoch": 1.620846350468617, - "grad_norm": 0.408203125, - "learning_rate": 2.1900671924386606e-06, - "loss": 1.3052, - "step": 1667 - }, - { - "epoch": 1.6218200998093075, - "grad_norm": 0.4140625, - "learning_rate": 2.1875296172840737e-06, - "loss": 1.3015, - "step": 1668 - }, - { - "epoch": 1.622793849149998, - "grad_norm": 0.416015625, - "learning_rate": 2.1849923691303757e-06, - "loss": 1.3102, - "step": 1669 - }, - { - "epoch": 1.6237675984906885, - "grad_norm": 0.408203125, - "learning_rate": 2.1824554506328033e-06, - "loss": 1.3063, - "step": 1670 - }, - { - "epoch": 1.624741347831379, - "grad_norm": 0.400390625, - "learning_rate": 2.1799188644462457e-06, - "loss": 1.3198, - "step": 1671 - }, - { - "epoch": 1.6257150971720695, - "grad_norm": 0.421875, - "learning_rate": 2.1773826132252456e-06, - "loss": 1.3444, - "step": 1672 - }, - { - "epoch": 1.62668884651276, - "grad_norm": 0.40234375, - "learning_rate": 2.174846699623997e-06, - "loss": 1.315, - "step": 1673 - }, - { - "epoch": 1.6276625958534507, - "grad_norm": 0.419921875, - "learning_rate": 2.172311126296335e-06, - "loss": 1.3202, - "step": 1674 - }, - { - "epoch": 1.6286363451941412, - "grad_norm": 0.4140625, - "learning_rate": 2.169775895895745e-06, - "loss": 1.3349, - "step": 1675 - }, - { - "epoch": 1.629610094534832, - "grad_norm": 0.4140625, - "learning_rate": 2.1672410110753495e-06, - "loss": 1.3099, - "step": 1676 - }, - { - "epoch": 1.6305838438755225, - "grad_norm": 0.41796875, - "learning_rate": 2.164706474487911e-06, - "loss": 1.3121, - "step": 1677 - }, - { - "epoch": 1.631557593216213, - "grad_norm": 0.40234375, - "learning_rate": 2.1621722887858273e-06, - "loss": 1.3133, - "step": 1678 - }, - { - "epoch": 1.6325313425569035, - "grad_norm": 0.4140625, - "learning_rate": 2.1596384566211286e-06, - "loss": 1.3271, - "step": 1679 - }, - { - "epoch": 1.633505091897594, - "grad_norm": 0.41796875, - "learning_rate": 2.157104980645476e-06, - "loss": 1.3363, - "step": 1680 - }, - { - "epoch": 1.6344788412382845, - "grad_norm": 0.40234375, - "learning_rate": 2.1545718635101563e-06, - "loss": 1.3166, - "step": 1681 - }, - { - "epoch": 1.635452590578975, - "grad_norm": 0.41015625, - "learning_rate": 2.1520391078660823e-06, - "loss": 1.2876, - "step": 1682 - }, - { - "epoch": 1.6364263399196657, - "grad_norm": 0.41015625, - "learning_rate": 2.149506716363788e-06, - "loss": 1.3082, - "step": 1683 - }, - { - "epoch": 1.6374000892603562, - "grad_norm": 0.41796875, - "learning_rate": 2.146974691653427e-06, - "loss": 1.318, - "step": 1684 - }, - { - "epoch": 1.638373838601047, - "grad_norm": 0.408203125, - "learning_rate": 2.144443036384767e-06, - "loss": 1.317, - "step": 1685 - }, - { - "epoch": 1.6393475879417374, - "grad_norm": 0.421875, - "learning_rate": 2.1419117532071906e-06, - "loss": 1.3125, - "step": 1686 - }, - { - "epoch": 1.640321337282428, - "grad_norm": 0.416015625, - "learning_rate": 2.1393808447696906e-06, - "loss": 1.3225, - "step": 1687 - }, - { - "epoch": 1.6412950866231184, - "grad_norm": 0.40625, - "learning_rate": 2.136850313720868e-06, - "loss": 1.3023, - "step": 1688 - }, - { - "epoch": 1.642268835963809, - "grad_norm": 0.40625, - "learning_rate": 2.134320162708929e-06, - "loss": 1.3211, - "step": 1689 - }, - { - "epoch": 1.6432425853044994, - "grad_norm": 0.42578125, - "learning_rate": 2.131790394381682e-06, - "loss": 1.3269, - "step": 1690 - }, - { - "epoch": 1.6442163346451901, - "grad_norm": 0.419921875, - "learning_rate": 2.129261011386534e-06, - "loss": 1.3202, - "step": 1691 - }, - { - "epoch": 1.6451900839858806, - "grad_norm": 0.412109375, - "learning_rate": 2.1267320163704897e-06, - "loss": 1.3397, - "step": 1692 - }, - { - "epoch": 1.6461638333265713, - "grad_norm": 0.416015625, - "learning_rate": 2.1242034119801477e-06, - "loss": 1.3212, - "step": 1693 - }, - { - "epoch": 1.6471375826672618, - "grad_norm": 0.41015625, - "learning_rate": 2.1216752008616974e-06, - "loss": 1.3154, - "step": 1694 - }, - { - "epoch": 1.6481113320079523, - "grad_norm": 0.408203125, - "learning_rate": 2.1191473856609182e-06, - "loss": 1.2978, - "step": 1695 - }, - { - "epoch": 1.6490850813486428, - "grad_norm": 0.40234375, - "learning_rate": 2.1166199690231715e-06, - "loss": 1.3023, - "step": 1696 - }, - { - "epoch": 1.6500588306893333, - "grad_norm": 0.41015625, - "learning_rate": 2.114092953593405e-06, - "loss": 1.3054, - "step": 1697 - }, - { - "epoch": 1.6510325800300238, - "grad_norm": 0.40234375, - "learning_rate": 2.111566342016145e-06, - "loss": 1.3176, - "step": 1698 - }, - { - "epoch": 1.6520063293707143, - "grad_norm": 0.3984375, - "learning_rate": 2.1090401369354966e-06, - "loss": 1.3254, - "step": 1699 - }, - { - "epoch": 1.652980078711405, - "grad_norm": 0.41796875, - "learning_rate": 2.1065143409951384e-06, - "loss": 1.3353, - "step": 1700 - }, - { - "epoch": 1.6539538280520956, - "grad_norm": 0.4140625, - "learning_rate": 2.1039889568383202e-06, - "loss": 1.3259, - "step": 1701 - }, - { - "epoch": 1.6549275773927863, - "grad_norm": 0.416015625, - "learning_rate": 2.1014639871078625e-06, - "loss": 1.3283, - "step": 1702 - }, - { - "epoch": 1.6559013267334768, - "grad_norm": 0.419921875, - "learning_rate": 2.09893943444615e-06, - "loss": 1.3329, - "step": 1703 - }, - { - "epoch": 1.6568750760741673, - "grad_norm": 0.400390625, - "learning_rate": 2.096415301495134e-06, - "loss": 1.2812, - "step": 1704 - }, - { - "epoch": 1.6578488254148578, - "grad_norm": 0.400390625, - "learning_rate": 2.0938915908963232e-06, - "loss": 1.3271, - "step": 1705 - }, - { - "epoch": 1.6588225747555483, - "grad_norm": 0.40234375, - "learning_rate": 2.091368305290788e-06, - "loss": 1.3063, - "step": 1706 - }, - { - "epoch": 1.6597963240962388, - "grad_norm": 0.40234375, - "learning_rate": 2.088845447319149e-06, - "loss": 1.3102, - "step": 1707 - }, - { - "epoch": 1.6607700734369293, - "grad_norm": 0.4296875, - "learning_rate": 2.0863230196215833e-06, - "loss": 1.3243, - "step": 1708 - }, - { - "epoch": 1.66174382277762, - "grad_norm": 0.408203125, - "learning_rate": 2.083801024837817e-06, - "loss": 1.3017, - "step": 1709 - }, - { - "epoch": 1.6627175721183105, - "grad_norm": 0.412109375, - "learning_rate": 2.081279465607123e-06, - "loss": 1.3047, - "step": 1710 - }, - { - "epoch": 1.6636913214590012, - "grad_norm": 0.431640625, - "learning_rate": 2.078758344568317e-06, - "loss": 1.2974, - "step": 1711 - }, - { - "epoch": 1.6646650707996917, - "grad_norm": 0.404296875, - "learning_rate": 2.0762376643597586e-06, - "loss": 1.3087, - "step": 1712 - }, - { - "epoch": 1.6656388201403822, - "grad_norm": 0.400390625, - "learning_rate": 2.073717427619344e-06, - "loss": 1.3216, - "step": 1713 - }, - { - "epoch": 1.6666125694810727, - "grad_norm": 0.40625, - "learning_rate": 2.0711976369845065e-06, - "loss": 1.341, - "step": 1714 - }, - { - "epoch": 1.6675863188217632, - "grad_norm": 0.40625, - "learning_rate": 2.0686782950922113e-06, - "loss": 1.3201, - "step": 1715 - }, - { - "epoch": 1.6685600681624537, - "grad_norm": 0.412109375, - "learning_rate": 2.066159404578956e-06, - "loss": 1.3223, - "step": 1716 - }, - { - "epoch": 1.6695338175031444, - "grad_norm": 0.400390625, - "learning_rate": 2.0636409680807646e-06, - "loss": 1.3329, - "step": 1717 - }, - { - "epoch": 1.670507566843835, - "grad_norm": 0.404296875, - "learning_rate": 2.0611229882331844e-06, - "loss": 1.3013, - "step": 1718 - }, - { - "epoch": 1.6714813161845254, - "grad_norm": 0.4140625, - "learning_rate": 2.0586054676712873e-06, - "loss": 1.3448, - "step": 1719 - }, - { - "epoch": 1.6724550655252162, - "grad_norm": 0.396484375, - "learning_rate": 2.0560884090296642e-06, - "loss": 1.3304, - "step": 1720 - }, - { - "epoch": 1.6734288148659067, - "grad_norm": 0.40625, - "learning_rate": 2.0535718149424213e-06, - "loss": 1.3149, - "step": 1721 - }, - { - "epoch": 1.6744025642065972, - "grad_norm": 0.40234375, - "learning_rate": 2.05105568804318e-06, - "loss": 1.3121, - "step": 1722 - }, - { - "epoch": 1.6753763135472877, - "grad_norm": 0.40234375, - "learning_rate": 2.048540030965072e-06, - "loss": 1.3351, - "step": 1723 - }, - { - "epoch": 1.6763500628879782, - "grad_norm": 0.412109375, - "learning_rate": 2.0460248463407377e-06, - "loss": 1.3046, - "step": 1724 - }, - { - "epoch": 1.6773238122286687, - "grad_norm": 0.40234375, - "learning_rate": 2.0435101368023222e-06, - "loss": 1.3182, - "step": 1725 - }, - { - "epoch": 1.6782975615693594, - "grad_norm": 0.396484375, - "learning_rate": 2.0409959049814753e-06, - "loss": 1.334, - "step": 1726 - }, - { - "epoch": 1.6792713109100499, - "grad_norm": 0.416015625, - "learning_rate": 2.038482153509345e-06, - "loss": 1.327, - "step": 1727 - }, - { - "epoch": 1.6802450602507406, - "grad_norm": 0.419921875, - "learning_rate": 2.0359688850165775e-06, - "loss": 1.324, - "step": 1728 - }, - { - "epoch": 1.681218809591431, - "grad_norm": 0.40625, - "learning_rate": 2.033456102133313e-06, - "loss": 1.3314, - "step": 1729 - }, - { - "epoch": 1.6821925589321216, - "grad_norm": 0.400390625, - "learning_rate": 2.030943807489184e-06, - "loss": 1.316, - "step": 1730 - }, - { - "epoch": 1.683166308272812, - "grad_norm": 0.404296875, - "learning_rate": 2.0284320037133124e-06, - "loss": 1.3223, - "step": 1731 - }, - { - "epoch": 1.6841400576135026, - "grad_norm": 0.40625, - "learning_rate": 2.025920693434305e-06, - "loss": 1.3131, - "step": 1732 - }, - { - "epoch": 1.685113806954193, - "grad_norm": 0.40234375, - "learning_rate": 2.0234098792802543e-06, - "loss": 1.3433, - "step": 1733 - }, - { - "epoch": 1.6860875562948836, - "grad_norm": 0.412109375, - "learning_rate": 2.0208995638787317e-06, - "loss": 1.3107, - "step": 1734 - }, - { - "epoch": 1.6870613056355743, - "grad_norm": 0.408203125, - "learning_rate": 2.0183897498567874e-06, - "loss": 1.3225, - "step": 1735 - }, - { - "epoch": 1.6880350549762648, - "grad_norm": 0.416015625, - "learning_rate": 2.0158804398409478e-06, - "loss": 1.3166, - "step": 1736 - }, - { - "epoch": 1.6890088043169555, - "grad_norm": 0.404296875, - "learning_rate": 2.0133716364572094e-06, - "loss": 1.3202, - "step": 1737 - }, - { - "epoch": 1.689982553657646, - "grad_norm": 0.408203125, - "learning_rate": 2.0108633423310407e-06, - "loss": 1.3136, - "step": 1738 - }, - { - "epoch": 1.6909563029983365, - "grad_norm": 0.40625, - "learning_rate": 2.008355560087377e-06, - "loss": 1.3043, - "step": 1739 - }, - { - "epoch": 1.691930052339027, - "grad_norm": 0.41015625, - "learning_rate": 2.0058482923506168e-06, - "loss": 1.3084, - "step": 1740 - }, - { - "epoch": 1.6929038016797175, - "grad_norm": 0.400390625, - "learning_rate": 2.0033415417446213e-06, - "loss": 1.2842, - "step": 1741 - }, - { - "epoch": 1.693877551020408, - "grad_norm": 0.40625, - "learning_rate": 2.0008353108927096e-06, - "loss": 1.3056, - "step": 1742 - }, - { - "epoch": 1.6948513003610988, - "grad_norm": 0.41015625, - "learning_rate": 1.998329602417658e-06, - "loss": 1.3268, - "step": 1743 - }, - { - "epoch": 1.6958250497017893, - "grad_norm": 0.41015625, - "learning_rate": 1.9958244189416955e-06, - "loss": 1.3373, - "step": 1744 - }, - { - "epoch": 1.6967987990424798, - "grad_norm": 0.408203125, - "learning_rate": 1.9933197630865014e-06, - "loss": 1.3278, - "step": 1745 - }, - { - "epoch": 1.6977725483831705, - "grad_norm": 0.3984375, - "learning_rate": 1.990815637473203e-06, - "loss": 1.3251, - "step": 1746 - }, - { - "epoch": 1.698746297723861, - "grad_norm": 0.400390625, - "learning_rate": 1.988312044722373e-06, - "loss": 1.3134, - "step": 1747 - }, - { - "epoch": 1.6997200470645515, - "grad_norm": 0.4140625, - "learning_rate": 1.9858089874540264e-06, - "loss": 1.3539, - "step": 1748 - }, - { - "epoch": 1.700693796405242, - "grad_norm": 0.408203125, - "learning_rate": 1.9833064682876175e-06, - "loss": 1.3348, - "step": 1749 - }, - { - "epoch": 1.7016675457459325, - "grad_norm": 0.416015625, - "learning_rate": 1.9808044898420387e-06, - "loss": 1.3189, - "step": 1750 - }, - { - "epoch": 1.702641295086623, - "grad_norm": 0.408203125, - "learning_rate": 1.9783030547356134e-06, - "loss": 1.3063, - "step": 1751 - }, - { - "epoch": 1.7036150444273137, - "grad_norm": 0.4140625, - "learning_rate": 1.9758021655861005e-06, - "loss": 1.3115, - "step": 1752 - }, - { - "epoch": 1.7045887937680042, - "grad_norm": 0.4140625, - "learning_rate": 1.973301825010685e-06, - "loss": 1.3155, - "step": 1753 - }, - { - "epoch": 1.705562543108695, - "grad_norm": 0.404296875, - "learning_rate": 1.970802035625978e-06, - "loss": 1.301, - "step": 1754 - }, - { - "epoch": 1.7065362924493854, - "grad_norm": 0.392578125, - "learning_rate": 1.9683028000480135e-06, - "loss": 1.3158, - "step": 1755 - }, - { - "epoch": 1.707510041790076, - "grad_norm": 0.40234375, - "learning_rate": 1.965804120892248e-06, - "loss": 1.3212, - "step": 1756 - }, - { - "epoch": 1.7084837911307664, - "grad_norm": 0.408203125, - "learning_rate": 1.963306000773554e-06, - "loss": 1.3197, - "step": 1757 - }, - { - "epoch": 1.709457540471457, - "grad_norm": 0.40234375, - "learning_rate": 1.960808442306219e-06, - "loss": 1.336, - "step": 1758 - }, - { - "epoch": 1.7104312898121474, - "grad_norm": 0.400390625, - "learning_rate": 1.9583114481039428e-06, - "loss": 1.3246, - "step": 1759 - }, - { - "epoch": 1.711405039152838, - "grad_norm": 0.41015625, - "learning_rate": 1.955815020779835e-06, - "loss": 1.3058, - "step": 1760 - }, - { - "epoch": 1.7123787884935286, - "grad_norm": 0.41796875, - "learning_rate": 1.953319162946413e-06, - "loss": 1.3121, - "step": 1761 - }, - { - "epoch": 1.7133525378342191, - "grad_norm": 0.41015625, - "learning_rate": 1.950823877215596e-06, - "loss": 1.3095, - "step": 1762 - }, - { - "epoch": 1.7143262871749099, - "grad_norm": 0.400390625, - "learning_rate": 1.948329166198705e-06, - "loss": 1.3092, - "step": 1763 - }, - { - "epoch": 1.7153000365156004, - "grad_norm": 0.404296875, - "learning_rate": 1.9458350325064606e-06, - "loss": 1.3149, - "step": 1764 - }, - { - "epoch": 1.7162737858562909, - "grad_norm": 0.41015625, - "learning_rate": 1.943341478748979e-06, - "loss": 1.3227, - "step": 1765 - }, - { - "epoch": 1.7172475351969814, - "grad_norm": 0.443359375, - "learning_rate": 1.9408485075357695e-06, - "loss": 1.2979, - "step": 1766 - }, - { - "epoch": 1.7182212845376719, - "grad_norm": 0.404296875, - "learning_rate": 1.9383561214757317e-06, - "loss": 1.3171, - "step": 1767 - }, - { - "epoch": 1.7191950338783624, - "grad_norm": 0.4140625, - "learning_rate": 1.9358643231771517e-06, - "loss": 1.3133, - "step": 1768 - }, - { - "epoch": 1.7201687832190529, - "grad_norm": 0.40234375, - "learning_rate": 1.933373115247702e-06, - "loss": 1.309, - "step": 1769 - }, - { - "epoch": 1.7211425325597436, - "grad_norm": 0.404296875, - "learning_rate": 1.930882500294437e-06, - "loss": 1.3193, - "step": 1770 - }, - { - "epoch": 1.722116281900434, - "grad_norm": 0.421875, - "learning_rate": 1.92839248092379e-06, - "loss": 1.3358, - "step": 1771 - }, - { - "epoch": 1.7230900312411248, - "grad_norm": 0.40625, - "learning_rate": 1.9259030597415725e-06, - "loss": 1.308, - "step": 1772 - }, - { - "epoch": 1.7240637805818153, - "grad_norm": 0.408203125, - "learning_rate": 1.9234142393529664e-06, - "loss": 1.3055, - "step": 1773 - }, - { - "epoch": 1.7250375299225058, - "grad_norm": 0.390625, - "learning_rate": 1.920926022362529e-06, - "loss": 1.2888, - "step": 1774 - }, - { - "epoch": 1.7260112792631963, - "grad_norm": 0.40234375, - "learning_rate": 1.918438411374184e-06, - "loss": 1.3207, - "step": 1775 - }, - { - "epoch": 1.7269850286038868, - "grad_norm": 0.400390625, - "learning_rate": 1.915951408991221e-06, - "loss": 1.3243, - "step": 1776 - }, - { - "epoch": 1.7279587779445773, - "grad_norm": 0.404296875, - "learning_rate": 1.9134650178162937e-06, - "loss": 1.3249, - "step": 1777 - }, - { - "epoch": 1.728932527285268, - "grad_norm": 0.41015625, - "learning_rate": 1.910979240451415e-06, - "loss": 1.3181, - "step": 1778 - }, - { - "epoch": 1.7299062766259585, - "grad_norm": 0.408203125, - "learning_rate": 1.908494079497956e-06, - "loss": 1.3125, - "step": 1779 - }, - { - "epoch": 1.7308800259666492, - "grad_norm": 0.39453125, - "learning_rate": 1.9060095375566434e-06, - "loss": 1.313, - "step": 1780 - }, - { - "epoch": 1.7318537753073397, - "grad_norm": 0.404296875, - "learning_rate": 1.903525617227555e-06, - "loss": 1.3111, - "step": 1781 - }, - { - "epoch": 1.7328275246480302, - "grad_norm": 0.404296875, - "learning_rate": 1.9010423211101179e-06, - "loss": 1.291, - "step": 1782 - }, - { - "epoch": 1.7338012739887207, - "grad_norm": 0.40625, - "learning_rate": 1.8985596518031069e-06, - "loss": 1.3198, - "step": 1783 - }, - { - "epoch": 1.7347750233294112, - "grad_norm": 0.404296875, - "learning_rate": 1.8960776119046417e-06, - "loss": 1.3269, - "step": 1784 - }, - { - "epoch": 1.7357487726701017, - "grad_norm": 0.40234375, - "learning_rate": 1.8935962040121797e-06, - "loss": 1.3087, - "step": 1785 - }, - { - "epoch": 1.7367225220107922, - "grad_norm": 0.404296875, - "learning_rate": 1.8911154307225204e-06, - "loss": 1.3191, - "step": 1786 - }, - { - "epoch": 1.737696271351483, - "grad_norm": 0.396484375, - "learning_rate": 1.8886352946317984e-06, - "loss": 1.3191, - "step": 1787 - }, - { - "epoch": 1.7386700206921735, - "grad_norm": 0.40234375, - "learning_rate": 1.8861557983354812e-06, - "loss": 1.3295, - "step": 1788 - }, - { - "epoch": 1.7396437700328642, - "grad_norm": 0.40234375, - "learning_rate": 1.8836769444283658e-06, - "loss": 1.3094, - "step": 1789 - }, - { - "epoch": 1.7406175193735547, - "grad_norm": 0.41796875, - "learning_rate": 1.8811987355045789e-06, - "loss": 1.3324, - "step": 1790 - }, - { - "epoch": 1.7415912687142452, - "grad_norm": 0.408203125, - "learning_rate": 1.8787211741575706e-06, - "loss": 1.3338, - "step": 1791 - }, - { - "epoch": 1.7425650180549357, - "grad_norm": 0.4140625, - "learning_rate": 1.8762442629801139e-06, - "loss": 1.3038, - "step": 1792 - }, - { - "epoch": 1.7435387673956262, - "grad_norm": 0.40234375, - "learning_rate": 1.8737680045643013e-06, - "loss": 1.3108, - "step": 1793 - }, - { - "epoch": 1.7445125167363167, - "grad_norm": 0.419921875, - "learning_rate": 1.8712924015015429e-06, - "loss": 1.3124, - "step": 1794 - }, - { - "epoch": 1.7454862660770072, - "grad_norm": 0.400390625, - "learning_rate": 1.8688174563825629e-06, - "loss": 1.343, - "step": 1795 - }, - { - "epoch": 1.746460015417698, - "grad_norm": 0.41015625, - "learning_rate": 1.8663431717973939e-06, - "loss": 1.3321, - "step": 1796 - }, - { - "epoch": 1.7474337647583884, - "grad_norm": 0.396484375, - "learning_rate": 1.8638695503353816e-06, - "loss": 1.3017, - "step": 1797 - }, - { - "epoch": 1.7484075140990791, - "grad_norm": 0.400390625, - "learning_rate": 1.8613965945851753e-06, - "loss": 1.3163, - "step": 1798 - }, - { - "epoch": 1.7493812634397696, - "grad_norm": 0.400390625, - "learning_rate": 1.8589243071347279e-06, - "loss": 1.3126, - "step": 1799 - }, - { - "epoch": 1.7503550127804601, - "grad_norm": 0.408203125, - "learning_rate": 1.8564526905712943e-06, - "loss": 1.3137, - "step": 1800 - }, - { - "epoch": 1.7513287621211506, - "grad_norm": 0.40234375, - "learning_rate": 1.8539817474814257e-06, - "loss": 1.3132, - "step": 1801 - }, - { - "epoch": 1.7523025114618411, - "grad_norm": 0.396484375, - "learning_rate": 1.8515114804509687e-06, - "loss": 1.3407, - "step": 1802 - }, - { - "epoch": 1.7532762608025316, - "grad_norm": 0.408203125, - "learning_rate": 1.8490418920650633e-06, - "loss": 1.3373, - "step": 1803 - }, - { - "epoch": 1.7542500101432223, - "grad_norm": 0.396484375, - "learning_rate": 1.8465729849081382e-06, - "loss": 1.3004, - "step": 1804 - }, - { - "epoch": 1.7552237594839128, - "grad_norm": 0.419921875, - "learning_rate": 1.8441047615639103e-06, - "loss": 1.3301, - "step": 1805 - }, - { - "epoch": 1.7561975088246033, - "grad_norm": 0.4140625, - "learning_rate": 1.8416372246153813e-06, - "loss": 1.2893, - "step": 1806 - }, - { - "epoch": 1.757171258165294, - "grad_norm": 0.4140625, - "learning_rate": 1.8391703766448314e-06, - "loss": 1.3386, - "step": 1807 - }, - { - "epoch": 1.7581450075059846, - "grad_norm": 0.400390625, - "learning_rate": 1.8367042202338228e-06, - "loss": 1.3159, - "step": 1808 - }, - { - "epoch": 1.759118756846675, - "grad_norm": 0.404296875, - "learning_rate": 1.8342387579631938e-06, - "loss": 1.3156, - "step": 1809 - }, - { - "epoch": 1.7600925061873656, - "grad_norm": 0.39453125, - "learning_rate": 1.8317739924130548e-06, - "loss": 1.3203, - "step": 1810 - }, - { - "epoch": 1.761066255528056, - "grad_norm": 0.404296875, - "learning_rate": 1.8293099261627887e-06, - "loss": 1.2946, - "step": 1811 - }, - { - "epoch": 1.7620400048687466, - "grad_norm": 0.392578125, - "learning_rate": 1.8268465617910456e-06, - "loss": 1.2961, - "step": 1812 - }, - { - "epoch": 1.7630137542094373, - "grad_norm": 0.3984375, - "learning_rate": 1.8243839018757412e-06, - "loss": 1.3179, - "step": 1813 - }, - { - "epoch": 1.7639875035501278, - "grad_norm": 0.408203125, - "learning_rate": 1.8219219489940542e-06, - "loss": 1.3049, - "step": 1814 - }, - { - "epoch": 1.7649612528908185, - "grad_norm": 0.40625, - "learning_rate": 1.8194607057224234e-06, - "loss": 1.3293, - "step": 1815 - }, - { - "epoch": 1.765935002231509, - "grad_norm": 0.40625, - "learning_rate": 1.8170001746365445e-06, - "loss": 1.3112, - "step": 1816 - }, - { - "epoch": 1.7669087515721995, - "grad_norm": 0.396484375, - "learning_rate": 1.81454035831137e-06, - "loss": 1.3313, - "step": 1817 - }, - { - "epoch": 1.76788250091289, - "grad_norm": 0.40625, - "learning_rate": 1.8120812593210999e-06, - "loss": 1.3131, - "step": 1818 - }, - { - "epoch": 1.7688562502535805, - "grad_norm": 0.40234375, - "learning_rate": 1.8096228802391875e-06, - "loss": 1.2929, - "step": 1819 - }, - { - "epoch": 1.769829999594271, - "grad_norm": 0.400390625, - "learning_rate": 1.8071652236383316e-06, - "loss": 1.3389, - "step": 1820 - }, - { - "epoch": 1.7708037489349615, - "grad_norm": 0.412109375, - "learning_rate": 1.8047082920904748e-06, - "loss": 1.3255, - "step": 1821 - }, - { - "epoch": 1.7717774982756522, - "grad_norm": 0.458984375, - "learning_rate": 1.802252088166801e-06, - "loss": 1.3184, - "step": 1822 - }, - { - "epoch": 1.7727512476163427, - "grad_norm": 0.4140625, - "learning_rate": 1.7997966144377328e-06, - "loss": 1.3293, - "step": 1823 - }, - { - "epoch": 1.7737249969570335, - "grad_norm": 0.412109375, - "learning_rate": 1.7973418734729278e-06, - "loss": 1.3226, - "step": 1824 - }, - { - "epoch": 1.774698746297724, - "grad_norm": 0.408203125, - "learning_rate": 1.7948878678412779e-06, - "loss": 1.3325, - "step": 1825 - }, - { - "epoch": 1.7756724956384144, - "grad_norm": 0.408203125, - "learning_rate": 1.792434600110905e-06, - "loss": 1.2998, - "step": 1826 - }, - { - "epoch": 1.776646244979105, - "grad_norm": 0.404296875, - "learning_rate": 1.789982072849159e-06, - "loss": 1.3398, - "step": 1827 - }, - { - "epoch": 1.7776199943197954, - "grad_norm": 0.40234375, - "learning_rate": 1.7875302886226143e-06, - "loss": 1.3189, - "step": 1828 - }, - { - "epoch": 1.778593743660486, - "grad_norm": 0.400390625, - "learning_rate": 1.7850792499970673e-06, - "loss": 1.3301, - "step": 1829 - }, - { - "epoch": 1.7795674930011764, - "grad_norm": 0.39453125, - "learning_rate": 1.7826289595375356e-06, - "loss": 1.3093, - "step": 1830 - }, - { - "epoch": 1.7805412423418672, - "grad_norm": 0.3984375, - "learning_rate": 1.7801794198082534e-06, - "loss": 1.3059, - "step": 1831 - }, - { - "epoch": 1.7815149916825577, - "grad_norm": 0.404296875, - "learning_rate": 1.7777306333726689e-06, - "loss": 1.3308, - "step": 1832 - }, - { - "epoch": 1.7824887410232484, - "grad_norm": 0.3984375, - "learning_rate": 1.7752826027934418e-06, - "loss": 1.3162, - "step": 1833 - }, - { - "epoch": 1.783462490363939, - "grad_norm": 0.400390625, - "learning_rate": 1.7728353306324408e-06, - "loss": 1.2821, - "step": 1834 - }, - { - "epoch": 1.7844362397046294, - "grad_norm": 0.408203125, - "learning_rate": 1.7703888194507425e-06, - "loss": 1.3234, - "step": 1835 - }, - { - "epoch": 1.78540998904532, - "grad_norm": 0.400390625, - "learning_rate": 1.7679430718086244e-06, - "loss": 1.3237, - "step": 1836 - }, - { - "epoch": 1.7863837383860104, - "grad_norm": 0.39453125, - "learning_rate": 1.7654980902655666e-06, - "loss": 1.3349, - "step": 1837 - }, - { - "epoch": 1.7873574877267009, - "grad_norm": 0.396484375, - "learning_rate": 1.7630538773802477e-06, - "loss": 1.3151, - "step": 1838 - }, - { - "epoch": 1.7883312370673916, - "grad_norm": 0.400390625, - "learning_rate": 1.7606104357105418e-06, - "loss": 1.2914, - "step": 1839 - }, - { - "epoch": 1.789304986408082, - "grad_norm": 0.40625, - "learning_rate": 1.7581677678135146e-06, - "loss": 1.3065, - "step": 1840 - }, - { - "epoch": 1.7902787357487728, - "grad_norm": 0.40234375, - "learning_rate": 1.7557258762454232e-06, - "loss": 1.3159, - "step": 1841 - }, - { - "epoch": 1.7912524850894633, - "grad_norm": 0.40625, - "learning_rate": 1.7532847635617123e-06, - "loss": 1.3185, - "step": 1842 - }, - { - "epoch": 1.7922262344301538, - "grad_norm": 0.404296875, - "learning_rate": 1.7508444323170115e-06, - "loss": 1.3384, - "step": 1843 - }, - { - "epoch": 1.7931999837708443, - "grad_norm": 0.39453125, - "learning_rate": 1.7484048850651325e-06, - "loss": 1.3023, - "step": 1844 - }, - { - "epoch": 1.7941737331115348, - "grad_norm": 0.404296875, - "learning_rate": 1.7459661243590656e-06, - "loss": 1.3117, - "step": 1845 - }, - { - "epoch": 1.7951474824522253, - "grad_norm": 0.41015625, - "learning_rate": 1.7435281527509796e-06, - "loss": 1.3083, - "step": 1846 - }, - { - "epoch": 1.7961212317929158, - "grad_norm": 0.40234375, - "learning_rate": 1.741090972792216e-06, - "loss": 1.3406, - "step": 1847 - }, - { - "epoch": 1.7970949811336066, - "grad_norm": 0.396484375, - "learning_rate": 1.7386545870332893e-06, - "loss": 1.3224, - "step": 1848 - }, - { - "epoch": 1.798068730474297, - "grad_norm": 0.419921875, - "learning_rate": 1.736218998023882e-06, - "loss": 1.3155, - "step": 1849 - }, - { - "epoch": 1.7990424798149878, - "grad_norm": 0.416015625, - "learning_rate": 1.7337842083128435e-06, - "loss": 1.3394, - "step": 1850 - }, - { - "epoch": 1.8000162291556783, - "grad_norm": 0.4140625, - "learning_rate": 1.7313502204481847e-06, - "loss": 1.2991, - "step": 1851 - }, - { - "epoch": 1.8009899784963688, - "grad_norm": 0.3984375, - "learning_rate": 1.7289170369770797e-06, - "loss": 1.3287, - "step": 1852 - }, - { - "epoch": 1.8019637278370593, - "grad_norm": 0.40234375, - "learning_rate": 1.7264846604458607e-06, - "loss": 1.2975, - "step": 1853 - }, - { - "epoch": 1.8029374771777498, - "grad_norm": 0.404296875, - "learning_rate": 1.7240530934000134e-06, - "loss": 1.2853, - "step": 1854 - }, - { - "epoch": 1.8039112265184403, - "grad_norm": 0.40234375, - "learning_rate": 1.7216223383841774e-06, - "loss": 1.3112, - "step": 1855 - }, - { - "epoch": 1.8048849758591308, - "grad_norm": 0.40625, - "learning_rate": 1.719192397942144e-06, - "loss": 1.3069, - "step": 1856 - }, - { - "epoch": 1.8058587251998215, - "grad_norm": 0.40234375, - "learning_rate": 1.7167632746168503e-06, - "loss": 1.2939, - "step": 1857 - }, - { - "epoch": 1.806832474540512, - "grad_norm": 0.404296875, - "learning_rate": 1.7143349709503786e-06, - "loss": 1.3234, - "step": 1858 - }, - { - "epoch": 1.8078062238812027, - "grad_norm": 0.3984375, - "learning_rate": 1.7119074894839538e-06, - "loss": 1.33, - "step": 1859 - }, - { - "epoch": 1.8087799732218932, - "grad_norm": 0.396484375, - "learning_rate": 1.7094808327579401e-06, - "loss": 1.3097, - "step": 1860 - }, - { - "epoch": 1.8097537225625837, - "grad_norm": 0.412109375, - "learning_rate": 1.7070550033118393e-06, - "loss": 1.3193, - "step": 1861 - }, - { - "epoch": 1.8107274719032742, - "grad_norm": 0.404296875, - "learning_rate": 1.7046300036842864e-06, - "loss": 1.3302, - "step": 1862 - }, - { - "epoch": 1.8117012212439647, - "grad_norm": 0.408203125, - "learning_rate": 1.7022058364130478e-06, - "loss": 1.3024, - "step": 1863 - }, - { - "epoch": 1.8126749705846552, - "grad_norm": 0.400390625, - "learning_rate": 1.6997825040350196e-06, - "loss": 1.299, - "step": 1864 - }, - { - "epoch": 1.813648719925346, - "grad_norm": 0.396484375, - "learning_rate": 1.6973600090862247e-06, - "loss": 1.3096, - "step": 1865 - }, - { - "epoch": 1.8146224692660364, - "grad_norm": 0.416015625, - "learning_rate": 1.6949383541018088e-06, - "loss": 1.3065, - "step": 1866 - }, - { - "epoch": 1.815596218606727, - "grad_norm": 0.404296875, - "learning_rate": 1.6925175416160387e-06, - "loss": 1.3274, - "step": 1867 - }, - { - "epoch": 1.8165699679474177, - "grad_norm": 0.400390625, - "learning_rate": 1.6900975741622994e-06, - "loss": 1.3031, - "step": 1868 - }, - { - "epoch": 1.8175437172881082, - "grad_norm": 0.40234375, - "learning_rate": 1.6876784542730918e-06, - "loss": 1.3244, - "step": 1869 - }, - { - "epoch": 1.8185174666287987, - "grad_norm": 0.39453125, - "learning_rate": 1.6852601844800298e-06, - "loss": 1.3006, - "step": 1870 - }, - { - "epoch": 1.8194912159694892, - "grad_norm": 0.40234375, - "learning_rate": 1.6828427673138378e-06, - "loss": 1.3155, - "step": 1871 - }, - { - "epoch": 1.8204649653101797, - "grad_norm": 0.40234375, - "learning_rate": 1.6804262053043488e-06, - "loss": 1.3076, - "step": 1872 - }, - { - "epoch": 1.8214387146508701, - "grad_norm": 0.400390625, - "learning_rate": 1.6780105009804976e-06, - "loss": 1.3134, - "step": 1873 - }, - { - "epoch": 1.8224124639915609, - "grad_norm": 0.40625, - "learning_rate": 1.6755956568703247e-06, - "loss": 1.3144, - "step": 1874 - }, - { - "epoch": 1.8233862133322514, - "grad_norm": 0.390625, - "learning_rate": 1.6731816755009696e-06, - "loss": 1.3084, - "step": 1875 - }, - { - "epoch": 1.824359962672942, - "grad_norm": 0.3984375, - "learning_rate": 1.6707685593986687e-06, - "loss": 1.2964, - "step": 1876 - }, - { - "epoch": 1.8253337120136326, - "grad_norm": 0.4140625, - "learning_rate": 1.6683563110887523e-06, - "loss": 1.308, - "step": 1877 - }, - { - "epoch": 1.826307461354323, - "grad_norm": 0.396484375, - "learning_rate": 1.665944933095644e-06, - "loss": 1.3091, - "step": 1878 - }, - { - "epoch": 1.8272812106950136, - "grad_norm": 0.39453125, - "learning_rate": 1.6635344279428553e-06, - "loss": 1.3191, - "step": 1879 - }, - { - "epoch": 1.828254960035704, - "grad_norm": 0.4140625, - "learning_rate": 1.6611247981529846e-06, - "loss": 1.3227, - "step": 1880 - }, - { - "epoch": 1.8292287093763946, - "grad_norm": 0.404296875, - "learning_rate": 1.6587160462477149e-06, - "loss": 1.3134, - "step": 1881 - }, - { - "epoch": 1.830202458717085, - "grad_norm": 0.41015625, - "learning_rate": 1.6563081747478093e-06, - "loss": 1.3145, - "step": 1882 - }, - { - "epoch": 1.8311762080577758, - "grad_norm": 0.404296875, - "learning_rate": 1.6539011861731115e-06, - "loss": 1.3177, - "step": 1883 - }, - { - "epoch": 1.8321499573984663, - "grad_norm": 0.400390625, - "learning_rate": 1.651495083042538e-06, - "loss": 1.3174, - "step": 1884 - }, - { - "epoch": 1.833123706739157, - "grad_norm": 0.3984375, - "learning_rate": 1.6490898678740819e-06, - "loss": 1.3085, - "step": 1885 - }, - { - "epoch": 1.8340974560798475, - "grad_norm": 0.400390625, - "learning_rate": 1.646685543184805e-06, - "loss": 1.3278, - "step": 1886 - }, - { - "epoch": 1.835071205420538, - "grad_norm": 0.400390625, - "learning_rate": 1.6442821114908385e-06, - "loss": 1.33, - "step": 1887 - }, - { - "epoch": 1.8360449547612285, - "grad_norm": 0.39453125, - "learning_rate": 1.6418795753073785e-06, - "loss": 1.3125, - "step": 1888 - }, - { - "epoch": 1.837018704101919, - "grad_norm": 0.40234375, - "learning_rate": 1.6394779371486838e-06, - "loss": 1.3102, - "step": 1889 - }, - { - "epoch": 1.8379924534426095, - "grad_norm": 0.3984375, - "learning_rate": 1.6370771995280737e-06, - "loss": 1.3222, - "step": 1890 - }, - { - "epoch": 1.8389662027833003, - "grad_norm": 0.396484375, - "learning_rate": 1.634677364957925e-06, - "loss": 1.3038, - "step": 1891 - }, - { - "epoch": 1.8399399521239908, - "grad_norm": 0.40234375, - "learning_rate": 1.6322784359496697e-06, - "loss": 1.335, - "step": 1892 - }, - { - "epoch": 1.8409137014646813, - "grad_norm": 0.408203125, - "learning_rate": 1.6298804150137914e-06, - "loss": 1.3319, - "step": 1893 - }, - { - "epoch": 1.841887450805372, - "grad_norm": 0.40625, - "learning_rate": 1.6274833046598254e-06, - "loss": 1.3062, - "step": 1894 - }, - { - "epoch": 1.8428612001460625, - "grad_norm": 0.40234375, - "learning_rate": 1.6250871073963498e-06, - "loss": 1.314, - "step": 1895 - }, - { - "epoch": 1.843834949486753, - "grad_norm": 0.416015625, - "learning_rate": 1.6226918257309916e-06, - "loss": 1.3173, - "step": 1896 - }, - { - "epoch": 1.8448086988274435, - "grad_norm": 0.4140625, - "learning_rate": 1.6202974621704176e-06, - "loss": 1.3172, - "step": 1897 - }, - { - "epoch": 1.845782448168134, - "grad_norm": 0.396484375, - "learning_rate": 1.6179040192203343e-06, - "loss": 1.3011, - "step": 1898 - }, - { - "epoch": 1.8467561975088245, - "grad_norm": 0.400390625, - "learning_rate": 1.6155114993854846e-06, - "loss": 1.3096, - "step": 1899 - }, - { - "epoch": 1.8477299468495152, - "grad_norm": 0.3984375, - "learning_rate": 1.613119905169645e-06, - "loss": 1.3185, - "step": 1900 - }, - { - "epoch": 1.8487036961902057, - "grad_norm": 0.408203125, - "learning_rate": 1.6107292390756241e-06, - "loss": 1.2993, - "step": 1901 - }, - { - "epoch": 1.8496774455308964, - "grad_norm": 0.390625, - "learning_rate": 1.6083395036052586e-06, - "loss": 1.3028, - "step": 1902 - }, - { - "epoch": 1.850651194871587, - "grad_norm": 0.400390625, - "learning_rate": 1.6059507012594116e-06, - "loss": 1.29, - "step": 1903 - }, - { - "epoch": 1.8516249442122774, - "grad_norm": 0.40625, - "learning_rate": 1.6035628345379695e-06, - "loss": 1.3278, - "step": 1904 - }, - { - "epoch": 1.852598693552968, - "grad_norm": 0.408203125, - "learning_rate": 1.601175905939841e-06, - "loss": 1.3225, - "step": 1905 - }, - { - "epoch": 1.8535724428936584, - "grad_norm": 0.400390625, - "learning_rate": 1.5987899179629492e-06, - "loss": 1.3327, - "step": 1906 - }, - { - "epoch": 1.854546192234349, - "grad_norm": 0.40234375, - "learning_rate": 1.596404873104237e-06, - "loss": 1.3084, - "step": 1907 - }, - { - "epoch": 1.8555199415750394, - "grad_norm": 0.43359375, - "learning_rate": 1.594020773859658e-06, - "loss": 1.3321, - "step": 1908 - }, - { - "epoch": 1.8564936909157301, - "grad_norm": 0.404296875, - "learning_rate": 1.5916376227241776e-06, - "loss": 1.3147, - "step": 1909 - }, - { - "epoch": 1.8574674402564206, - "grad_norm": 0.412109375, - "learning_rate": 1.5892554221917677e-06, - "loss": 1.3217, - "step": 1910 - }, - { - "epoch": 1.8584411895971114, - "grad_norm": 0.396484375, - "learning_rate": 1.5868741747554061e-06, - "loss": 1.3225, - "step": 1911 - }, - { - "epoch": 1.8594149389378019, - "grad_norm": 0.41796875, - "learning_rate": 1.5844938829070733e-06, - "loss": 1.3308, - "step": 1912 - }, - { - "epoch": 1.8603886882784924, - "grad_norm": 0.3984375, - "learning_rate": 1.5821145491377494e-06, - "loss": 1.3021, - "step": 1913 - }, - { - "epoch": 1.8613624376191829, - "grad_norm": 0.404296875, - "learning_rate": 1.579736175937412e-06, - "loss": 1.3113, - "step": 1914 - }, - { - "epoch": 1.8623361869598734, - "grad_norm": 0.400390625, - "learning_rate": 1.5773587657950338e-06, - "loss": 1.3239, - "step": 1915 - }, - { - "epoch": 1.8633099363005639, - "grad_norm": 0.400390625, - "learning_rate": 1.5749823211985798e-06, - "loss": 1.323, - "step": 1916 - }, - { - "epoch": 1.8642836856412544, - "grad_norm": 0.3984375, - "learning_rate": 1.5726068446350024e-06, - "loss": 1.3104, - "step": 1917 - }, - { - "epoch": 1.865257434981945, - "grad_norm": 0.40625, - "learning_rate": 1.5702323385902435e-06, - "loss": 1.3393, - "step": 1918 - }, - { - "epoch": 1.8662311843226356, - "grad_norm": 0.408203125, - "learning_rate": 1.5678588055492289e-06, - "loss": 1.3089, - "step": 1919 - }, - { - "epoch": 1.8672049336633263, - "grad_norm": 0.396484375, - "learning_rate": 1.5654862479958652e-06, - "loss": 1.306, - "step": 1920 - }, - { - "epoch": 1.8681786830040168, - "grad_norm": 0.396484375, - "learning_rate": 1.5631146684130389e-06, - "loss": 1.3229, - "step": 1921 - }, - { - "epoch": 1.8691524323447073, - "grad_norm": 0.4140625, - "learning_rate": 1.5607440692826132e-06, - "loss": 1.3009, - "step": 1922 - }, - { - "epoch": 1.8701261816853978, - "grad_norm": 0.4140625, - "learning_rate": 1.5583744530854243e-06, - "loss": 1.322, - "step": 1923 - }, - { - "epoch": 1.8710999310260883, - "grad_norm": 0.41015625, - "learning_rate": 1.5560058223012805e-06, - "loss": 1.3319, - "step": 1924 - }, - { - "epoch": 1.8720736803667788, - "grad_norm": 0.40234375, - "learning_rate": 1.553638179408959e-06, - "loss": 1.323, - "step": 1925 - }, - { - "epoch": 1.8730474297074695, - "grad_norm": 0.392578125, - "learning_rate": 1.5512715268862033e-06, - "loss": 1.3077, - "step": 1926 - }, - { - "epoch": 1.87402117904816, - "grad_norm": 0.408203125, - "learning_rate": 1.5489058672097195e-06, - "loss": 1.325, - "step": 1927 - }, - { - "epoch": 1.8749949283888507, - "grad_norm": 0.3984375, - "learning_rate": 1.546541202855175e-06, - "loss": 1.3195, - "step": 1928 - }, - { - "epoch": 1.8759686777295412, - "grad_norm": 0.40625, - "learning_rate": 1.5441775362971955e-06, - "loss": 1.3225, - "step": 1929 - }, - { - "epoch": 1.8769424270702317, - "grad_norm": 0.408203125, - "learning_rate": 1.541814870009364e-06, - "loss": 1.3365, - "step": 1930 - }, - { - "epoch": 1.8779161764109222, - "grad_norm": 0.3984375, - "learning_rate": 1.5394532064642148e-06, - "loss": 1.321, - "step": 1931 - }, - { - "epoch": 1.8788899257516127, - "grad_norm": 0.400390625, - "learning_rate": 1.5370925481332338e-06, - "loss": 1.3168, - "step": 1932 - }, - { - "epoch": 1.8798636750923032, - "grad_norm": 0.400390625, - "learning_rate": 1.534732897486855e-06, - "loss": 1.3231, - "step": 1933 - }, - { - "epoch": 1.8808374244329937, - "grad_norm": 0.4140625, - "learning_rate": 1.5323742569944573e-06, - "loss": 1.3295, - "step": 1934 - }, - { - "epoch": 1.8818111737736845, - "grad_norm": 0.404296875, - "learning_rate": 1.530016629124363e-06, - "loss": 1.294, - "step": 1935 - }, - { - "epoch": 1.882784923114375, - "grad_norm": 0.408203125, - "learning_rate": 1.5276600163438338e-06, - "loss": 1.2921, - "step": 1936 - }, - { - "epoch": 1.8837586724550657, - "grad_norm": 0.388671875, - "learning_rate": 1.5253044211190705e-06, - "loss": 1.2998, - "step": 1937 - }, - { - "epoch": 1.8847324217957562, - "grad_norm": 0.46875, - "learning_rate": 1.522949845915208e-06, - "loss": 1.3007, - "step": 1938 - }, - { - "epoch": 1.8857061711364467, - "grad_norm": 0.3984375, - "learning_rate": 1.5205962931963135e-06, - "loss": 1.329, - "step": 1939 - }, - { - "epoch": 1.8866799204771372, - "grad_norm": 0.40234375, - "learning_rate": 1.5182437654253856e-06, - "loss": 1.2925, - "step": 1940 - }, - { - "epoch": 1.8876536698178277, - "grad_norm": 0.4140625, - "learning_rate": 1.515892265064349e-06, - "loss": 1.2956, - "step": 1941 - }, - { - "epoch": 1.8886274191585182, - "grad_norm": 0.400390625, - "learning_rate": 1.5135417945740533e-06, - "loss": 1.3301, - "step": 1942 - }, - { - "epoch": 1.8896011684992087, - "grad_norm": 0.42578125, - "learning_rate": 1.5111923564142716e-06, - "loss": 1.3004, - "step": 1943 - }, - { - "epoch": 1.8905749178398994, - "grad_norm": 0.421875, - "learning_rate": 1.5088439530436943e-06, - "loss": 1.3135, - "step": 1944 - }, - { - "epoch": 1.89154866718059, - "grad_norm": 0.404296875, - "learning_rate": 1.5064965869199316e-06, - "loss": 1.3066, - "step": 1945 - }, - { - "epoch": 1.8925224165212806, - "grad_norm": 0.39453125, - "learning_rate": 1.5041502604995056e-06, - "loss": 1.3019, - "step": 1946 - }, - { - "epoch": 1.8934961658619711, - "grad_norm": 0.392578125, - "learning_rate": 1.5018049762378528e-06, - "loss": 1.3055, - "step": 1947 - }, - { - "epoch": 1.8944699152026616, - "grad_norm": 0.3984375, - "learning_rate": 1.4994607365893173e-06, - "loss": 1.3339, - "step": 1948 - }, - { - "epoch": 1.8954436645433521, - "grad_norm": 0.3984375, - "learning_rate": 1.4971175440071516e-06, - "loss": 1.3305, - "step": 1949 - }, - { - "epoch": 1.8964174138840426, - "grad_norm": 0.40234375, - "learning_rate": 1.49477540094351e-06, - "loss": 1.3034, - "step": 1950 - }, - { - "epoch": 1.8973911632247331, - "grad_norm": 0.40625, - "learning_rate": 1.492434309849451e-06, - "loss": 1.3158, - "step": 1951 - }, - { - "epoch": 1.8983649125654238, - "grad_norm": 0.3984375, - "learning_rate": 1.4900942731749314e-06, - "loss": 1.3214, - "step": 1952 - }, - { - "epoch": 1.8993386619061143, - "grad_norm": 0.4140625, - "learning_rate": 1.4877552933688033e-06, - "loss": 1.3366, - "step": 1953 - }, - { - "epoch": 1.9003124112468048, - "grad_norm": 0.423828125, - "learning_rate": 1.4854173728788144e-06, - "loss": 1.3046, - "step": 1954 - }, - { - "epoch": 1.9012861605874956, - "grad_norm": 0.40625, - "learning_rate": 1.483080514151603e-06, - "loss": 1.3136, - "step": 1955 - }, - { - "epoch": 1.902259909928186, - "grad_norm": 0.40234375, - "learning_rate": 1.4807447196326967e-06, - "loss": 1.3015, - "step": 1956 - }, - { - "epoch": 1.9032336592688766, - "grad_norm": 0.40625, - "learning_rate": 1.4784099917665094e-06, - "loss": 1.3246, - "step": 1957 - }, - { - "epoch": 1.904207408609567, - "grad_norm": 0.388671875, - "learning_rate": 1.4760763329963378e-06, - "loss": 1.3252, - "step": 1958 - }, - { - "epoch": 1.9051811579502576, - "grad_norm": 0.408203125, - "learning_rate": 1.4737437457643616e-06, - "loss": 1.3049, - "step": 1959 - }, - { - "epoch": 1.906154907290948, - "grad_norm": 0.39453125, - "learning_rate": 1.471412232511638e-06, - "loss": 1.3029, - "step": 1960 - }, - { - "epoch": 1.9071286566316388, - "grad_norm": 0.39453125, - "learning_rate": 1.4690817956781e-06, - "loss": 1.3111, - "step": 1961 - }, - { - "epoch": 1.9081024059723293, - "grad_norm": 0.41796875, - "learning_rate": 1.4667524377025535e-06, - "loss": 1.3004, - "step": 1962 - }, - { - "epoch": 1.90907615531302, - "grad_norm": 0.408203125, - "learning_rate": 1.4644241610226776e-06, - "loss": 1.3212, - "step": 1963 - }, - { - "epoch": 1.9100499046537105, - "grad_norm": 0.3984375, - "learning_rate": 1.462096968075018e-06, - "loss": 1.3024, - "step": 1964 - }, - { - "epoch": 1.911023653994401, - "grad_norm": 0.408203125, - "learning_rate": 1.459770861294987e-06, - "loss": 1.2928, - "step": 1965 - }, - { - "epoch": 1.9119974033350915, - "grad_norm": 0.400390625, - "learning_rate": 1.45744584311686e-06, - "loss": 1.3056, - "step": 1966 - }, - { - "epoch": 1.912971152675782, - "grad_norm": 0.396484375, - "learning_rate": 1.4551219159737728e-06, - "loss": 1.3234, - "step": 1967 - }, - { - "epoch": 1.9139449020164725, - "grad_norm": 0.408203125, - "learning_rate": 1.4527990822977216e-06, - "loss": 1.3294, - "step": 1968 - }, - { - "epoch": 1.914918651357163, - "grad_norm": 0.3984375, - "learning_rate": 1.4504773445195544e-06, - "loss": 1.315, - "step": 1969 - }, - { - "epoch": 1.9158924006978537, - "grad_norm": 0.40625, - "learning_rate": 1.4481567050689764e-06, - "loss": 1.303, - "step": 1970 - }, - { - "epoch": 1.9168661500385442, - "grad_norm": 0.40234375, - "learning_rate": 1.44583716637454e-06, - "loss": 1.3114, - "step": 1971 - }, - { - "epoch": 1.917839899379235, - "grad_norm": 0.3984375, - "learning_rate": 1.4435187308636486e-06, - "loss": 1.3197, - "step": 1972 - }, - { - "epoch": 1.9188136487199254, - "grad_norm": 0.40625, - "learning_rate": 1.4412014009625476e-06, - "loss": 1.3281, - "step": 1973 - }, - { - "epoch": 1.919787398060616, - "grad_norm": 0.404296875, - "learning_rate": 1.438885179096329e-06, - "loss": 1.3262, - "step": 1974 - }, - { - "epoch": 1.9207611474013064, - "grad_norm": 0.40625, - "learning_rate": 1.4365700676889227e-06, - "loss": 1.3261, - "step": 1975 - }, - { - "epoch": 1.921734896741997, - "grad_norm": 0.39453125, - "learning_rate": 1.4342560691630991e-06, - "loss": 1.3037, - "step": 1976 - }, - { - "epoch": 1.9227086460826874, - "grad_norm": 0.3984375, - "learning_rate": 1.4319431859404603e-06, - "loss": 1.3175, - "step": 1977 - }, - { - "epoch": 1.923682395423378, - "grad_norm": 0.40625, - "learning_rate": 1.4296314204414453e-06, - "loss": 1.3127, - "step": 1978 - }, - { - "epoch": 1.9246561447640687, - "grad_norm": 0.400390625, - "learning_rate": 1.427320775085319e-06, - "loss": 1.3142, - "step": 1979 - }, - { - "epoch": 1.9256298941047592, - "grad_norm": 0.396484375, - "learning_rate": 1.4250112522901794e-06, - "loss": 1.3255, - "step": 1980 - }, - { - "epoch": 1.9266036434454499, - "grad_norm": 0.40234375, - "learning_rate": 1.4227028544729448e-06, - "loss": 1.3185, - "step": 1981 - }, - { - "epoch": 1.9275773927861404, - "grad_norm": 0.400390625, - "learning_rate": 1.4203955840493588e-06, - "loss": 1.3147, - "step": 1982 - }, - { - "epoch": 1.9285511421268309, - "grad_norm": 0.412109375, - "learning_rate": 1.4180894434339836e-06, - "loss": 1.3391, - "step": 1983 - }, - { - "epoch": 1.9295248914675214, - "grad_norm": 0.400390625, - "learning_rate": 1.4157844350402017e-06, - "loss": 1.2939, - "step": 1984 - }, - { - "epoch": 1.9304986408082119, - "grad_norm": 0.3984375, - "learning_rate": 1.4134805612802072e-06, - "loss": 1.3273, - "step": 1985 - }, - { - "epoch": 1.9314723901489024, - "grad_norm": 0.41796875, - "learning_rate": 1.4111778245650107e-06, - "loss": 1.3191, - "step": 1986 - }, - { - "epoch": 1.932446139489593, - "grad_norm": 0.408203125, - "learning_rate": 1.4088762273044287e-06, - "loss": 1.3335, - "step": 1987 - }, - { - "epoch": 1.9334198888302836, - "grad_norm": 0.396484375, - "learning_rate": 1.4065757719070896e-06, - "loss": 1.3201, - "step": 1988 - }, - { - "epoch": 1.9343936381709743, - "grad_norm": 0.4140625, - "learning_rate": 1.4042764607804238e-06, - "loss": 1.3164, - "step": 1989 - }, - { - "epoch": 1.9353673875116648, - "grad_norm": 0.41015625, - "learning_rate": 1.4019782963306636e-06, - "loss": 1.3188, - "step": 1990 - }, - { - "epoch": 1.9363411368523553, - "grad_norm": 0.392578125, - "learning_rate": 1.399681280962845e-06, - "loss": 1.3138, - "step": 1991 - }, - { - "epoch": 1.9373148861930458, - "grad_norm": 0.396484375, - "learning_rate": 1.397385417080797e-06, - "loss": 1.3319, - "step": 1992 - }, - { - "epoch": 1.9382886355337363, - "grad_norm": 0.392578125, - "learning_rate": 1.3950907070871483e-06, - "loss": 1.2966, - "step": 1993 - }, - { - "epoch": 1.9392623848744268, - "grad_norm": 0.392578125, - "learning_rate": 1.3927971533833163e-06, - "loss": 1.3071, - "step": 1994 - }, - { - "epoch": 1.9402361342151173, - "grad_norm": 0.40625, - "learning_rate": 1.3905047583695086e-06, - "loss": 1.3234, - "step": 1995 - }, - { - "epoch": 1.941209883555808, - "grad_norm": 0.4140625, - "learning_rate": 1.388213524444723e-06, - "loss": 1.3245, - "step": 1996 - }, - { - "epoch": 1.9421836328964985, - "grad_norm": 0.40234375, - "learning_rate": 1.3859234540067397e-06, - "loss": 1.2904, - "step": 1997 - }, - { - "epoch": 1.9431573822371893, - "grad_norm": 0.3984375, - "learning_rate": 1.3836345494521215e-06, - "loss": 1.3329, - "step": 1998 - }, - { - "epoch": 1.9441311315778798, - "grad_norm": 0.40625, - "learning_rate": 1.3813468131762126e-06, - "loss": 1.3248, - "step": 1999 - }, - { - "epoch": 1.9451048809185703, - "grad_norm": 0.404296875, - "learning_rate": 1.3790602475731323e-06, - "loss": 1.3367, - "step": 2000 - }, - { - "epoch": 1.9460786302592608, - "grad_norm": 0.40234375, - "learning_rate": 1.3767748550357785e-06, - "loss": 1.3169, - "step": 2001 - }, - { - "epoch": 1.9470523795999513, - "grad_norm": 0.41015625, - "learning_rate": 1.3744906379558165e-06, - "loss": 1.3082, - "step": 2002 - }, - { - "epoch": 1.9480261289406418, - "grad_norm": 0.40625, - "learning_rate": 1.3722075987236865e-06, - "loss": 1.3302, - "step": 2003 - }, - { - "epoch": 1.9489998782813323, - "grad_norm": 0.390625, - "learning_rate": 1.3699257397285926e-06, - "loss": 1.3229, - "step": 2004 - }, - { - "epoch": 1.949973627622023, - "grad_norm": 0.3984375, - "learning_rate": 1.3676450633585037e-06, - "loss": 1.2998, - "step": 2005 - }, - { - "epoch": 1.9509473769627135, - "grad_norm": 0.404296875, - "learning_rate": 1.365365572000154e-06, - "loss": 1.3457, - "step": 2006 - }, - { - "epoch": 1.9519211263034042, - "grad_norm": 0.392578125, - "learning_rate": 1.3630872680390356e-06, - "loss": 1.3216, - "step": 2007 - }, - { - "epoch": 1.9528948756440947, - "grad_norm": 0.392578125, - "learning_rate": 1.3608101538593965e-06, - "loss": 1.3328, - "step": 2008 - }, - { - "epoch": 1.9538686249847852, - "grad_norm": 0.400390625, - "learning_rate": 1.3585342318442434e-06, - "loss": 1.2837, - "step": 2009 - }, - { - "epoch": 1.9548423743254757, - "grad_norm": 0.39453125, - "learning_rate": 1.3562595043753318e-06, - "loss": 1.32, - "step": 2010 - }, - { - "epoch": 1.9558161236661662, - "grad_norm": 0.388671875, - "learning_rate": 1.3539859738331707e-06, - "loss": 1.3208, - "step": 2011 - }, - { - "epoch": 1.9567898730068567, - "grad_norm": 0.3984375, - "learning_rate": 1.3517136425970115e-06, - "loss": 1.3032, - "step": 2012 - }, - { - "epoch": 1.9577636223475474, - "grad_norm": 0.40625, - "learning_rate": 1.3494425130448562e-06, - "loss": 1.3074, - "step": 2013 - }, - { - "epoch": 1.958737371688238, - "grad_norm": 0.400390625, - "learning_rate": 1.347172587553444e-06, - "loss": 1.3137, - "step": 2014 - }, - { - "epoch": 1.9597111210289284, - "grad_norm": 0.400390625, - "learning_rate": 1.3449038684982602e-06, - "loss": 1.3113, - "step": 2015 - }, - { - "epoch": 1.9606848703696191, - "grad_norm": 0.40625, - "learning_rate": 1.3426363582535193e-06, - "loss": 1.3086, - "step": 2016 - }, - { - "epoch": 1.9616586197103096, - "grad_norm": 0.388671875, - "learning_rate": 1.3403700591921787e-06, - "loss": 1.3039, - "step": 2017 - }, - { - "epoch": 1.9626323690510001, - "grad_norm": 0.3984375, - "learning_rate": 1.3381049736859225e-06, - "loss": 1.3114, - "step": 2018 - }, - { - "epoch": 1.9636061183916906, - "grad_norm": 0.408203125, - "learning_rate": 1.33584110410517e-06, - "loss": 1.319, - "step": 2019 - }, - { - "epoch": 1.9645798677323811, - "grad_norm": 0.40625, - "learning_rate": 1.3335784528190627e-06, - "loss": 1.3085, - "step": 2020 - }, - { - "epoch": 1.9655536170730716, - "grad_norm": 0.39453125, - "learning_rate": 1.3313170221954719e-06, - "loss": 1.3145, - "step": 2021 - }, - { - "epoch": 1.9665273664137624, - "grad_norm": 0.408203125, - "learning_rate": 1.3290568146009874e-06, - "loss": 1.3138, - "step": 2022 - }, - { - "epoch": 1.9675011157544529, - "grad_norm": 0.40234375, - "learning_rate": 1.3267978324009235e-06, - "loss": 1.3036, - "step": 2023 - }, - { - "epoch": 1.9684748650951436, - "grad_norm": 0.400390625, - "learning_rate": 1.3245400779593067e-06, - "loss": 1.3135, - "step": 2024 - }, - { - "epoch": 1.969448614435834, - "grad_norm": 0.39453125, - "learning_rate": 1.3222835536388845e-06, - "loss": 1.307, - "step": 2025 - }, - { - "epoch": 1.9704223637765246, - "grad_norm": 0.3984375, - "learning_rate": 1.3200282618011115e-06, - "loss": 1.3144, - "step": 2026 - }, - { - "epoch": 1.971396113117215, - "grad_norm": 0.404296875, - "learning_rate": 1.3177742048061587e-06, - "loss": 1.3344, - "step": 2027 - }, - { - "epoch": 1.9723698624579056, - "grad_norm": 0.40625, - "learning_rate": 1.3155213850128968e-06, - "loss": 1.2997, - "step": 2028 - }, - { - "epoch": 1.973343611798596, - "grad_norm": 0.39453125, - "learning_rate": 1.31326980477891e-06, - "loss": 1.3037, - "step": 2029 - }, - { - "epoch": 1.9743173611392866, - "grad_norm": 0.3984375, - "learning_rate": 1.3110194664604786e-06, - "loss": 1.3135, - "step": 2030 - }, - { - "epoch": 1.9752911104799773, - "grad_norm": 0.39453125, - "learning_rate": 1.3087703724125895e-06, - "loss": 1.3181, - "step": 2031 - }, - { - "epoch": 1.9762648598206678, - "grad_norm": 0.39453125, - "learning_rate": 1.3065225249889208e-06, - "loss": 1.3113, - "step": 2032 - }, - { - "epoch": 1.9772386091613585, - "grad_norm": 0.40625, - "learning_rate": 1.3042759265418523e-06, - "loss": 1.3233, - "step": 2033 - }, - { - "epoch": 1.978212358502049, - "grad_norm": 0.40234375, - "learning_rate": 1.3020305794224516e-06, - "loss": 1.3193, - "step": 2034 - }, - { - "epoch": 1.9791861078427395, - "grad_norm": 0.400390625, - "learning_rate": 1.299786485980481e-06, - "loss": 1.3201, - "step": 2035 - }, - { - "epoch": 1.98015985718343, - "grad_norm": 0.404296875, - "learning_rate": 1.2975436485643865e-06, - "loss": 1.3062, - "step": 2036 - }, - { - "epoch": 1.9811336065241205, - "grad_norm": 0.396484375, - "learning_rate": 1.2953020695213048e-06, - "loss": 1.3092, - "step": 2037 - }, - { - "epoch": 1.982107355864811, - "grad_norm": 0.412109375, - "learning_rate": 1.2930617511970514e-06, - "loss": 1.3274, - "step": 2038 - }, - { - "epoch": 1.9830811052055017, - "grad_norm": 0.40234375, - "learning_rate": 1.2908226959361241e-06, - "loss": 1.293, - "step": 2039 - }, - { - "epoch": 1.9840548545461922, - "grad_norm": 0.392578125, - "learning_rate": 1.2885849060816973e-06, - "loss": 1.304, - "step": 2040 - }, - { - "epoch": 1.9850286038868827, - "grad_norm": 0.400390625, - "learning_rate": 1.2863483839756254e-06, - "loss": 1.3259, - "step": 2041 - }, - { - "epoch": 1.9860023532275735, - "grad_norm": 0.396484375, - "learning_rate": 1.284113131958431e-06, - "loss": 1.3236, - "step": 2042 - }, - { - "epoch": 1.986976102568264, - "grad_norm": 0.412109375, - "learning_rate": 1.2818791523693114e-06, - "loss": 1.3307, - "step": 2043 - }, - { - "epoch": 1.9879498519089545, - "grad_norm": 0.390625, - "learning_rate": 1.2796464475461296e-06, - "loss": 1.317, - "step": 2044 - }, - { - "epoch": 1.988923601249645, - "grad_norm": 0.392578125, - "learning_rate": 1.277415019825417e-06, - "loss": 1.3171, - "step": 2045 - }, - { - "epoch": 1.9898973505903355, - "grad_norm": 0.404296875, - "learning_rate": 1.275184871542366e-06, - "loss": 1.3081, - "step": 2046 - }, - { - "epoch": 1.990871099931026, - "grad_norm": 0.408203125, - "learning_rate": 1.2729560050308325e-06, - "loss": 1.328, - "step": 2047 - }, - { - "epoch": 1.9918448492717167, - "grad_norm": 0.3984375, - "learning_rate": 1.2707284226233283e-06, - "loss": 1.3265, - "step": 2048 - }, - { - "epoch": 1.9928185986124072, - "grad_norm": 0.388671875, - "learning_rate": 1.2685021266510256e-06, - "loss": 1.3072, - "step": 2049 - }, - { - "epoch": 1.993792347953098, - "grad_norm": 0.408203125, - "learning_rate": 1.266277119443744e-06, - "loss": 1.3155, - "step": 2050 - }, - { - "epoch": 1.9947660972937884, - "grad_norm": 0.400390625, - "learning_rate": 1.264053403329961e-06, - "loss": 1.3152, - "step": 2051 - }, - { - "epoch": 1.995739846634479, - "grad_norm": 0.396484375, - "learning_rate": 1.261830980636798e-06, - "loss": 1.2953, - "step": 2052 - }, - { - "epoch": 1.9967135959751694, - "grad_norm": 0.400390625, - "learning_rate": 1.2596098536900263e-06, - "loss": 1.3345, - "step": 2053 - }, - { - "epoch": 1.99768734531586, - "grad_norm": 0.392578125, - "learning_rate": 1.2573900248140586e-06, - "loss": 1.3066, - "step": 2054 - }, - { - "epoch": 1.9986610946565504, - "grad_norm": 0.3984375, - "learning_rate": 1.255171496331952e-06, - "loss": 1.3078, - "step": 2055 - }, - { - "epoch": 1.999634843997241, - "grad_norm": 0.396484375, - "learning_rate": 1.2529542705653992e-06, - "loss": 1.3204, - "step": 2056 - }, - { - "epoch": 2.0006085933379314, - "grad_norm": 0.41015625, - "learning_rate": 1.2507383498347328e-06, - "loss": 1.3295, - "step": 2057 - }, - { - "epoch": 2.0015823426786223, - "grad_norm": 0.400390625, - "learning_rate": 1.248523736458917e-06, - "loss": 1.3092, - "step": 2058 - }, - { - "epoch": 2.0015823426786223, - "eval_loss": 1.337652564048767, - "eval_runtime": 1517.6913, - "eval_samples_per_second": 27.512, - "eval_steps_per_second": 3.439, - "step": 2058 - }, - { - "epoch": 2.000162265222506, - "grad_norm": 0.408203125, - "learning_rate": 1.246310432755551e-06, - "loss": 1.3348, - "step": 2059 - }, - { - "epoch": 2.0011358565575432, - "grad_norm": 0.412109375, - "learning_rate": 1.2440984410408607e-06, - "loss": 1.3299, - "step": 2060 - }, - { - "epoch": 2.0021094478925803, - "grad_norm": 0.390625, - "learning_rate": 1.2418877636297e-06, - "loss": 1.3285, - "step": 2061 - }, - { - "epoch": 2.0030830392276173, - "grad_norm": 0.41015625, - "learning_rate": 1.239678402835546e-06, - "loss": 1.2974, - "step": 2062 - }, - { - "epoch": 2.004056630562655, - "grad_norm": 0.40234375, - "learning_rate": 1.2374703609705014e-06, - "loss": 1.3299, - "step": 2063 - }, - { - "epoch": 2.005030221897692, - "grad_norm": 0.40234375, - "learning_rate": 1.2352636403452844e-06, - "loss": 1.3071, - "step": 2064 - }, - { - "epoch": 2.006003813232729, - "grad_norm": 0.39453125, - "learning_rate": 1.2330582432692349e-06, - "loss": 1.3317, - "step": 2065 - }, - { - "epoch": 2.006977404567766, - "grad_norm": 0.400390625, - "learning_rate": 1.2308541720503029e-06, - "loss": 1.3287, - "step": 2066 - }, - { - "epoch": 2.007950995902803, - "grad_norm": 0.3984375, - "learning_rate": 1.228651428995056e-06, - "loss": 1.3298, - "step": 2067 - }, - { - "epoch": 2.00892458723784, - "grad_norm": 0.396484375, - "learning_rate": 1.2264500164086667e-06, - "loss": 1.325, - "step": 2068 - }, - { - "epoch": 2.0098981785728776, - "grad_norm": 0.392578125, - "learning_rate": 1.2242499365949203e-06, - "loss": 1.3052, - "step": 2069 - }, - { - "epoch": 2.0108717699079146, - "grad_norm": 0.392578125, - "learning_rate": 1.2220511918562036e-06, - "loss": 1.3097, - "step": 2070 - }, - { - "epoch": 2.0118453612429517, - "grad_norm": 0.40234375, - "learning_rate": 1.219853784493507e-06, - "loss": 1.3139, - "step": 2071 - }, - { - "epoch": 2.0128189525779887, - "grad_norm": 0.40234375, - "learning_rate": 1.2176577168064213e-06, - "loss": 1.3104, - "step": 2072 - }, - { - "epoch": 2.0137925439130258, - "grad_norm": 0.40234375, - "learning_rate": 1.2154629910931376e-06, - "loss": 1.3206, - "step": 2073 - }, - { - "epoch": 2.014766135248063, - "grad_norm": 0.392578125, - "learning_rate": 1.213269609650439e-06, - "loss": 1.3183, - "step": 2074 - }, - { - "epoch": 2.0157397265831003, - "grad_norm": 0.3984375, - "learning_rate": 1.2110775747737052e-06, - "loss": 1.3096, - "step": 2075 - }, - { - "epoch": 2.0167133179181373, - "grad_norm": 0.400390625, - "learning_rate": 1.2088868887569036e-06, - "loss": 1.3128, - "step": 2076 - }, - { - "epoch": 2.0176869092531744, - "grad_norm": 0.400390625, - "learning_rate": 1.206697553892593e-06, - "loss": 1.3306, - "step": 2077 - }, - { - "epoch": 2.0186605005882114, - "grad_norm": 0.384765625, - "learning_rate": 1.2045095724719156e-06, - "loss": 1.3094, - "step": 2078 - }, - { - "epoch": 2.0196340919232485, - "grad_norm": 0.396484375, - "learning_rate": 1.2023229467845996e-06, - "loss": 1.342, - "step": 2079 - }, - { - "epoch": 2.0206076832582855, - "grad_norm": 0.388671875, - "learning_rate": 1.2001376791189526e-06, - "loss": 1.3286, - "step": 2080 - }, - { - "epoch": 2.0215812745933226, - "grad_norm": 0.40625, - "learning_rate": 1.1979537717618605e-06, - "loss": 1.3285, - "step": 2081 - }, - { - "epoch": 2.02255486592836, - "grad_norm": 0.396484375, - "learning_rate": 1.195771226998789e-06, - "loss": 1.3068, - "step": 2082 - }, - { - "epoch": 2.023528457263397, - "grad_norm": 0.400390625, - "learning_rate": 1.1935900471137742e-06, - "loss": 1.3012, - "step": 2083 - }, - { - "epoch": 2.024502048598434, - "grad_norm": 0.39453125, - "learning_rate": 1.191410234389425e-06, - "loss": 1.3158, - "step": 2084 - }, - { - "epoch": 2.0254756399334712, - "grad_norm": 0.39453125, - "learning_rate": 1.1892317911069212e-06, - "loss": 1.3224, - "step": 2085 - }, - { - "epoch": 2.0264492312685083, - "grad_norm": 0.39453125, - "learning_rate": 1.1870547195460063e-06, - "loss": 1.3266, - "step": 2086 - }, - { - "epoch": 2.0274228226035453, - "grad_norm": 0.3984375, - "learning_rate": 1.1848790219849923e-06, - "loss": 1.321, - "step": 2087 - }, - { - "epoch": 2.028396413938583, - "grad_norm": 0.396484375, - "learning_rate": 1.1827047007007497e-06, - "loss": 1.3121, - "step": 2088 - }, - { - "epoch": 2.02937000527362, - "grad_norm": 0.392578125, - "learning_rate": 1.1805317579687095e-06, - "loss": 1.3191, - "step": 2089 - }, - { - "epoch": 2.030343596608657, - "grad_norm": 0.404296875, - "learning_rate": 1.1783601960628624e-06, - "loss": 1.3041, - "step": 2090 - }, - { - "epoch": 2.031317187943694, - "grad_norm": 0.40625, - "learning_rate": 1.1761900172557508e-06, - "loss": 1.3053, - "step": 2091 - }, - { - "epoch": 2.032290779278731, - "grad_norm": 0.3984375, - "learning_rate": 1.1740212238184726e-06, - "loss": 1.3006, - "step": 2092 - }, - { - "epoch": 2.033264370613768, - "grad_norm": 0.400390625, - "learning_rate": 1.171853818020674e-06, - "loss": 1.3158, - "step": 2093 - }, - { - "epoch": 2.034237961948805, - "grad_norm": 0.392578125, - "learning_rate": 1.1696878021305483e-06, - "loss": 1.3132, - "step": 2094 - }, - { - "epoch": 2.0352115532838426, - "grad_norm": 0.400390625, - "learning_rate": 1.1675231784148378e-06, - "loss": 1.3303, - "step": 2095 - }, - { - "epoch": 2.0361851446188797, - "grad_norm": 0.390625, - "learning_rate": 1.1653599491388234e-06, - "loss": 1.3153, - "step": 2096 - }, - { - "epoch": 2.0371587359539167, - "grad_norm": 0.396484375, - "learning_rate": 1.1631981165663308e-06, - "loss": 1.3044, - "step": 2097 - }, - { - "epoch": 2.0381323272889538, - "grad_norm": 0.41015625, - "learning_rate": 1.1610376829597215e-06, - "loss": 1.3269, - "step": 2098 - }, - { - "epoch": 2.039105918623991, - "grad_norm": 0.40625, - "learning_rate": 1.1588786505798924e-06, - "loss": 1.3155, - "step": 2099 - }, - { - "epoch": 2.040079509959028, - "grad_norm": 0.3984375, - "learning_rate": 1.1567210216862773e-06, - "loss": 1.314, - "step": 2100 - }, - { - "epoch": 2.0410531012940654, - "grad_norm": 0.404296875, - "learning_rate": 1.1545647985368375e-06, - "loss": 1.3135, - "step": 2101 - }, - { - "epoch": 2.0420266926291024, - "grad_norm": 0.408203125, - "learning_rate": 1.1524099833880667e-06, - "loss": 1.3141, - "step": 2102 - }, - { - "epoch": 2.0430002839641395, - "grad_norm": 0.390625, - "learning_rate": 1.150256578494981e-06, - "loss": 1.3114, - "step": 2103 - }, - { - "epoch": 2.0439738752991765, - "grad_norm": 0.38671875, - "learning_rate": 1.1481045861111256e-06, - "loss": 1.3159, - "step": 2104 - }, - { - "epoch": 2.0449474666342136, - "grad_norm": 0.40234375, - "learning_rate": 1.1459540084885635e-06, - "loss": 1.3034, - "step": 2105 - }, - { - "epoch": 2.0459210579692506, - "grad_norm": 0.390625, - "learning_rate": 1.1438048478778786e-06, - "loss": 1.3174, - "step": 2106 - }, - { - "epoch": 2.0468946493042877, - "grad_norm": 0.3984375, - "learning_rate": 1.1416571065281706e-06, - "loss": 1.3028, - "step": 2107 - }, - { - "epoch": 2.047868240639325, - "grad_norm": 0.39453125, - "learning_rate": 1.1395107866870579e-06, - "loss": 1.3072, - "step": 2108 - }, - { - "epoch": 2.048841831974362, - "grad_norm": 0.39453125, - "learning_rate": 1.1373658906006656e-06, - "loss": 1.2953, - "step": 2109 - }, - { - "epoch": 2.0498154233093993, - "grad_norm": 0.392578125, - "learning_rate": 1.1352224205136348e-06, - "loss": 1.2935, - "step": 2110 - }, - { - "epoch": 2.0507890146444363, - "grad_norm": 0.408203125, - "learning_rate": 1.1330803786691086e-06, - "loss": 1.3207, - "step": 2111 - }, - { - "epoch": 2.0517626059794734, - "grad_norm": 0.40234375, - "learning_rate": 1.1309397673087405e-06, - "loss": 1.3186, - "step": 2112 - }, - { - "epoch": 2.0527361973145104, - "grad_norm": 0.400390625, - "learning_rate": 1.1288005886726823e-06, - "loss": 1.2882, - "step": 2113 - }, - { - "epoch": 2.053709788649548, - "grad_norm": 0.388671875, - "learning_rate": 1.1266628449995913e-06, - "loss": 1.2869, - "step": 2114 - }, - { - "epoch": 2.054683379984585, - "grad_norm": 0.3984375, - "learning_rate": 1.1245265385266187e-06, - "loss": 1.3011, - "step": 2115 - }, - { - "epoch": 2.055656971319622, - "grad_norm": 0.396484375, - "learning_rate": 1.1223916714894146e-06, - "loss": 1.3219, - "step": 2116 - }, - { - "epoch": 2.056630562654659, - "grad_norm": 0.40625, - "learning_rate": 1.1202582461221202e-06, - "loss": 1.3128, - "step": 2117 - }, - { - "epoch": 2.057604153989696, - "grad_norm": 0.396484375, - "learning_rate": 1.1181262646573712e-06, - "loss": 1.3329, - "step": 2118 - }, - { - "epoch": 2.058577745324733, - "grad_norm": 0.40234375, - "learning_rate": 1.1159957293262888e-06, - "loss": 1.2994, - "step": 2119 - }, - { - "epoch": 2.05955133665977, - "grad_norm": 0.404296875, - "learning_rate": 1.1138666423584848e-06, - "loss": 1.3412, - "step": 2120 - }, - { - "epoch": 2.0605249279948077, - "grad_norm": 0.400390625, - "learning_rate": 1.1117390059820507e-06, - "loss": 1.3094, - "step": 2121 - }, - { - "epoch": 2.0614985193298447, - "grad_norm": 0.388671875, - "learning_rate": 1.1096128224235644e-06, - "loss": 1.2937, - "step": 2122 - }, - { - "epoch": 2.062472110664882, - "grad_norm": 0.390625, - "learning_rate": 1.1074880939080793e-06, - "loss": 1.338, - "step": 2123 - }, - { - "epoch": 2.063445701999919, - "grad_norm": 0.396484375, - "learning_rate": 1.1053648226591298e-06, - "loss": 1.3177, - "step": 2124 - }, - { - "epoch": 2.064419293334956, - "grad_norm": 0.40625, - "learning_rate": 1.1032430108987221e-06, - "loss": 1.322, - "step": 2125 - }, - { - "epoch": 2.065392884669993, - "grad_norm": 0.39453125, - "learning_rate": 1.1011226608473391e-06, - "loss": 1.3191, - "step": 2126 - }, - { - "epoch": 2.0663664760050304, - "grad_norm": 0.400390625, - "learning_rate": 1.0990037747239278e-06, - "loss": 1.3111, - "step": 2127 - }, - { - "epoch": 2.0673400673400675, - "grad_norm": 0.396484375, - "learning_rate": 1.0968863547459096e-06, - "loss": 1.3288, - "step": 2128 - }, - { - "epoch": 2.0683136586751045, - "grad_norm": 0.400390625, - "learning_rate": 1.0947704031291668e-06, - "loss": 1.3115, - "step": 2129 - }, - { - "epoch": 2.0692872500101416, - "grad_norm": 0.390625, - "learning_rate": 1.092655922088049e-06, - "loss": 1.3068, - "step": 2130 - }, - { - "epoch": 2.0702608413451786, - "grad_norm": 0.39453125, - "learning_rate": 1.0905429138353628e-06, - "loss": 1.3019, - "step": 2131 - }, - { - "epoch": 2.0712344326802157, - "grad_norm": 0.400390625, - "learning_rate": 1.0884313805823783e-06, - "loss": 1.2997, - "step": 2132 - }, - { - "epoch": 2.0722080240152527, - "grad_norm": 0.396484375, - "learning_rate": 1.0863213245388167e-06, - "loss": 1.3196, - "step": 2133 - }, - { - "epoch": 2.07318161535029, - "grad_norm": 0.39453125, - "learning_rate": 1.0842127479128579e-06, - "loss": 1.3132, - "step": 2134 - }, - { - "epoch": 2.0741552066853273, - "grad_norm": 0.400390625, - "learning_rate": 1.0821056529111296e-06, - "loss": 1.3221, - "step": 2135 - }, - { - "epoch": 2.0751287980203643, - "grad_norm": 0.408203125, - "learning_rate": 1.0800000417387132e-06, - "loss": 1.3214, - "step": 2136 - }, - { - "epoch": 2.0761023893554014, - "grad_norm": 0.392578125, - "learning_rate": 1.0778959165991342e-06, - "loss": 1.3201, - "step": 2137 - }, - { - "epoch": 2.0770759806904384, - "grad_norm": 0.40234375, - "learning_rate": 1.0757932796943637e-06, - "loss": 1.3115, - "step": 2138 - }, - { - "epoch": 2.0780495720254755, - "grad_norm": 0.41015625, - "learning_rate": 1.0736921332248145e-06, - "loss": 1.3333, - "step": 2139 - }, - { - "epoch": 2.079023163360513, - "grad_norm": 0.404296875, - "learning_rate": 1.0715924793893425e-06, - "loss": 1.3109, - "step": 2140 - }, - { - "epoch": 2.07999675469555, - "grad_norm": 0.388671875, - "learning_rate": 1.0694943203852382e-06, - "loss": 1.3066, - "step": 2141 - }, - { - "epoch": 2.080970346030587, - "grad_norm": 0.40625, - "learning_rate": 1.0673976584082304e-06, - "loss": 1.3067, - "step": 2142 - }, - { - "epoch": 2.081943937365624, - "grad_norm": 0.40625, - "learning_rate": 1.0653024956524788e-06, - "loss": 1.3261, - "step": 2143 - }, - { - "epoch": 2.082917528700661, - "grad_norm": 0.408203125, - "learning_rate": 1.063208834310577e-06, - "loss": 1.3376, - "step": 2144 - }, - { - "epoch": 2.083891120035698, - "grad_norm": 0.404296875, - "learning_rate": 1.0611166765735442e-06, - "loss": 1.3078, - "step": 2145 - }, - { - "epoch": 2.0848647113707353, - "grad_norm": 0.400390625, - "learning_rate": 1.0590260246308296e-06, - "loss": 1.3155, - "step": 2146 - }, - { - "epoch": 2.0858383027057728, - "grad_norm": 0.384765625, - "learning_rate": 1.0569368806703029e-06, - "loss": 1.3193, - "step": 2147 - }, - { - "epoch": 2.08681189404081, - "grad_norm": 0.40234375, - "learning_rate": 1.0548492468782598e-06, - "loss": 1.3227, - "step": 2148 - }, - { - "epoch": 2.087785485375847, - "grad_norm": 0.3984375, - "learning_rate": 1.0527631254394103e-06, - "loss": 1.3228, - "step": 2149 - }, - { - "epoch": 2.088759076710884, - "grad_norm": 0.40234375, - "learning_rate": 1.050678518536887e-06, - "loss": 1.3149, - "step": 2150 - }, - { - "epoch": 2.089732668045921, - "grad_norm": 0.412109375, - "learning_rate": 1.0485954283522335e-06, - "loss": 1.3298, - "step": 2151 - }, - { - "epoch": 2.090706259380958, - "grad_norm": 0.396484375, - "learning_rate": 1.0465138570654096e-06, - "loss": 1.3265, - "step": 2152 - }, - { - "epoch": 2.0916798507159955, - "grad_norm": 0.40625, - "learning_rate": 1.0444338068547824e-06, - "loss": 1.3284, - "step": 2153 - }, - { - "epoch": 2.0926534420510325, - "grad_norm": 0.408203125, - "learning_rate": 1.0423552798971303e-06, - "loss": 1.3074, - "step": 2154 - }, - { - "epoch": 2.0936270333860696, - "grad_norm": 0.400390625, - "learning_rate": 1.0402782783676343e-06, - "loss": 1.2978, - "step": 2155 - }, - { - "epoch": 2.0946006247211066, - "grad_norm": 0.396484375, - "learning_rate": 1.0382028044398823e-06, - "loss": 1.3092, - "step": 2156 - }, - { - "epoch": 2.0955742160561437, - "grad_norm": 0.396484375, - "learning_rate": 1.0361288602858605e-06, - "loss": 1.3075, - "step": 2157 - }, - { - "epoch": 2.0965478073911807, - "grad_norm": 0.39453125, - "learning_rate": 1.0340564480759568e-06, - "loss": 1.3138, - "step": 2158 - }, - { - "epoch": 2.097521398726218, - "grad_norm": 0.404296875, - "learning_rate": 1.0319855699789546e-06, - "loss": 1.3134, - "step": 2159 - }, - { - "epoch": 2.0984949900612553, - "grad_norm": 0.404296875, - "learning_rate": 1.0299162281620318e-06, - "loss": 1.3094, - "step": 2160 - }, - { - "epoch": 2.0994685813962923, - "grad_norm": 0.396484375, - "learning_rate": 1.0278484247907578e-06, - "loss": 1.3168, - "step": 2161 - }, - { - "epoch": 2.1004421727313294, - "grad_norm": 0.412109375, - "learning_rate": 1.0257821620290948e-06, - "loss": 1.3154, - "step": 2162 - }, - { - "epoch": 2.1014157640663664, - "grad_norm": 0.390625, - "learning_rate": 1.0237174420393894e-06, - "loss": 1.3323, - "step": 2163 - }, - { - "epoch": 2.1023893554014035, - "grad_norm": 0.40234375, - "learning_rate": 1.0216542669823768e-06, - "loss": 1.3194, - "step": 2164 - }, - { - "epoch": 2.1033629467364405, - "grad_norm": 0.396484375, - "learning_rate": 1.0195926390171724e-06, - "loss": 1.3194, - "step": 2165 - }, - { - "epoch": 2.104336538071478, - "grad_norm": 0.3984375, - "learning_rate": 1.0175325603012754e-06, - "loss": 1.3186, - "step": 2166 - }, - { - "epoch": 2.105310129406515, - "grad_norm": 0.419921875, - "learning_rate": 1.0154740329905607e-06, - "loss": 1.32, - "step": 2167 - }, - { - "epoch": 2.106283720741552, - "grad_norm": 0.396484375, - "learning_rate": 1.0134170592392837e-06, - "loss": 1.3077, - "step": 2168 - }, - { - "epoch": 2.107257312076589, - "grad_norm": 0.396484375, - "learning_rate": 1.0113616412000688e-06, - "loss": 1.3067, - "step": 2169 - }, - { - "epoch": 2.1082309034116262, - "grad_norm": 0.3984375, - "learning_rate": 1.0093077810239185e-06, - "loss": 1.328, - "step": 2170 - }, - { - "epoch": 2.1092044947466633, - "grad_norm": 0.38671875, - "learning_rate": 1.0072554808601981e-06, - "loss": 1.3042, - "step": 2171 - }, - { - "epoch": 2.1101780860817003, - "grad_norm": 0.396484375, - "learning_rate": 1.0052047428566461e-06, - "loss": 1.3384, - "step": 2172 - }, - { - "epoch": 2.111151677416738, - "grad_norm": 0.40234375, - "learning_rate": 1.0031555691593627e-06, - "loss": 1.3132, - "step": 2173 - }, - { - "epoch": 2.112125268751775, - "grad_norm": 0.38671875, - "learning_rate": 1.001107961912814e-06, - "loss": 1.303, - "step": 2174 - }, - { - "epoch": 2.113098860086812, - "grad_norm": 0.408203125, - "learning_rate": 9.990619232598228e-07, - "loss": 1.2988, - "step": 2175 - }, - { - "epoch": 2.114072451421849, - "grad_norm": 0.400390625, - "learning_rate": 9.970174553415754e-07, - "loss": 1.2889, - "step": 2176 - }, - { - "epoch": 2.115046042756886, - "grad_norm": 0.404296875, - "learning_rate": 9.94974560297609e-07, - "loss": 1.3143, - "step": 2177 - }, - { - "epoch": 2.116019634091923, - "grad_norm": 0.39453125, - "learning_rate": 9.929332402658198e-07, - "loss": 1.3233, - "step": 2178 - }, - { - "epoch": 2.1169932254269606, - "grad_norm": 0.40625, - "learning_rate": 9.908934973824522e-07, - "loss": 1.3423, - "step": 2179 - }, - { - "epoch": 2.1179668167619976, - "grad_norm": 0.400390625, - "learning_rate": 9.888553337821005e-07, - "loss": 1.3449, - "step": 2180 - }, - { - "epoch": 2.1189404080970347, - "grad_norm": 0.3984375, - "learning_rate": 9.868187515977085e-07, - "loss": 1.3163, - "step": 2181 - }, - { - "epoch": 2.1199139994320717, - "grad_norm": 0.3984375, - "learning_rate": 9.847837529605631e-07, - "loss": 1.3289, - "step": 2182 - }, - { - "epoch": 2.1208875907671088, - "grad_norm": 0.396484375, - "learning_rate": 9.827503400002933e-07, - "loss": 1.3202, - "step": 2183 - }, - { - "epoch": 2.121861182102146, - "grad_norm": 0.400390625, - "learning_rate": 9.80718514844872e-07, - "loss": 1.3237, - "step": 2184 - }, - { - "epoch": 2.122834773437183, - "grad_norm": 0.404296875, - "learning_rate": 9.786882796206062e-07, - "loss": 1.3416, - "step": 2185 - }, - { - "epoch": 2.1238083647722203, - "grad_norm": 0.408203125, - "learning_rate": 9.76659636452143e-07, - "loss": 1.325, - "step": 2186 - }, - { - "epoch": 2.1247819561072574, - "grad_norm": 0.400390625, - "learning_rate": 9.74632587462461e-07, - "loss": 1.3403, - "step": 2187 - }, - { - "epoch": 2.1257555474422944, - "grad_norm": 0.400390625, - "learning_rate": 9.726071347728702e-07, - "loss": 1.2848, - "step": 2188 - }, - { - "epoch": 2.1267291387773315, - "grad_norm": 0.3984375, - "learning_rate": 9.705832805030124e-07, - "loss": 1.3335, - "step": 2189 - }, - { - "epoch": 2.1277027301123685, - "grad_norm": 0.404296875, - "learning_rate": 9.685610267708533e-07, - "loss": 1.3331, - "step": 2190 - }, - { - "epoch": 2.1286763214474056, - "grad_norm": 0.396484375, - "learning_rate": 9.66540375692688e-07, - "loss": 1.3259, - "step": 2191 - }, - { - "epoch": 2.129649912782443, - "grad_norm": 0.404296875, - "learning_rate": 9.645213293831304e-07, - "loss": 1.3321, - "step": 2192 - }, - { - "epoch": 2.13062350411748, - "grad_norm": 0.3984375, - "learning_rate": 9.625038899551162e-07, - "loss": 1.3234, - "step": 2193 - }, - { - "epoch": 2.131597095452517, - "grad_norm": 0.40234375, - "learning_rate": 9.604880595199011e-07, - "loss": 1.3221, - "step": 2194 - }, - { - "epoch": 2.1325706867875542, - "grad_norm": 0.404296875, - "learning_rate": 9.584738401870545e-07, - "loss": 1.3266, - "step": 2195 - }, - { - "epoch": 2.1335442781225913, - "grad_norm": 0.390625, - "learning_rate": 9.564612340644622e-07, - "loss": 1.2918, - "step": 2196 - }, - { - "epoch": 2.1345178694576283, - "grad_norm": 0.400390625, - "learning_rate": 9.5445024325832e-07, - "loss": 1.2987, - "step": 2197 - }, - { - "epoch": 2.1354914607926654, - "grad_norm": 0.3984375, - "learning_rate": 9.52440869873133e-07, - "loss": 1.338, - "step": 2198 - }, - { - "epoch": 2.136465052127703, - "grad_norm": 0.3984375, - "learning_rate": 9.504331160117164e-07, - "loss": 1.3319, - "step": 2199 - }, - { - "epoch": 2.13743864346274, - "grad_norm": 0.4140625, - "learning_rate": 9.484269837751869e-07, - "loss": 1.3222, - "step": 2200 - }, - { - "epoch": 2.138412234797777, - "grad_norm": 0.39453125, - "learning_rate": 9.464224752629675e-07, - "loss": 1.2974, - "step": 2201 - }, - { - "epoch": 2.139385826132814, - "grad_norm": 0.39453125, - "learning_rate": 9.444195925727786e-07, - "loss": 1.3124, - "step": 2202 - }, - { - "epoch": 2.140359417467851, - "grad_norm": 0.40234375, - "learning_rate": 9.424183378006433e-07, - "loss": 1.3252, - "step": 2203 - }, - { - "epoch": 2.141333008802888, - "grad_norm": 0.39453125, - "learning_rate": 9.404187130408773e-07, - "loss": 1.3056, - "step": 2204 - }, - { - "epoch": 2.1423066001379256, - "grad_norm": 0.39453125, - "learning_rate": 9.384207203860923e-07, - "loss": 1.3379, - "step": 2205 - }, - { - "epoch": 2.1432801914729627, - "grad_norm": 0.400390625, - "learning_rate": 9.364243619271906e-07, - "loss": 1.3085, - "step": 2206 - }, - { - "epoch": 2.1442537828079997, - "grad_norm": 0.40234375, - "learning_rate": 9.344296397533672e-07, - "loss": 1.3085, - "step": 2207 - }, - { - "epoch": 2.1452273741430368, - "grad_norm": 0.3984375, - "learning_rate": 9.324365559521015e-07, - "loss": 1.3084, - "step": 2208 - }, - { - "epoch": 2.146200965478074, - "grad_norm": 0.396484375, - "learning_rate": 9.30445112609161e-07, - "loss": 1.2956, - "step": 2209 - }, - { - "epoch": 2.147174556813111, - "grad_norm": 0.39453125, - "learning_rate": 9.284553118085934e-07, - "loss": 1.3094, - "step": 2210 - }, - { - "epoch": 2.148148148148148, - "grad_norm": 0.39453125, - "learning_rate": 9.264671556327315e-07, - "loss": 1.3072, - "step": 2211 - }, - { - "epoch": 2.1491217394831854, - "grad_norm": 0.388671875, - "learning_rate": 9.244806461621828e-07, - "loss": 1.2836, - "step": 2212 - }, - { - "epoch": 2.1500953308182225, - "grad_norm": 0.40625, - "learning_rate": 9.224957854758352e-07, - "loss": 1.2966, - "step": 2213 - }, - { - "epoch": 2.1510689221532595, - "grad_norm": 0.3984375, - "learning_rate": 9.205125756508485e-07, - "loss": 1.3254, - "step": 2214 - }, - { - "epoch": 2.1520425134882966, - "grad_norm": 0.392578125, - "learning_rate": 9.185310187626556e-07, - "loss": 1.3218, - "step": 2215 - }, - { - "epoch": 2.1530161048233336, - "grad_norm": 0.486328125, - "learning_rate": 9.165511168849594e-07, - "loss": 1.3148, - "step": 2216 - }, - { - "epoch": 2.1539896961583707, - "grad_norm": 0.396484375, - "learning_rate": 9.145728720897329e-07, - "loss": 1.2954, - "step": 2217 - }, - { - "epoch": 2.154963287493408, - "grad_norm": 0.416015625, - "learning_rate": 9.125962864472115e-07, - "loss": 1.329, - "step": 2218 - }, - { - "epoch": 2.155936878828445, - "grad_norm": 0.400390625, - "learning_rate": 9.106213620258977e-07, - "loss": 1.3077, - "step": 2219 - }, - { - "epoch": 2.1569104701634823, - "grad_norm": 0.408203125, - "learning_rate": 9.086481008925521e-07, - "loss": 1.308, - "step": 2220 - }, - { - "epoch": 2.1578840614985193, - "grad_norm": 0.396484375, - "learning_rate": 9.066765051121985e-07, - "loss": 1.3217, - "step": 2221 - }, - { - "epoch": 2.1588576528335564, - "grad_norm": 0.408203125, - "learning_rate": 9.047065767481139e-07, - "loss": 1.3222, - "step": 2222 - }, - { - "epoch": 2.1598312441685934, - "grad_norm": 0.3984375, - "learning_rate": 9.02738317861834e-07, - "loss": 1.3065, - "step": 2223 - }, - { - "epoch": 2.1608048355036304, - "grad_norm": 0.400390625, - "learning_rate": 9.007717305131444e-07, - "loss": 1.313, - "step": 2224 - }, - { - "epoch": 2.161778426838668, - "grad_norm": 0.396484375, - "learning_rate": 8.988068167600849e-07, - "loss": 1.3338, - "step": 2225 - }, - { - "epoch": 2.162752018173705, - "grad_norm": 0.396484375, - "learning_rate": 8.968435786589388e-07, - "loss": 1.2983, - "step": 2226 - }, - { - "epoch": 2.163725609508742, - "grad_norm": 0.39453125, - "learning_rate": 8.948820182642406e-07, - "loss": 1.3118, - "step": 2227 - }, - { - "epoch": 2.164699200843779, - "grad_norm": 0.40625, - "learning_rate": 8.929221376287661e-07, - "loss": 1.3278, - "step": 2228 - }, - { - "epoch": 2.165672792178816, - "grad_norm": 0.404296875, - "learning_rate": 8.90963938803536e-07, - "loss": 1.3068, - "step": 2229 - }, - { - "epoch": 2.166646383513853, - "grad_norm": 0.400390625, - "learning_rate": 8.890074238378074e-07, - "loss": 1.3066, - "step": 2230 - }, - { - "epoch": 2.1676199748488907, - "grad_norm": 0.396484375, - "learning_rate": 8.870525947790792e-07, - "loss": 1.3118, - "step": 2231 - }, - { - "epoch": 2.1685935661839277, - "grad_norm": 0.390625, - "learning_rate": 8.85099453673082e-07, - "loss": 1.2933, - "step": 2232 - }, - { - "epoch": 2.169567157518965, - "grad_norm": 0.400390625, - "learning_rate": 8.831480025637837e-07, - "loss": 1.3178, - "step": 2233 - }, - { - "epoch": 2.170540748854002, - "grad_norm": 0.396484375, - "learning_rate": 8.8119824349338e-07, - "loss": 1.306, - "step": 2234 - }, - { - "epoch": 2.171514340189039, - "grad_norm": 0.41015625, - "learning_rate": 8.792501785022997e-07, - "loss": 1.3078, - "step": 2235 - }, - { - "epoch": 2.172487931524076, - "grad_norm": 0.390625, - "learning_rate": 8.773038096291958e-07, - "loss": 1.3018, - "step": 2236 - }, - { - "epoch": 2.1734615228591134, - "grad_norm": 0.412109375, - "learning_rate": 8.753591389109475e-07, - "loss": 1.344, - "step": 2237 - }, - { - "epoch": 2.1744351141941505, - "grad_norm": 0.400390625, - "learning_rate": 8.734161683826554e-07, - "loss": 1.3137, - "step": 2238 - }, - { - "epoch": 2.1754087055291875, - "grad_norm": 0.396484375, - "learning_rate": 8.714749000776441e-07, - "loss": 1.3293, - "step": 2239 - }, - { - "epoch": 2.1763822968642246, - "grad_norm": 0.39453125, - "learning_rate": 8.695353360274527e-07, - "loss": 1.3067, - "step": 2240 - }, - { - "epoch": 2.1773558881992616, - "grad_norm": 0.404296875, - "learning_rate": 8.67597478261841e-07, - "loss": 1.3205, - "step": 2241 - }, - { - "epoch": 2.1783294795342987, - "grad_norm": 0.40234375, - "learning_rate": 8.656613288087795e-07, - "loss": 1.3126, - "step": 2242 - }, - { - "epoch": 2.1793030708693357, - "grad_norm": 0.404296875, - "learning_rate": 8.63726889694454e-07, - "loss": 1.2934, - "step": 2243 - }, - { - "epoch": 2.180276662204373, - "grad_norm": 0.388671875, - "learning_rate": 8.617941629432572e-07, - "loss": 1.2984, - "step": 2244 - }, - { - "epoch": 2.1812502535394103, - "grad_norm": 0.3984375, - "learning_rate": 8.598631505777932e-07, - "loss": 1.3209, - "step": 2245 - }, - { - "epoch": 2.1822238448744473, - "grad_norm": 0.3984375, - "learning_rate": 8.57933854618869e-07, - "loss": 1.314, - "step": 2246 - }, - { - "epoch": 2.1831974362094844, - "grad_norm": 0.39453125, - "learning_rate": 8.56006277085499e-07, - "loss": 1.3065, - "step": 2247 - }, - { - "epoch": 2.1841710275445214, - "grad_norm": 0.3984375, - "learning_rate": 8.540804199948938e-07, - "loss": 1.3275, - "step": 2248 - }, - { - "epoch": 2.1851446188795585, - "grad_norm": 0.3984375, - "learning_rate": 8.521562853624693e-07, - "loss": 1.3211, - "step": 2249 - }, - { - "epoch": 2.186118210214596, - "grad_norm": 0.40625, - "learning_rate": 8.502338752018344e-07, - "loss": 1.335, - "step": 2250 - }, - { - "epoch": 2.187091801549633, - "grad_norm": 0.396484375, - "learning_rate": 8.483131915247969e-07, - "loss": 1.3178, - "step": 2251 - }, - { - "epoch": 2.18806539288467, - "grad_norm": 0.40625, - "learning_rate": 8.463942363413546e-07, - "loss": 1.3232, - "step": 2252 - }, - { - "epoch": 2.189038984219707, - "grad_norm": 0.39453125, - "learning_rate": 8.444770116596998e-07, - "loss": 1.3106, - "step": 2253 - }, - { - "epoch": 2.190012575554744, - "grad_norm": 0.392578125, - "learning_rate": 8.425615194862102e-07, - "loss": 1.3297, - "step": 2254 - }, - { - "epoch": 2.190986166889781, - "grad_norm": 0.392578125, - "learning_rate": 8.406477618254538e-07, - "loss": 1.3106, - "step": 2255 - }, - { - "epoch": 2.1919597582248187, - "grad_norm": 0.3984375, - "learning_rate": 8.387357406801804e-07, - "loss": 1.3031, - "step": 2256 - }, - { - "epoch": 2.1929333495598557, - "grad_norm": 0.3984375, - "learning_rate": 8.368254580513261e-07, - "loss": 1.3099, - "step": 2257 - }, - { - "epoch": 2.193906940894893, - "grad_norm": 0.388671875, - "learning_rate": 8.349169159380027e-07, - "loss": 1.3115, - "step": 2258 - }, - { - "epoch": 2.19488053222993, - "grad_norm": 0.39453125, - "learning_rate": 8.33010116337507e-07, - "loss": 1.3064, - "step": 2259 - }, - { - "epoch": 2.195854123564967, - "grad_norm": 0.39453125, - "learning_rate": 8.311050612453048e-07, - "loss": 1.3139, - "step": 2260 - }, - { - "epoch": 2.196827714900004, - "grad_norm": 0.39453125, - "learning_rate": 8.292017526550425e-07, - "loss": 1.3012, - "step": 2261 - }, - { - "epoch": 2.197801306235041, - "grad_norm": 0.400390625, - "learning_rate": 8.273001925585353e-07, - "loss": 1.3185, - "step": 2262 - }, - { - "epoch": 2.1987748975700785, - "grad_norm": 0.466796875, - "learning_rate": 8.254003829457707e-07, - "loss": 1.3078, - "step": 2263 - }, - { - "epoch": 2.1997484889051155, - "grad_norm": 0.396484375, - "learning_rate": 8.235023258049024e-07, - "loss": 1.3091, - "step": 2264 - }, - { - "epoch": 2.2007220802401526, - "grad_norm": 0.39453125, - "learning_rate": 8.21606023122252e-07, - "loss": 1.3116, - "step": 2265 - }, - { - "epoch": 2.2016956715751896, - "grad_norm": 0.3984375, - "learning_rate": 8.197114768823027e-07, - "loss": 1.342, - "step": 2266 - }, - { - "epoch": 2.2026692629102267, - "grad_norm": 0.396484375, - "learning_rate": 8.178186890677029e-07, - "loss": 1.3108, - "step": 2267 - }, - { - "epoch": 2.2036428542452637, - "grad_norm": 0.40234375, - "learning_rate": 8.159276616592568e-07, - "loss": 1.3002, - "step": 2268 - }, - { - "epoch": 2.2046164455803012, - "grad_norm": 0.390625, - "learning_rate": 8.140383966359302e-07, - "loss": 1.3349, - "step": 2269 - }, - { - "epoch": 2.2055900369153383, - "grad_norm": 0.400390625, - "learning_rate": 8.121508959748423e-07, - "loss": 1.3135, - "step": 2270 - }, - { - "epoch": 2.2065636282503753, - "grad_norm": 0.3984375, - "learning_rate": 8.102651616512658e-07, - "loss": 1.3086, - "step": 2271 - }, - { - "epoch": 2.2075372195854124, - "grad_norm": 0.3984375, - "learning_rate": 8.083811956386253e-07, - "loss": 1.324, - "step": 2272 - }, - { - "epoch": 2.2085108109204494, - "grad_norm": 0.39453125, - "learning_rate": 8.06498999908496e-07, - "loss": 1.3149, - "step": 2273 - }, - { - "epoch": 2.2094844022554865, - "grad_norm": 0.390625, - "learning_rate": 8.046185764305986e-07, - "loss": 1.3083, - "step": 2274 - }, - { - "epoch": 2.2104579935905235, - "grad_norm": 0.404296875, - "learning_rate": 8.027399271728012e-07, - "loss": 1.292, - "step": 2275 - }, - { - "epoch": 2.211431584925561, - "grad_norm": 0.40625, - "learning_rate": 8.008630541011128e-07, - "loss": 1.3117, - "step": 2276 - }, - { - "epoch": 2.212405176260598, - "grad_norm": 0.390625, - "learning_rate": 7.989879591796862e-07, - "loss": 1.3037, - "step": 2277 - }, - { - "epoch": 2.213378767595635, - "grad_norm": 0.3984375, - "learning_rate": 7.971146443708117e-07, - "loss": 1.3396, - "step": 2278 - }, - { - "epoch": 2.214352358930672, - "grad_norm": 0.40625, - "learning_rate": 7.952431116349155e-07, - "loss": 1.3452, - "step": 2279 - }, - { - "epoch": 2.215325950265709, - "grad_norm": 0.3984375, - "learning_rate": 7.933733629305626e-07, - "loss": 1.3219, - "step": 2280 - }, - { - "epoch": 2.2162995416007463, - "grad_norm": 0.3984375, - "learning_rate": 7.915054002144478e-07, - "loss": 1.3486, - "step": 2281 - }, - { - "epoch": 2.2172731329357838, - "grad_norm": 0.390625, - "learning_rate": 7.89639225441397e-07, - "loss": 1.3059, - "step": 2282 - }, - { - "epoch": 2.218246724270821, - "grad_norm": 0.396484375, - "learning_rate": 7.877748405643676e-07, - "loss": 1.3048, - "step": 2283 - }, - { - "epoch": 2.219220315605858, - "grad_norm": 0.400390625, - "learning_rate": 7.859122475344408e-07, - "loss": 1.3334, - "step": 2284 - }, - { - "epoch": 2.220193906940895, - "grad_norm": 0.388671875, - "learning_rate": 7.840514483008252e-07, - "loss": 1.3033, - "step": 2285 - }, - { - "epoch": 2.221167498275932, - "grad_norm": 0.404296875, - "learning_rate": 7.821924448108492e-07, - "loss": 1.343, - "step": 2286 - }, - { - "epoch": 2.222141089610969, - "grad_norm": 0.400390625, - "learning_rate": 7.803352390099661e-07, - "loss": 1.3136, - "step": 2287 - }, - { - "epoch": 2.223114680946006, - "grad_norm": 0.392578125, - "learning_rate": 7.784798328417439e-07, - "loss": 1.3015, - "step": 2288 - }, - { - "epoch": 2.2240882722810436, - "grad_norm": 0.400390625, - "learning_rate": 7.766262282478687e-07, - "loss": 1.3181, - "step": 2289 - }, - { - "epoch": 2.2250618636160806, - "grad_norm": 0.3984375, - "learning_rate": 7.747744271681429e-07, - "loss": 1.2962, - "step": 2290 - }, - { - "epoch": 2.2260354549511177, - "grad_norm": 0.396484375, - "learning_rate": 7.729244315404782e-07, - "loss": 1.3244, - "step": 2291 - }, - { - "epoch": 2.2270090462861547, - "grad_norm": 0.40234375, - "learning_rate": 7.710762433009009e-07, - "loss": 1.3174, - "step": 2292 - }, - { - "epoch": 2.2279826376211918, - "grad_norm": 0.40234375, - "learning_rate": 7.692298643835424e-07, - "loss": 1.312, - "step": 2293 - }, - { - "epoch": 2.228956228956229, - "grad_norm": 0.396484375, - "learning_rate": 7.673852967206416e-07, - "loss": 1.306, - "step": 2294 - }, - { - "epoch": 2.2299298202912663, - "grad_norm": 0.396484375, - "learning_rate": 7.655425422425436e-07, - "loss": 1.3128, - "step": 2295 - }, - { - "epoch": 2.2309034116263033, - "grad_norm": 0.3984375, - "learning_rate": 7.637016028776941e-07, - "loss": 1.3313, - "step": 2296 - }, - { - "epoch": 2.2318770029613404, - "grad_norm": 0.408203125, - "learning_rate": 7.618624805526389e-07, - "loss": 1.3239, - "step": 2297 - }, - { - "epoch": 2.2328505942963774, - "grad_norm": 0.392578125, - "learning_rate": 7.600251771920253e-07, - "loss": 1.3198, - "step": 2298 - }, - { - "epoch": 2.2338241856314145, - "grad_norm": 0.4140625, - "learning_rate": 7.58189694718593e-07, - "loss": 1.305, - "step": 2299 - }, - { - "epoch": 2.2347977769664515, - "grad_norm": 0.396484375, - "learning_rate": 7.563560350531795e-07, - "loss": 1.32, - "step": 2300 - }, - { - "epoch": 2.2357713683014886, - "grad_norm": 0.416015625, - "learning_rate": 7.545242001147123e-07, - "loss": 1.2973, - "step": 2301 - }, - { - "epoch": 2.236744959636526, - "grad_norm": 0.400390625, - "learning_rate": 7.526941918202116e-07, - "loss": 1.3227, - "step": 2302 - }, - { - "epoch": 2.237718550971563, - "grad_norm": 0.392578125, - "learning_rate": 7.508660120847841e-07, - "loss": 1.3226, - "step": 2303 - }, - { - "epoch": 2.2386921423066, - "grad_norm": 0.400390625, - "learning_rate": 7.490396628216237e-07, - "loss": 1.3061, - "step": 2304 - }, - { - "epoch": 2.2396657336416372, - "grad_norm": 0.408203125, - "learning_rate": 7.472151459420079e-07, - "loss": 1.3107, - "step": 2305 - }, - { - "epoch": 2.2406393249766743, - "grad_norm": 0.396484375, - "learning_rate": 7.453924633552986e-07, - "loss": 1.312, - "step": 2306 - }, - { - "epoch": 2.2416129163117113, - "grad_norm": 0.408203125, - "learning_rate": 7.435716169689355e-07, - "loss": 1.3287, - "step": 2307 - }, - { - "epoch": 2.242586507646749, - "grad_norm": 0.396484375, - "learning_rate": 7.417526086884394e-07, - "loss": 1.3028, - "step": 2308 - }, - { - "epoch": 2.243560098981786, - "grad_norm": 0.390625, - "learning_rate": 7.399354404174047e-07, - "loss": 1.2748, - "step": 2309 - }, - { - "epoch": 2.244533690316823, - "grad_norm": 0.39453125, - "learning_rate": 7.381201140575031e-07, - "loss": 1.3154, - "step": 2310 - }, - { - "epoch": 2.24550728165186, - "grad_norm": 0.392578125, - "learning_rate": 7.363066315084761e-07, - "loss": 1.3212, - "step": 2311 - }, - { - "epoch": 2.246480872986897, - "grad_norm": 0.40234375, - "learning_rate": 7.344949946681382e-07, - "loss": 1.3299, - "step": 2312 - }, - { - "epoch": 2.247454464321934, - "grad_norm": 0.400390625, - "learning_rate": 7.326852054323693e-07, - "loss": 1.3035, - "step": 2313 - }, - { - "epoch": 2.248428055656971, - "grad_norm": 0.404296875, - "learning_rate": 7.308772656951202e-07, - "loss": 1.3132, - "step": 2314 - }, - { - "epoch": 2.2494016469920086, - "grad_norm": 0.388671875, - "learning_rate": 7.290711773484002e-07, - "loss": 1.3047, - "step": 2315 - }, - { - "epoch": 2.2503752383270457, - "grad_norm": 0.390625, - "learning_rate": 7.272669422822867e-07, - "loss": 1.3009, - "step": 2316 - }, - { - "epoch": 2.2513488296620827, - "grad_norm": 0.3984375, - "learning_rate": 7.254645623849141e-07, - "loss": 1.3161, - "step": 2317 - }, - { - "epoch": 2.2523224209971198, - "grad_norm": 0.3984375, - "learning_rate": 7.23664039542478e-07, - "loss": 1.2989, - "step": 2318 - }, - { - "epoch": 2.253296012332157, - "grad_norm": 0.40234375, - "learning_rate": 7.218653756392272e-07, - "loss": 1.3291, - "step": 2319 - }, - { - "epoch": 2.254269603667194, - "grad_norm": 0.3984375, - "learning_rate": 7.200685725574694e-07, - "loss": 1.2851, - "step": 2320 - }, - { - "epoch": 2.2552431950022314, - "grad_norm": 0.404296875, - "learning_rate": 7.182736321775607e-07, - "loss": 1.3195, - "step": 2321 - }, - { - "epoch": 2.2562167863372684, - "grad_norm": 0.40234375, - "learning_rate": 7.164805563779115e-07, - "loss": 1.2928, - "step": 2322 - }, - { - "epoch": 2.2571903776723055, - "grad_norm": 0.3984375, - "learning_rate": 7.146893470349778e-07, - "loss": 1.3178, - "step": 2323 - }, - { - "epoch": 2.2581639690073425, - "grad_norm": 0.39453125, - "learning_rate": 7.129000060232649e-07, - "loss": 1.2987, - "step": 2324 - }, - { - "epoch": 2.2591375603423796, - "grad_norm": 0.40234375, - "learning_rate": 7.111125352153217e-07, - "loss": 1.3162, - "step": 2325 - }, - { - "epoch": 2.2601111516774166, - "grad_norm": 0.40625, - "learning_rate": 7.093269364817398e-07, - "loss": 1.3139, - "step": 2326 - }, - { - "epoch": 2.2610847430124537, - "grad_norm": 0.40625, - "learning_rate": 7.07543211691151e-07, - "loss": 1.3175, - "step": 2327 - }, - { - "epoch": 2.262058334347491, - "grad_norm": 0.396484375, - "learning_rate": 7.057613627102286e-07, - "loss": 1.3247, - "step": 2328 - }, - { - "epoch": 2.263031925682528, - "grad_norm": 0.408203125, - "learning_rate": 7.039813914036792e-07, - "loss": 1.3079, - "step": 2329 - }, - { - "epoch": 2.2640055170175652, - "grad_norm": 0.396484375, - "learning_rate": 7.022032996342485e-07, - "loss": 1.3464, - "step": 2330 - }, - { - "epoch": 2.2649791083526023, - "grad_norm": 0.388671875, - "learning_rate": 7.004270892627113e-07, - "loss": 1.2976, - "step": 2331 - }, - { - "epoch": 2.2659526996876393, - "grad_norm": 0.3984375, - "learning_rate": 6.986527621478773e-07, - "loss": 1.3219, - "step": 2332 - }, - { - "epoch": 2.2669262910226764, - "grad_norm": 0.392578125, - "learning_rate": 6.96880320146581e-07, - "loss": 1.2983, - "step": 2333 - }, - { - "epoch": 2.267899882357714, - "grad_norm": 0.3984375, - "learning_rate": 6.95109765113689e-07, - "loss": 1.3336, - "step": 2334 - }, - { - "epoch": 2.268873473692751, - "grad_norm": 0.396484375, - "learning_rate": 6.933410989020886e-07, - "loss": 1.297, - "step": 2335 - }, - { - "epoch": 2.269847065027788, - "grad_norm": 0.404296875, - "learning_rate": 6.915743233626954e-07, - "loss": 1.3146, - "step": 2336 - }, - { - "epoch": 2.270820656362825, - "grad_norm": 0.40234375, - "learning_rate": 6.8980944034444e-07, - "loss": 1.313, - "step": 2337 - }, - { - "epoch": 2.271794247697862, - "grad_norm": 0.396484375, - "learning_rate": 6.880464516942787e-07, - "loss": 1.3195, - "step": 2338 - }, - { - "epoch": 2.272767839032899, - "grad_norm": 0.39453125, - "learning_rate": 6.86285359257181e-07, - "loss": 1.3295, - "step": 2339 - }, - { - "epoch": 2.273741430367936, - "grad_norm": 0.40234375, - "learning_rate": 6.845261648761353e-07, - "loss": 1.3069, - "step": 2340 - }, - { - "epoch": 2.2747150217029737, - "grad_norm": 0.39453125, - "learning_rate": 6.827688703921407e-07, - "loss": 1.3146, - "step": 2341 - }, - { - "epoch": 2.2756886130380107, - "grad_norm": 0.400390625, - "learning_rate": 6.810134776442107e-07, - "loss": 1.318, - "step": 2342 - }, - { - "epoch": 2.276662204373048, - "grad_norm": 0.400390625, - "learning_rate": 6.79259988469366e-07, - "loss": 1.2957, - "step": 2343 - }, - { - "epoch": 2.277635795708085, - "grad_norm": 0.3984375, - "learning_rate": 6.775084047026381e-07, - "loss": 1.3402, - "step": 2344 - }, - { - "epoch": 2.278609387043122, - "grad_norm": 0.404296875, - "learning_rate": 6.757587281770614e-07, - "loss": 1.3118, - "step": 2345 - }, - { - "epoch": 2.279582978378159, - "grad_norm": 0.39453125, - "learning_rate": 6.740109607236775e-07, - "loss": 1.3179, - "step": 2346 - }, - { - "epoch": 2.2805565697131964, - "grad_norm": 0.400390625, - "learning_rate": 6.722651041715278e-07, - "loss": 1.313, - "step": 2347 - }, - { - "epoch": 2.2815301610482335, - "grad_norm": 0.400390625, - "learning_rate": 6.705211603476547e-07, - "loss": 1.3097, - "step": 2348 - }, - { - "epoch": 2.2825037523832705, - "grad_norm": 0.3984375, - "learning_rate": 6.687791310770983e-07, - "loss": 1.3099, - "step": 2349 - }, - { - "epoch": 2.2834773437183076, - "grad_norm": 0.40234375, - "learning_rate": 6.670390181828973e-07, - "loss": 1.3247, - "step": 2350 - }, - { - "epoch": 2.2844509350533446, - "grad_norm": 0.40234375, - "learning_rate": 6.653008234860814e-07, - "loss": 1.3361, - "step": 2351 - }, - { - "epoch": 2.2854245263883817, - "grad_norm": 0.396484375, - "learning_rate": 6.635645488056771e-07, - "loss": 1.3343, - "step": 2352 - }, - { - "epoch": 2.2863981177234187, - "grad_norm": 0.404296875, - "learning_rate": 6.618301959586973e-07, - "loss": 1.3328, - "step": 2353 - }, - { - "epoch": 2.287371709058456, - "grad_norm": 0.404296875, - "learning_rate": 6.600977667601477e-07, - "loss": 1.3181, - "step": 2354 - }, - { - "epoch": 2.2883453003934933, - "grad_norm": 0.388671875, - "learning_rate": 6.58367263023017e-07, - "loss": 1.3199, - "step": 2355 - }, - { - "epoch": 2.2893188917285303, - "grad_norm": 0.404296875, - "learning_rate": 6.566386865582827e-07, - "loss": 1.3364, - "step": 2356 - }, - { - "epoch": 2.2902924830635674, - "grad_norm": 0.396484375, - "learning_rate": 6.549120391749023e-07, - "loss": 1.2999, - "step": 2357 - }, - { - "epoch": 2.2912660743986044, - "grad_norm": 0.40234375, - "learning_rate": 6.531873226798174e-07, - "loss": 1.3147, - "step": 2358 - }, - { - "epoch": 2.2922396657336415, - "grad_norm": 0.388671875, - "learning_rate": 6.514645388779447e-07, - "loss": 1.2999, - "step": 2359 - }, - { - "epoch": 2.293213257068679, - "grad_norm": 0.3984375, - "learning_rate": 6.497436895721834e-07, - "loss": 1.3116, - "step": 2360 - }, - { - "epoch": 2.294186848403716, - "grad_norm": 0.392578125, - "learning_rate": 6.48024776563404e-07, - "loss": 1.3199, - "step": 2361 - }, - { - "epoch": 2.295160439738753, - "grad_norm": 0.40234375, - "learning_rate": 6.46307801650454e-07, - "loss": 1.3237, - "step": 2362 - }, - { - "epoch": 2.29613403107379, - "grad_norm": 0.3984375, - "learning_rate": 6.445927666301494e-07, - "loss": 1.3241, - "step": 2363 - }, - { - "epoch": 2.297107622408827, - "grad_norm": 0.396484375, - "learning_rate": 6.428796732972797e-07, - "loss": 1.3209, - "step": 2364 - }, - { - "epoch": 2.298081213743864, - "grad_norm": 0.400390625, - "learning_rate": 6.411685234445989e-07, - "loss": 1.3029, - "step": 2365 - }, - { - "epoch": 2.2990548050789013, - "grad_norm": 0.392578125, - "learning_rate": 6.394593188628303e-07, - "loss": 1.2924, - "step": 2366 - }, - { - "epoch": 2.3000283964139387, - "grad_norm": 0.396484375, - "learning_rate": 6.377520613406585e-07, - "loss": 1.328, - "step": 2367 - }, - { - "epoch": 2.301001987748976, - "grad_norm": 0.404296875, - "learning_rate": 6.360467526647333e-07, - "loss": 1.3369, - "step": 2368 - }, - { - "epoch": 2.301975579084013, - "grad_norm": 0.392578125, - "learning_rate": 6.343433946196631e-07, - "loss": 1.3079, - "step": 2369 - }, - { - "epoch": 2.30294917041905, - "grad_norm": 0.3984375, - "learning_rate": 6.326419889880156e-07, - "loss": 1.3376, - "step": 2370 - }, - { - "epoch": 2.303922761754087, - "grad_norm": 0.40234375, - "learning_rate": 6.309425375503144e-07, - "loss": 1.298, - "step": 2371 - }, - { - "epoch": 2.304896353089124, - "grad_norm": 0.390625, - "learning_rate": 6.292450420850402e-07, - "loss": 1.3031, - "step": 2372 - }, - { - "epoch": 2.3058699444241615, - "grad_norm": 0.408203125, - "learning_rate": 6.275495043686244e-07, - "loss": 1.3244, - "step": 2373 - }, - { - "epoch": 2.3068435357591985, - "grad_norm": 0.404296875, - "learning_rate": 6.258559261754518e-07, - "loss": 1.3114, - "step": 2374 - }, - { - "epoch": 2.3078171270942356, - "grad_norm": 0.3984375, - "learning_rate": 6.24164309277854e-07, - "loss": 1.3265, - "step": 2375 - }, - { - "epoch": 2.3087907184292726, - "grad_norm": 0.400390625, - "learning_rate": 6.224746554461128e-07, - "loss": 1.2972, - "step": 2376 - }, - { - "epoch": 2.3097643097643097, - "grad_norm": 0.39453125, - "learning_rate": 6.207869664484542e-07, - "loss": 1.3105, - "step": 2377 - }, - { - "epoch": 2.3107379010993467, - "grad_norm": 0.400390625, - "learning_rate": 6.191012440510469e-07, - "loss": 1.3157, - "step": 2378 - }, - { - "epoch": 2.311711492434384, - "grad_norm": 0.4140625, - "learning_rate": 6.174174900180044e-07, - "loss": 1.3024, - "step": 2379 - }, - { - "epoch": 2.3126850837694213, - "grad_norm": 0.396484375, - "learning_rate": 6.157357061113781e-07, - "loss": 1.3134, - "step": 2380 - }, - { - "epoch": 2.3136586751044583, - "grad_norm": 0.392578125, - "learning_rate": 6.140558940911573e-07, - "loss": 1.3184, - "step": 2381 - }, - { - "epoch": 2.3146322664394954, - "grad_norm": 0.412109375, - "learning_rate": 6.123780557152703e-07, - "loss": 1.3197, - "step": 2382 - }, - { - "epoch": 2.3156058577745324, - "grad_norm": 0.39453125, - "learning_rate": 6.10702192739577e-07, - "loss": 1.337, - "step": 2383 - }, - { - "epoch": 2.3165794491095695, - "grad_norm": 0.404296875, - "learning_rate": 6.09028306917873e-07, - "loss": 1.303, - "step": 2384 - }, - { - "epoch": 2.3175530404446065, - "grad_norm": 0.41796875, - "learning_rate": 6.073564000018811e-07, - "loss": 1.3265, - "step": 2385 - }, - { - "epoch": 2.318526631779644, - "grad_norm": 0.40234375, - "learning_rate": 6.056864737412574e-07, - "loss": 1.3251, - "step": 2386 - }, - { - "epoch": 2.319500223114681, - "grad_norm": 0.453125, - "learning_rate": 6.040185298835821e-07, - "loss": 1.3304, - "step": 2387 - }, - { - "epoch": 2.320473814449718, - "grad_norm": 0.3984375, - "learning_rate": 6.02352570174361e-07, - "loss": 1.2987, - "step": 2388 - }, - { - "epoch": 2.321447405784755, - "grad_norm": 0.3984375, - "learning_rate": 6.006885963570261e-07, - "loss": 1.3073, - "step": 2389 - }, - { - "epoch": 2.322420997119792, - "grad_norm": 0.3984375, - "learning_rate": 5.990266101729278e-07, - "loss": 1.3253, - "step": 2390 - }, - { - "epoch": 2.3233945884548293, - "grad_norm": 0.392578125, - "learning_rate": 5.973666133613393e-07, - "loss": 1.2776, - "step": 2391 - }, - { - "epoch": 2.3243681797898663, - "grad_norm": 0.390625, - "learning_rate": 5.957086076594502e-07, - "loss": 1.3026, - "step": 2392 - }, - { - "epoch": 2.325341771124904, - "grad_norm": 0.400390625, - "learning_rate": 5.940525948023657e-07, - "loss": 1.3391, - "step": 2393 - }, - { - "epoch": 2.326315362459941, - "grad_norm": 0.400390625, - "learning_rate": 5.923985765231082e-07, - "loss": 1.307, - "step": 2394 - }, - { - "epoch": 2.327288953794978, - "grad_norm": 0.3984375, - "learning_rate": 5.907465545526109e-07, - "loss": 1.3212, - "step": 2395 - }, - { - "epoch": 2.328262545130015, - "grad_norm": 0.404296875, - "learning_rate": 5.890965306197166e-07, - "loss": 1.319, - "step": 2396 - }, - { - "epoch": 2.329236136465052, - "grad_norm": 0.39453125, - "learning_rate": 5.874485064511809e-07, - "loss": 1.3012, - "step": 2397 - }, - { - "epoch": 2.330209727800089, - "grad_norm": 0.388671875, - "learning_rate": 5.858024837716628e-07, - "loss": 1.3028, - "step": 2398 - }, - { - "epoch": 2.3311833191351266, - "grad_norm": 0.412109375, - "learning_rate": 5.841584643037295e-07, - "loss": 1.3361, - "step": 2399 - }, - { - "epoch": 2.3321569104701636, - "grad_norm": 0.400390625, - "learning_rate": 5.825164497678495e-07, - "loss": 1.3167, - "step": 2400 - }, - { - "epoch": 2.3331305018052007, - "grad_norm": 0.408203125, - "learning_rate": 5.808764418823959e-07, - "loss": 1.318, - "step": 2401 - }, - { - "epoch": 2.3341040931402377, - "grad_norm": 0.388671875, - "learning_rate": 5.792384423636394e-07, - "loss": 1.3019, - "step": 2402 - }, - { - "epoch": 2.3350776844752748, - "grad_norm": 0.392578125, - "learning_rate": 5.77602452925749e-07, - "loss": 1.3256, - "step": 2403 - }, - { - "epoch": 2.336051275810312, - "grad_norm": 0.3984375, - "learning_rate": 5.759684752807925e-07, - "loss": 1.3099, - "step": 2404 - }, - { - "epoch": 2.337024867145349, - "grad_norm": 0.3984375, - "learning_rate": 5.743365111387303e-07, - "loss": 1.319, - "step": 2405 - }, - { - "epoch": 2.3379984584803863, - "grad_norm": 0.3984375, - "learning_rate": 5.727065622074154e-07, - "loss": 1.3009, - "step": 2406 - }, - { - "epoch": 2.3389720498154234, - "grad_norm": 0.400390625, - "learning_rate": 5.710786301925938e-07, - "loss": 1.3294, - "step": 2407 - }, - { - "epoch": 2.3399456411504604, - "grad_norm": 0.3984375, - "learning_rate": 5.694527167978986e-07, - "loss": 1.2959, - "step": 2408 - }, - { - "epoch": 2.3409192324854975, - "grad_norm": 0.3984375, - "learning_rate": 5.678288237248525e-07, - "loss": 1.3163, - "step": 2409 - }, - { - "epoch": 2.3418928238205345, - "grad_norm": 0.3984375, - "learning_rate": 5.662069526728617e-07, - "loss": 1.3368, - "step": 2410 - }, - { - "epoch": 2.3428664151555716, - "grad_norm": 0.39453125, - "learning_rate": 5.645871053392191e-07, - "loss": 1.3278, - "step": 2411 - }, - { - "epoch": 2.343840006490609, - "grad_norm": 0.400390625, - "learning_rate": 5.629692834190963e-07, - "loss": 1.3079, - "step": 2412 - }, - { - "epoch": 2.344813597825646, - "grad_norm": 0.396484375, - "learning_rate": 5.613534886055502e-07, - "loss": 1.2973, - "step": 2413 - }, - { - "epoch": 2.345787189160683, - "grad_norm": 0.38671875, - "learning_rate": 5.597397225895098e-07, - "loss": 1.3207, - "step": 2414 - }, - { - "epoch": 2.3467607804957202, - "grad_norm": 0.39453125, - "learning_rate": 5.581279870597866e-07, - "loss": 1.3242, - "step": 2415 - }, - { - "epoch": 2.3477343718307573, - "grad_norm": 0.388671875, - "learning_rate": 5.565182837030645e-07, - "loss": 1.3133, - "step": 2416 - }, - { - "epoch": 2.3487079631657943, - "grad_norm": 0.390625, - "learning_rate": 5.549106142039018e-07, - "loss": 1.3171, - "step": 2417 - }, - { - "epoch": 2.3496815545008314, - "grad_norm": 0.390625, - "learning_rate": 5.533049802447268e-07, - "loss": 1.286, - "step": 2418 - }, - { - "epoch": 2.350655145835869, - "grad_norm": 0.388671875, - "learning_rate": 5.517013835058404e-07, - "loss": 1.3036, - "step": 2419 - }, - { - "epoch": 2.351628737170906, - "grad_norm": 0.400390625, - "learning_rate": 5.50099825665408e-07, - "loss": 1.3215, - "step": 2420 - }, - { - "epoch": 2.352602328505943, - "grad_norm": 0.39453125, - "learning_rate": 5.485003083994649e-07, - "loss": 1.3113, - "step": 2421 - }, - { - "epoch": 2.35357591984098, - "grad_norm": 0.40234375, - "learning_rate": 5.469028333819077e-07, - "loss": 1.3337, - "step": 2422 - }, - { - "epoch": 2.354549511176017, - "grad_norm": 0.390625, - "learning_rate": 5.453074022844984e-07, - "loss": 1.2914, - "step": 2423 - }, - { - "epoch": 2.3555231025110546, - "grad_norm": 0.390625, - "learning_rate": 5.437140167768582e-07, - "loss": 1.2937, - "step": 2424 - }, - { - "epoch": 2.3564966938460916, - "grad_norm": 0.40625, - "learning_rate": 5.421226785264685e-07, - "loss": 1.3063, - "step": 2425 - }, - { - "epoch": 2.3574702851811287, - "grad_norm": 0.40234375, - "learning_rate": 5.405333891986672e-07, - "loss": 1.3183, - "step": 2426 - }, - { - "epoch": 2.3584438765161657, - "grad_norm": 0.3984375, - "learning_rate": 5.389461504566503e-07, - "loss": 1.3231, - "step": 2427 - }, - { - "epoch": 2.3594174678512028, - "grad_norm": 0.404296875, - "learning_rate": 5.37360963961465e-07, - "loss": 1.3191, - "step": 2428 - }, - { - "epoch": 2.36039105918624, - "grad_norm": 0.392578125, - "learning_rate": 5.35777831372013e-07, - "loss": 1.3077, - "step": 2429 - }, - { - "epoch": 2.361364650521277, - "grad_norm": 0.3984375, - "learning_rate": 5.341967543450452e-07, - "loss": 1.3377, - "step": 2430 - }, - { - "epoch": 2.362338241856314, - "grad_norm": 0.400390625, - "learning_rate": 5.326177345351627e-07, - "loss": 1.3204, - "step": 2431 - }, - { - "epoch": 2.3633118331913514, - "grad_norm": 0.41015625, - "learning_rate": 5.31040773594812e-07, - "loss": 1.3141, - "step": 2432 - }, - { - "epoch": 2.3642854245263885, - "grad_norm": 0.392578125, - "learning_rate": 5.294658731742869e-07, - "loss": 1.3197, - "step": 2433 - }, - { - "epoch": 2.3652590158614255, - "grad_norm": 0.38671875, - "learning_rate": 5.278930349217226e-07, - "loss": 1.3094, - "step": 2434 - }, - { - "epoch": 2.3662326071964626, - "grad_norm": 0.40234375, - "learning_rate": 5.263222604831001e-07, - "loss": 1.3223, - "step": 2435 - }, - { - "epoch": 2.3672061985314996, - "grad_norm": 0.404296875, - "learning_rate": 5.247535515022345e-07, - "loss": 1.3183, - "step": 2436 - }, - { - "epoch": 2.368179789866537, - "grad_norm": 0.40234375, - "learning_rate": 5.231869096207853e-07, - "loss": 1.3117, - "step": 2437 - }, - { - "epoch": 2.369153381201574, - "grad_norm": 0.38671875, - "learning_rate": 5.216223364782455e-07, - "loss": 1.3197, - "step": 2438 - }, - { - "epoch": 2.370126972536611, - "grad_norm": 0.388671875, - "learning_rate": 5.200598337119447e-07, - "loss": 1.3198, - "step": 2439 - }, - { - "epoch": 2.3711005638716482, - "grad_norm": 0.39453125, - "learning_rate": 5.184994029570443e-07, - "loss": 1.3035, - "step": 2440 - }, - { - "epoch": 2.3720741552066853, - "grad_norm": 0.390625, - "learning_rate": 5.169410458465393e-07, - "loss": 1.3379, - "step": 2441 - }, - { - "epoch": 2.3730477465417223, - "grad_norm": 0.392578125, - "learning_rate": 5.153847640112528e-07, - "loss": 1.3303, - "step": 2442 - }, - { - "epoch": 2.3740213378767594, - "grad_norm": 0.3984375, - "learning_rate": 5.138305590798376e-07, - "loss": 1.3176, - "step": 2443 - }, - { - "epoch": 2.3749949292117964, - "grad_norm": 0.388671875, - "learning_rate": 5.122784326787711e-07, - "loss": 1.3209, - "step": 2444 - }, - { - "epoch": 2.375968520546834, - "grad_norm": 0.392578125, - "learning_rate": 5.107283864323584e-07, - "loss": 1.2801, - "step": 2445 - }, - { - "epoch": 2.376942111881871, - "grad_norm": 0.3984375, - "learning_rate": 5.091804219627253e-07, - "loss": 1.3275, - "step": 2446 - }, - { - "epoch": 2.377915703216908, - "grad_norm": 0.404296875, - "learning_rate": 5.076345408898192e-07, - "loss": 1.3178, - "step": 2447 - }, - { - "epoch": 2.378889294551945, - "grad_norm": 0.396484375, - "learning_rate": 5.060907448314078e-07, - "loss": 1.3144, - "step": 2448 - }, - { - "epoch": 2.379862885886982, - "grad_norm": 0.404296875, - "learning_rate": 5.04549035403078e-07, - "loss": 1.3149, - "step": 2449 - }, - { - "epoch": 2.3808364772220196, - "grad_norm": 0.396484375, - "learning_rate": 5.030094142182301e-07, - "loss": 1.3017, - "step": 2450 - }, - { - "epoch": 2.3818100685570567, - "grad_norm": 0.390625, - "learning_rate": 5.014718828880827e-07, - "loss": 1.3011, - "step": 2451 - }, - { - "epoch": 2.3827836598920937, - "grad_norm": 0.396484375, - "learning_rate": 4.999364430216639e-07, - "loss": 1.3088, - "step": 2452 - }, - { - "epoch": 2.383757251227131, - "grad_norm": 0.400390625, - "learning_rate": 4.984030962258158e-07, - "loss": 1.3191, - "step": 2453 - }, - { - "epoch": 2.384730842562168, - "grad_norm": 0.39453125, - "learning_rate": 4.968718441051876e-07, - "loss": 1.3256, - "step": 2454 - }, - { - "epoch": 2.385704433897205, - "grad_norm": 0.390625, - "learning_rate": 4.953426882622392e-07, - "loss": 1.3182, - "step": 2455 - }, - { - "epoch": 2.386678025232242, - "grad_norm": 0.392578125, - "learning_rate": 4.938156302972338e-07, - "loss": 1.3034, - "step": 2456 - }, - { - "epoch": 2.387651616567279, - "grad_norm": 0.3984375, - "learning_rate": 4.922906718082431e-07, - "loss": 1.3164, - "step": 2457 - }, - { - "epoch": 2.3886252079023165, - "grad_norm": 0.40234375, - "learning_rate": 4.907678143911363e-07, - "loss": 1.324, - "step": 2458 - }, - { - "epoch": 2.3895987992373535, - "grad_norm": 0.412109375, - "learning_rate": 4.892470596395887e-07, - "loss": 1.3342, - "step": 2459 - }, - { - "epoch": 2.3905723905723906, - "grad_norm": 0.392578125, - "learning_rate": 4.877284091450718e-07, - "loss": 1.3079, - "step": 2460 - }, - { - "epoch": 2.3915459819074276, - "grad_norm": 0.3984375, - "learning_rate": 4.862118644968584e-07, - "loss": 1.3134, - "step": 2461 - }, - { - "epoch": 2.3925195732424647, - "grad_norm": 0.390625, - "learning_rate": 4.846974272820131e-07, - "loss": 1.285, - "step": 2462 - }, - { - "epoch": 2.393493164577502, - "grad_norm": 0.400390625, - "learning_rate": 4.831850990854e-07, - "loss": 1.3168, - "step": 2463 - }, - { - "epoch": 2.394466755912539, - "grad_norm": 0.390625, - "learning_rate": 4.816748814896716e-07, - "loss": 1.326, - "step": 2464 - }, - { - "epoch": 2.3954403472475763, - "grad_norm": 0.390625, - "learning_rate": 4.801667760752754e-07, - "loss": 1.2944, - "step": 2465 - }, - { - "epoch": 2.3964139385826133, - "grad_norm": 0.388671875, - "learning_rate": 4.786607844204449e-07, - "loss": 1.3209, - "step": 2466 - }, - { - "epoch": 2.3973875299176504, - "grad_norm": 0.400390625, - "learning_rate": 4.771569081012053e-07, - "loss": 1.3157, - "step": 2467 - }, - { - "epoch": 2.3983611212526874, - "grad_norm": 0.390625, - "learning_rate": 4.756551486913655e-07, - "loss": 1.3212, - "step": 2468 - }, - { - "epoch": 2.3993347125877245, - "grad_norm": 0.390625, - "learning_rate": 4.741555077625193e-07, - "loss": 1.2997, - "step": 2469 - }, - { - "epoch": 2.4003083039227615, - "grad_norm": 0.3984375, - "learning_rate": 4.726579868840439e-07, - "loss": 1.3039, - "step": 2470 - }, - { - "epoch": 2.401281895257799, - "grad_norm": 0.400390625, - "learning_rate": 4.711625876230988e-07, - "loss": 1.3264, - "step": 2471 - }, - { - "epoch": 2.402255486592836, - "grad_norm": 0.392578125, - "learning_rate": 4.696693115446216e-07, - "loss": 1.295, - "step": 2472 - }, - { - "epoch": 2.403229077927873, - "grad_norm": 0.404296875, - "learning_rate": 4.6817816021132916e-07, - "loss": 1.3141, - "step": 2473 - }, - { - "epoch": 2.40420266926291, - "grad_norm": 0.421875, - "learning_rate": 4.6668913518371413e-07, - "loss": 1.2946, - "step": 2474 - }, - { - "epoch": 2.405176260597947, - "grad_norm": 0.408203125, - "learning_rate": 4.6520223802004464e-07, - "loss": 1.3183, - "step": 2475 - }, - { - "epoch": 2.4061498519329847, - "grad_norm": 0.404296875, - "learning_rate": 4.637174702763608e-07, - "loss": 1.308, - "step": 2476 - }, - { - "epoch": 2.4071234432680217, - "grad_norm": 0.400390625, - "learning_rate": 4.622348335064761e-07, - "loss": 1.2958, - "step": 2477 - }, - { - "epoch": 2.408097034603059, - "grad_norm": 0.400390625, - "learning_rate": 4.607543292619726e-07, - "loss": 1.3342, - "step": 2478 - }, - { - "epoch": 2.409070625938096, - "grad_norm": 0.39453125, - "learning_rate": 4.592759590922005e-07, - "loss": 1.3052, - "step": 2479 - }, - { - "epoch": 2.410044217273133, - "grad_norm": 0.404296875, - "learning_rate": 4.5779972454427694e-07, - "loss": 1.3167, - "step": 2480 - }, - { - "epoch": 2.41101780860817, - "grad_norm": 0.44921875, - "learning_rate": 4.563256271630856e-07, - "loss": 1.2966, - "step": 2481 - }, - { - "epoch": 2.411991399943207, - "grad_norm": 0.396484375, - "learning_rate": 4.5485366849127105e-07, - "loss": 1.3211, - "step": 2482 - }, - { - "epoch": 2.412964991278244, - "grad_norm": 0.392578125, - "learning_rate": 4.5338385006924223e-07, - "loss": 1.3314, - "step": 2483 - }, - { - "epoch": 2.4139385826132815, - "grad_norm": 0.40625, - "learning_rate": 4.5191617343516596e-07, - "loss": 1.3233, - "step": 2484 - }, - { - "epoch": 2.4149121739483186, - "grad_norm": 0.3984375, - "learning_rate": 4.5045064012497003e-07, - "loss": 1.3321, - "step": 2485 - }, - { - "epoch": 2.4158857652833556, - "grad_norm": 0.3984375, - "learning_rate": 4.489872516723373e-07, - "loss": 1.3154, - "step": 2486 - }, - { - "epoch": 2.4168593566183927, - "grad_norm": 0.392578125, - "learning_rate": 4.475260096087064e-07, - "loss": 1.3271, - "step": 2487 - }, - { - "epoch": 2.4178329479534297, - "grad_norm": 0.400390625, - "learning_rate": 4.46066915463271e-07, - "loss": 1.3055, - "step": 2488 - }, - { - "epoch": 2.4188065392884672, - "grad_norm": 0.392578125, - "learning_rate": 4.4460997076297504e-07, - "loss": 1.3123, - "step": 2489 - }, - { - "epoch": 2.4197801306235043, - "grad_norm": 0.396484375, - "learning_rate": 4.4315517703251514e-07, - "loss": 1.3276, - "step": 2490 - }, - { - "epoch": 2.4207537219585413, - "grad_norm": 0.390625, - "learning_rate": 4.417025357943355e-07, - "loss": 1.3138, - "step": 2491 - }, - { - "epoch": 2.4217273132935784, - "grad_norm": 0.38671875, - "learning_rate": 4.402520485686276e-07, - "loss": 1.2835, - "step": 2492 - }, - { - "epoch": 2.4227009046286154, - "grad_norm": 0.392578125, - "learning_rate": 4.3880371687333027e-07, - "loss": 1.3218, - "step": 2493 - }, - { - "epoch": 2.4236744959636525, - "grad_norm": 0.39453125, - "learning_rate": 4.3735754222412494e-07, - "loss": 1.3177, - "step": 2494 - }, - { - "epoch": 2.4246480872986895, - "grad_norm": 0.392578125, - "learning_rate": 4.3591352613443606e-07, - "loss": 1.3138, - "step": 2495 - }, - { - "epoch": 2.425621678633727, - "grad_norm": 0.39453125, - "learning_rate": 4.3447167011543e-07, - "loss": 1.3114, - "step": 2496 - }, - { - "epoch": 2.426595269968764, - "grad_norm": 0.392578125, - "learning_rate": 4.3303197567601125e-07, - "loss": 1.3237, - "step": 2497 - }, - { - "epoch": 2.427568861303801, - "grad_norm": 0.40234375, - "learning_rate": 4.3159444432282426e-07, - "loss": 1.3405, - "step": 2498 - }, - { - "epoch": 2.428542452638838, - "grad_norm": 0.390625, - "learning_rate": 4.3015907756024683e-07, - "loss": 1.3163, - "step": 2499 - }, - { - "epoch": 2.429516043973875, - "grad_norm": 0.396484375, - "learning_rate": 4.2872587689039486e-07, - "loss": 1.3104, - "step": 2500 - }, - { - "epoch": 2.4304896353089123, - "grad_norm": 0.396484375, - "learning_rate": 4.272948438131144e-07, - "loss": 1.2947, - "step": 2501 - }, - { - "epoch": 2.4314632266439498, - "grad_norm": 0.408203125, - "learning_rate": 4.2586597982598536e-07, - "loss": 1.3231, - "step": 2502 - }, - { - "epoch": 2.432436817978987, - "grad_norm": 0.400390625, - "learning_rate": 4.244392864243169e-07, - "loss": 1.3201, - "step": 2503 - }, - { - "epoch": 2.433410409314024, - "grad_norm": 0.388671875, - "learning_rate": 4.230147651011457e-07, - "loss": 1.3002, - "step": 2504 - }, - { - "epoch": 2.434384000649061, - "grad_norm": 0.400390625, - "learning_rate": 4.215924173472363e-07, - "loss": 1.3024, - "step": 2505 - }, - { - "epoch": 2.435357591984098, - "grad_norm": 0.38671875, - "learning_rate": 4.2017224465107947e-07, - "loss": 1.2984, - "step": 2506 - }, - { - "epoch": 2.436331183319135, - "grad_norm": 0.39453125, - "learning_rate": 4.187542484988874e-07, - "loss": 1.3176, - "step": 2507 - }, - { - "epoch": 2.437304774654172, - "grad_norm": 0.392578125, - "learning_rate": 4.1733843037459754e-07, - "loss": 1.3094, - "step": 2508 - }, - { - "epoch": 2.4382783659892096, - "grad_norm": 0.392578125, - "learning_rate": 4.1592479175986494e-07, - "loss": 1.2799, - "step": 2509 - }, - { - "epoch": 2.4392519573242466, - "grad_norm": 0.396484375, - "learning_rate": 4.145133341340665e-07, - "loss": 1.303, - "step": 2510 - }, - { - "epoch": 2.4402255486592836, - "grad_norm": 0.400390625, - "learning_rate": 4.1310405897429463e-07, - "loss": 1.3046, - "step": 2511 - }, - { - "epoch": 2.4411991399943207, - "grad_norm": 0.392578125, - "learning_rate": 4.1169696775535923e-07, - "loss": 1.3219, - "step": 2512 - }, - { - "epoch": 2.4421727313293577, - "grad_norm": 0.39453125, - "learning_rate": 4.102920619497841e-07, - "loss": 1.321, - "step": 2513 - }, - { - "epoch": 2.443146322664395, - "grad_norm": 0.40234375, - "learning_rate": 4.088893430278057e-07, - "loss": 1.3411, - "step": 2514 - }, - { - "epoch": 2.4441199139994323, - "grad_norm": 0.38671875, - "learning_rate": 4.0748881245737173e-07, - "loss": 1.2986, - "step": 2515 - }, - { - "epoch": 2.4450935053344693, - "grad_norm": 0.396484375, - "learning_rate": 4.060904717041417e-07, - "loss": 1.3207, - "step": 2516 - }, - { - "epoch": 2.4460670966695064, - "grad_norm": 0.39453125, - "learning_rate": 4.046943222314803e-07, - "loss": 1.3151, - "step": 2517 - }, - { - "epoch": 2.4470406880045434, - "grad_norm": 0.3984375, - "learning_rate": 4.033003655004622e-07, - "loss": 1.3021, - "step": 2518 - }, - { - "epoch": 2.4480142793395805, - "grad_norm": 0.400390625, - "learning_rate": 4.019086029698649e-07, - "loss": 1.3171, - "step": 2519 - }, - { - "epoch": 2.4489878706746175, - "grad_norm": 0.400390625, - "learning_rate": 4.0051903609617195e-07, - "loss": 1.3259, - "step": 2520 - }, - { - "epoch": 2.4499614620096546, - "grad_norm": 0.39453125, - "learning_rate": 3.9913166633356645e-07, - "loss": 1.3218, - "step": 2521 - }, - { - "epoch": 2.450935053344692, - "grad_norm": 0.392578125, - "learning_rate": 3.977464951339352e-07, - "loss": 1.3049, - "step": 2522 - }, - { - "epoch": 2.451908644679729, - "grad_norm": 0.396484375, - "learning_rate": 3.9636352394686134e-07, - "loss": 1.3084, - "step": 2523 - }, - { - "epoch": 2.452882236014766, - "grad_norm": 0.404296875, - "learning_rate": 3.9498275421962854e-07, - "loss": 1.3172, - "step": 2524 - }, - { - "epoch": 2.4538558273498032, - "grad_norm": 0.39453125, - "learning_rate": 3.9360418739721295e-07, - "loss": 1.3097, - "step": 2525 - }, - { - "epoch": 2.4548294186848403, - "grad_norm": 0.396484375, - "learning_rate": 3.922278249222894e-07, - "loss": 1.3161, - "step": 2526 - }, - { - "epoch": 2.4558030100198773, - "grad_norm": 0.3984375, - "learning_rate": 3.908536682352229e-07, - "loss": 1.3021, - "step": 2527 - }, - { - "epoch": 2.456776601354915, - "grad_norm": 0.3984375, - "learning_rate": 3.8948171877407207e-07, - "loss": 1.3062, - "step": 2528 - }, - { - "epoch": 2.457750192689952, - "grad_norm": 0.3984375, - "learning_rate": 3.88111977974584e-07, - "loss": 1.3074, - "step": 2529 - }, - { - "epoch": 2.458723784024989, - "grad_norm": 0.396484375, - "learning_rate": 3.867444472701959e-07, - "loss": 1.3241, - "step": 2530 - }, - { - "epoch": 2.459697375360026, - "grad_norm": 0.3984375, - "learning_rate": 3.8537912809203075e-07, - "loss": 1.2956, - "step": 2531 - }, - { - "epoch": 2.460670966695063, - "grad_norm": 0.4140625, - "learning_rate": 3.8401602186889904e-07, - "loss": 1.3255, - "step": 2532 - }, - { - "epoch": 2.4616445580301, - "grad_norm": 0.396484375, - "learning_rate": 3.826551300272924e-07, - "loss": 1.3229, - "step": 2533 - }, - { - "epoch": 2.462618149365137, - "grad_norm": 0.3984375, - "learning_rate": 3.812964539913888e-07, - "loss": 1.294, - "step": 2534 - }, - { - "epoch": 2.4635917407001746, - "grad_norm": 0.41015625, - "learning_rate": 3.7993999518304433e-07, - "loss": 1.3159, - "step": 2535 - }, - { - "epoch": 2.4645653320352117, - "grad_norm": 0.39453125, - "learning_rate": 3.7858575502179613e-07, - "loss": 1.3169, - "step": 2536 - }, - { - "epoch": 2.4655389233702487, - "grad_norm": 0.3984375, - "learning_rate": 3.772337349248589e-07, - "loss": 1.3146, - "step": 2537 - }, - { - "epoch": 2.4665125147052858, - "grad_norm": 0.39453125, - "learning_rate": 3.7588393630712513e-07, - "loss": 1.3161, - "step": 2538 - }, - { - "epoch": 2.467486106040323, - "grad_norm": 0.396484375, - "learning_rate": 3.745363605811611e-07, - "loss": 1.3002, - "step": 2539 - }, - { - "epoch": 2.46845969737536, - "grad_norm": 0.39453125, - "learning_rate": 3.731910091572083e-07, - "loss": 1.2935, - "step": 2540 - }, - { - "epoch": 2.4694332887103974, - "grad_norm": 0.390625, - "learning_rate": 3.718478834431788e-07, - "loss": 1.3054, - "step": 2541 - }, - { - "epoch": 2.4704068800454344, - "grad_norm": 0.39453125, - "learning_rate": 3.7050698484465767e-07, - "loss": 1.3202, - "step": 2542 - }, - { - "epoch": 2.4713804713804715, - "grad_norm": 0.40234375, - "learning_rate": 3.691683147648964e-07, - "loss": 1.3242, - "step": 2543 - }, - { - "epoch": 2.4723540627155085, - "grad_norm": 0.392578125, - "learning_rate": 3.6783187460481763e-07, - "loss": 1.3024, - "step": 2544 - }, - { - "epoch": 2.4733276540505456, - "grad_norm": 0.396484375, - "learning_rate": 3.6649766576300707e-07, - "loss": 1.3246, - "step": 2545 - }, - { - "epoch": 2.4743012453855826, - "grad_norm": 0.396484375, - "learning_rate": 3.651656896357189e-07, - "loss": 1.3226, - "step": 2546 - }, - { - "epoch": 2.4752748367206197, - "grad_norm": 0.400390625, - "learning_rate": 3.638359476168668e-07, - "loss": 1.305, - "step": 2547 - }, - { - "epoch": 2.476248428055657, - "grad_norm": 0.39453125, - "learning_rate": 3.625084410980298e-07, - "loss": 1.3425, - "step": 2548 - }, - { - "epoch": 2.477222019390694, - "grad_norm": 0.388671875, - "learning_rate": 3.6118317146844524e-07, - "loss": 1.309, - "step": 2549 - }, - { - "epoch": 2.4781956107257312, - "grad_norm": 0.400390625, - "learning_rate": 3.5986014011501175e-07, - "loss": 1.3193, - "step": 2550 - }, - { - "epoch": 2.4791692020607683, - "grad_norm": 0.404296875, - "learning_rate": 3.585393484222829e-07, - "loss": 1.3187, - "step": 2551 - }, - { - "epoch": 2.4801427933958053, - "grad_norm": 0.39453125, - "learning_rate": 3.572207977724709e-07, - "loss": 1.3145, - "step": 2552 - }, - { - "epoch": 2.4811163847308424, - "grad_norm": 0.40625, - "learning_rate": 3.559044895454411e-07, - "loss": 1.3195, - "step": 2553 - }, - { - "epoch": 2.48208997606588, - "grad_norm": 0.39453125, - "learning_rate": 3.545904251187135e-07, - "loss": 1.3314, - "step": 2554 - }, - { - "epoch": 2.483063567400917, - "grad_norm": 0.388671875, - "learning_rate": 3.532786058674581e-07, - "loss": 1.2886, - "step": 2555 - }, - { - "epoch": 2.484037158735954, - "grad_norm": 0.3828125, - "learning_rate": 3.519690331644973e-07, - "loss": 1.3003, - "step": 2556 - }, - { - "epoch": 2.485010750070991, - "grad_norm": 0.40234375, - "learning_rate": 3.5066170838030153e-07, - "loss": 1.3048, - "step": 2557 - }, - { - "epoch": 2.485984341406028, - "grad_norm": 0.404296875, - "learning_rate": 3.493566328829884e-07, - "loss": 1.3262, - "step": 2558 - }, - { - "epoch": 2.486957932741065, - "grad_norm": 0.392578125, - "learning_rate": 3.4805380803832167e-07, - "loss": 1.3179, - "step": 2559 - }, - { - "epoch": 2.487931524076102, - "grad_norm": 0.404296875, - "learning_rate": 3.4675323520971137e-07, - "loss": 1.3123, - "step": 2560 - }, - { - "epoch": 2.4889051154111397, - "grad_norm": 0.38671875, - "learning_rate": 3.454549157582082e-07, - "loss": 1.3129, - "step": 2561 - }, - { - "epoch": 2.4898787067461767, - "grad_norm": 0.392578125, - "learning_rate": 3.4415885104250787e-07, - "loss": 1.3066, - "step": 2562 - }, - { - "epoch": 2.490852298081214, - "grad_norm": 0.400390625, - "learning_rate": 3.4286504241894283e-07, - "loss": 1.3166, - "step": 2563 - }, - { - "epoch": 2.491825889416251, - "grad_norm": 0.400390625, - "learning_rate": 3.415734912414878e-07, - "loss": 1.3, - "step": 2564 - }, - { - "epoch": 2.492799480751288, - "grad_norm": 0.39453125, - "learning_rate": 3.4028419886175306e-07, - "loss": 1.3168, - "step": 2565 - }, - { - "epoch": 2.493773072086325, - "grad_norm": 0.3984375, - "learning_rate": 3.3899716662898623e-07, - "loss": 1.2988, - "step": 2566 - }, - { - "epoch": 2.4947466634213624, - "grad_norm": 0.400390625, - "learning_rate": 3.377123958900688e-07, - "loss": 1.314, - "step": 2567 - }, - { - "epoch": 2.4957202547563995, - "grad_norm": 0.392578125, - "learning_rate": 3.3642988798951587e-07, - "loss": 1.3127, - "step": 2568 - }, - { - "epoch": 2.4966938460914365, - "grad_norm": 0.412109375, - "learning_rate": 3.3514964426947433e-07, - "loss": 1.3219, - "step": 2569 - }, - { - "epoch": 2.4976674374264736, - "grad_norm": 0.3984375, - "learning_rate": 3.338716660697225e-07, - "loss": 1.3277, - "step": 2570 - }, - { - "epoch": 2.4986410287615106, - "grad_norm": 0.396484375, - "learning_rate": 3.3259595472766635e-07, - "loss": 1.3038, - "step": 2571 - }, - { - "epoch": 2.4996146200965477, - "grad_norm": 0.40625, - "learning_rate": 3.3132251157834137e-07, - "loss": 1.318, - "step": 2572 - }, - { - "epoch": 2.5005882114315847, - "grad_norm": 0.400390625, - "learning_rate": 3.3005133795440734e-07, - "loss": 1.315, - "step": 2573 - }, - { - "epoch": 2.5015618027666218, - "grad_norm": 0.388671875, - "learning_rate": 3.2878243518615127e-07, - "loss": 1.3067, - "step": 2574 - }, - { - "epoch": 2.5025353941016593, - "grad_norm": 0.400390625, - "learning_rate": 3.2751580460148094e-07, - "loss": 1.311, - "step": 2575 - }, - { - "epoch": 2.5035089854366963, - "grad_norm": 0.40234375, - "learning_rate": 3.262514475259296e-07, - "loss": 1.3077, - "step": 2576 - }, - { - "epoch": 2.5044825767717334, - "grad_norm": 0.384765625, - "learning_rate": 3.249893652826483e-07, - "loss": 1.3187, - "step": 2577 - }, - { - "epoch": 2.5054561681067704, - "grad_norm": 0.396484375, - "learning_rate": 3.2372955919240834e-07, - "loss": 1.3228, - "step": 2578 - }, - { - "epoch": 2.506429759441808, - "grad_norm": 0.396484375, - "learning_rate": 3.2247203057360066e-07, - "loss": 1.3064, - "step": 2579 - }, - { - "epoch": 2.507403350776845, - "grad_norm": 0.408203125, - "learning_rate": 3.212167807422306e-07, - "loss": 1.2972, - "step": 2580 - }, - { - "epoch": 2.508376942111882, - "grad_norm": 0.3984375, - "learning_rate": 3.1996381101191936e-07, - "loss": 1.3273, - "step": 2581 - }, - { - "epoch": 2.509350533446919, - "grad_norm": 0.416015625, - "learning_rate": 3.187131226939036e-07, - "loss": 1.3228, - "step": 2582 - }, - { - "epoch": 2.510324124781956, - "grad_norm": 0.396484375, - "learning_rate": 3.1746471709702963e-07, - "loss": 1.3191, - "step": 2583 - }, - { - "epoch": 2.511297716116993, - "grad_norm": 0.39453125, - "learning_rate": 3.1621859552775774e-07, - "loss": 1.2978, - "step": 2584 - }, - { - "epoch": 2.51227130745203, - "grad_norm": 0.3984375, - "learning_rate": 3.1497475929015614e-07, - "loss": 1.307, - "step": 2585 - }, - { - "epoch": 2.5132448987870672, - "grad_norm": 0.40234375, - "learning_rate": 3.137332096859014e-07, - "loss": 1.3124, - "step": 2586 - }, - { - "epoch": 2.5142184901221047, - "grad_norm": 0.408203125, - "learning_rate": 3.124939480142786e-07, - "loss": 1.3189, - "step": 2587 - }, - { - "epoch": 2.515192081457142, - "grad_norm": 0.392578125, - "learning_rate": 3.11256975572177e-07, - "loss": 1.327, - "step": 2588 - }, - { - "epoch": 2.516165672792179, - "grad_norm": 0.390625, - "learning_rate": 3.100222936540914e-07, - "loss": 1.322, - "step": 2589 - }, - { - "epoch": 2.517139264127216, - "grad_norm": 0.392578125, - "learning_rate": 3.0878990355211886e-07, - "loss": 1.316, - "step": 2590 - }, - { - "epoch": 2.518112855462253, - "grad_norm": 0.3984375, - "learning_rate": 3.075598065559571e-07, - "loss": 1.3309, - "step": 2591 - }, - { - "epoch": 2.5190864467972904, - "grad_norm": 0.392578125, - "learning_rate": 3.063320039529064e-07, - "loss": 1.2845, - "step": 2592 - }, - { - "epoch": 2.5200600381323275, - "grad_norm": 0.40234375, - "learning_rate": 3.051064970278633e-07, - "loss": 1.3211, - "step": 2593 - }, - { - "epoch": 2.5210336294673645, - "grad_norm": 0.396484375, - "learning_rate": 3.038832870633249e-07, - "loss": 1.3163, - "step": 2594 - }, - { - "epoch": 2.5220072208024016, - "grad_norm": 0.39453125, - "learning_rate": 3.0266237533938204e-07, - "loss": 1.3347, - "step": 2595 - }, - { - "epoch": 2.5229808121374386, - "grad_norm": 0.392578125, - "learning_rate": 3.014437631337211e-07, - "loss": 1.3153, - "step": 2596 - }, - { - "epoch": 2.5239544034724757, - "grad_norm": 0.396484375, - "learning_rate": 3.002274517216228e-07, - "loss": 1.3202, - "step": 2597 - }, - { - "epoch": 2.5249279948075127, - "grad_norm": 0.388671875, - "learning_rate": 2.9901344237595856e-07, - "loss": 1.2834, - "step": 2598 - }, - { - "epoch": 2.52590158614255, - "grad_norm": 0.3984375, - "learning_rate": 2.978017363671931e-07, - "loss": 1.3174, - "step": 2599 - }, - { - "epoch": 2.5268751774775873, - "grad_norm": 0.400390625, - "learning_rate": 2.965923349633779e-07, - "loss": 1.3101, - "step": 2600 - }, - { - "epoch": 2.5278487688126243, - "grad_norm": 0.396484375, - "learning_rate": 2.9538523943015455e-07, - "loss": 1.2955, - "step": 2601 - }, - { - "epoch": 2.5288223601476614, - "grad_norm": 0.40234375, - "learning_rate": 2.9418045103075137e-07, - "loss": 1.3198, - "step": 2602 - }, - { - "epoch": 2.5297959514826984, - "grad_norm": 0.400390625, - "learning_rate": 2.929779710259811e-07, - "loss": 1.2972, - "step": 2603 - }, - { - "epoch": 2.5307695428177355, - "grad_norm": 0.41015625, - "learning_rate": 2.917778006742414e-07, - "loss": 1.3272, - "step": 2604 - }, - { - "epoch": 2.531743134152773, - "grad_norm": 0.39453125, - "learning_rate": 2.905799412315141e-07, - "loss": 1.306, - "step": 2605 - }, - { - "epoch": 2.53271672548781, - "grad_norm": 0.3984375, - "learning_rate": 2.8938439395135995e-07, - "loss": 1.322, - "step": 2606 - }, - { - "epoch": 2.533690316822847, - "grad_norm": 0.39453125, - "learning_rate": 2.8819116008492327e-07, - "loss": 1.3134, - "step": 2607 - }, - { - "epoch": 2.534663908157884, - "grad_norm": 0.396484375, - "learning_rate": 2.8700024088092415e-07, - "loss": 1.2964, - "step": 2608 - }, - { - "epoch": 2.535637499492921, - "grad_norm": 0.396484375, - "learning_rate": 2.8581163758566346e-07, - "loss": 1.3143, - "step": 2609 - }, - { - "epoch": 2.536611090827958, - "grad_norm": 0.400390625, - "learning_rate": 2.8462535144301554e-07, - "loss": 1.3134, - "step": 2610 - }, - { - "epoch": 2.5375846821629953, - "grad_norm": 0.408203125, - "learning_rate": 2.834413836944325e-07, - "loss": 1.3109, - "step": 2611 - }, - { - "epoch": 2.5385582734980323, - "grad_norm": 0.404296875, - "learning_rate": 2.822597355789383e-07, - "loss": 1.2995, - "step": 2612 - }, - { - "epoch": 2.53953186483307, - "grad_norm": 0.3984375, - "learning_rate": 2.8108040833313035e-07, - "loss": 1.3202, - "step": 2613 - }, - { - "epoch": 2.540505456168107, - "grad_norm": 0.390625, - "learning_rate": 2.799034031911765e-07, - "loss": 1.3064, - "step": 2614 - }, - { - "epoch": 2.541479047503144, - "grad_norm": 0.40234375, - "learning_rate": 2.7872872138481557e-07, - "loss": 1.3254, - "step": 2615 - }, - { - "epoch": 2.542452638838181, - "grad_norm": 0.40234375, - "learning_rate": 2.77556364143354e-07, - "loss": 1.3129, - "step": 2616 - }, - { - "epoch": 2.543426230173218, - "grad_norm": 0.3984375, - "learning_rate": 2.7638633269366666e-07, - "loss": 1.3286, - "step": 2617 - }, - { - "epoch": 2.5443998215082555, - "grad_norm": 0.396484375, - "learning_rate": 2.7521862826019317e-07, - "loss": 1.3266, - "step": 2618 - }, - { - "epoch": 2.5453734128432925, - "grad_norm": 0.38671875, - "learning_rate": 2.7405325206493914e-07, - "loss": 1.2944, - "step": 2619 - }, - { - "epoch": 2.5463470041783296, - "grad_norm": 0.3984375, - "learning_rate": 2.7289020532747263e-07, - "loss": 1.3001, - "step": 2620 - }, - { - "epoch": 2.5473205955133666, - "grad_norm": 0.39453125, - "learning_rate": 2.7172948926492497e-07, - "loss": 1.3071, - "step": 2621 - }, - { - "epoch": 2.5482941868484037, - "grad_norm": 0.39453125, - "learning_rate": 2.705711050919871e-07, - "loss": 1.3266, - "step": 2622 - }, - { - "epoch": 2.5492677781834407, - "grad_norm": 0.392578125, - "learning_rate": 2.694150540209117e-07, - "loss": 1.3262, - "step": 2623 - }, - { - "epoch": 2.550241369518478, - "grad_norm": 0.396484375, - "learning_rate": 2.68261337261507e-07, - "loss": 1.3199, - "step": 2624 - }, - { - "epoch": 2.551214960853515, - "grad_norm": 0.40625, - "learning_rate": 2.67109956021141e-07, - "loss": 1.3013, - "step": 2625 - }, - { - "epoch": 2.5521885521885523, - "grad_norm": 0.39453125, - "learning_rate": 2.659609115047354e-07, - "loss": 1.3188, - "step": 2626 - }, - { - "epoch": 2.5531621435235894, - "grad_norm": 0.396484375, - "learning_rate": 2.648142049147692e-07, - "loss": 1.3065, - "step": 2627 - }, - { - "epoch": 2.5541357348586264, - "grad_norm": 0.390625, - "learning_rate": 2.6366983745127197e-07, - "loss": 1.3006, - "step": 2628 - }, - { - "epoch": 2.5551093261936635, - "grad_norm": 0.40625, - "learning_rate": 2.62527810311827e-07, - "loss": 1.3011, - "step": 2629 - }, - { - "epoch": 2.5560829175287005, - "grad_norm": 0.390625, - "learning_rate": 2.6138812469156784e-07, - "loss": 1.2994, - "step": 2630 - }, - { - "epoch": 2.557056508863738, - "grad_norm": 0.408203125, - "learning_rate": 2.602507817831784e-07, - "loss": 1.3111, - "step": 2631 - }, - { - "epoch": 2.558030100198775, - "grad_norm": 0.39453125, - "learning_rate": 2.591157827768892e-07, - "loss": 1.3137, - "step": 2632 - }, - { - "epoch": 2.559003691533812, - "grad_norm": 0.384765625, - "learning_rate": 2.5798312886048034e-07, - "loss": 1.3057, - "step": 2633 - }, - { - "epoch": 2.559977282868849, - "grad_norm": 0.396484375, - "learning_rate": 2.568528212192756e-07, - "loss": 1.3158, - "step": 2634 - }, - { - "epoch": 2.5609508742038862, - "grad_norm": 0.40234375, - "learning_rate": 2.557248610361443e-07, - "loss": 1.3071, - "step": 2635 - }, - { - "epoch": 2.5619244655389233, - "grad_norm": 0.40234375, - "learning_rate": 2.5459924949149896e-07, - "loss": 1.3166, - "step": 2636 - }, - { - "epoch": 2.5628980568739603, - "grad_norm": 0.390625, - "learning_rate": 2.53475987763295e-07, - "loss": 1.312, - "step": 2637 - }, - { - "epoch": 2.5638716482089974, - "grad_norm": 0.390625, - "learning_rate": 2.523550770270269e-07, - "loss": 1.2926, - "step": 2638 - }, - { - "epoch": 2.564845239544035, - "grad_norm": 0.404296875, - "learning_rate": 2.512365184557314e-07, - "loss": 1.3246, - "step": 2639 - }, - { - "epoch": 2.565818830879072, - "grad_norm": 0.390625, - "learning_rate": 2.5012031321998117e-07, - "loss": 1.3306, - "step": 2640 - }, - { - "epoch": 2.566792422214109, - "grad_norm": 0.38671875, - "learning_rate": 2.4900646248788823e-07, - "loss": 1.3108, - "step": 2641 - }, - { - "epoch": 2.567766013549146, - "grad_norm": 0.390625, - "learning_rate": 2.4789496742509883e-07, - "loss": 1.3182, - "step": 2642 - }, - { - "epoch": 2.568739604884183, - "grad_norm": 0.39453125, - "learning_rate": 2.4678582919479557e-07, - "loss": 1.3009, - "step": 2643 - }, - { - "epoch": 2.5697131962192206, - "grad_norm": 0.404296875, - "learning_rate": 2.4567904895769267e-07, - "loss": 1.3197, - "step": 2644 - }, - { - "epoch": 2.5706867875542576, - "grad_norm": 0.400390625, - "learning_rate": 2.445746278720398e-07, - "loss": 1.3177, - "step": 2645 - }, - { - "epoch": 2.5716603788892947, - "grad_norm": 0.38671875, - "learning_rate": 2.434725670936139e-07, - "loss": 1.3048, - "step": 2646 - }, - { - "epoch": 2.5726339702243317, - "grad_norm": 0.392578125, - "learning_rate": 2.423728677757248e-07, - "loss": 1.3191, - "step": 2647 - }, - { - "epoch": 2.5736075615593688, - "grad_norm": 0.3984375, - "learning_rate": 2.412755310692097e-07, - "loss": 1.3201, - "step": 2648 - }, - { - "epoch": 2.574581152894406, - "grad_norm": 0.400390625, - "learning_rate": 2.4018055812243394e-07, - "loss": 1.3176, - "step": 2649 - }, - { - "epoch": 2.575554744229443, - "grad_norm": 0.404296875, - "learning_rate": 2.3908795008128873e-07, - "loss": 1.3146, - "step": 2650 - }, - { - "epoch": 2.57652833556448, - "grad_norm": 0.3984375, - "learning_rate": 2.3799770808919126e-07, - "loss": 1.3261, - "step": 2651 - }, - { - "epoch": 2.5775019268995174, - "grad_norm": 0.3984375, - "learning_rate": 2.369098332870809e-07, - "loss": 1.3223, - "step": 2652 - }, - { - "epoch": 2.5784755182345545, - "grad_norm": 0.400390625, - "learning_rate": 2.3582432681342194e-07, - "loss": 1.3249, - "step": 2653 - }, - { - "epoch": 2.5794491095695915, - "grad_norm": 0.388671875, - "learning_rate": 2.347411898041979e-07, - "loss": 1.3046, - "step": 2654 - }, - { - "epoch": 2.5804227009046286, - "grad_norm": 0.390625, - "learning_rate": 2.3366042339291517e-07, - "loss": 1.3086, - "step": 2655 - }, - { - "epoch": 2.5813962922396656, - "grad_norm": 0.392578125, - "learning_rate": 2.325820287105973e-07, - "loss": 1.2968, - "step": 2656 - }, - { - "epoch": 2.582369883574703, - "grad_norm": 0.4453125, - "learning_rate": 2.3150600688578661e-07, - "loss": 1.302, - "step": 2657 - }, - { - "epoch": 2.58334347490974, - "grad_norm": 0.3984375, - "learning_rate": 2.3043235904454148e-07, - "loss": 1.3028, - "step": 2658 - }, - { - "epoch": 2.584317066244777, - "grad_norm": 0.388671875, - "learning_rate": 2.2936108631043785e-07, - "loss": 1.3239, - "step": 2659 - }, - { - "epoch": 2.5852906575798142, - "grad_norm": 0.39453125, - "learning_rate": 2.2829218980456342e-07, - "loss": 1.3075, - "step": 2660 - }, - { - "epoch": 2.5862642489148513, - "grad_norm": 0.400390625, - "learning_rate": 2.272256706455217e-07, - "loss": 1.3313, - "step": 2661 - }, - { - "epoch": 2.5872378402498883, - "grad_norm": 0.388671875, - "learning_rate": 2.261615299494263e-07, - "loss": 1.3042, - "step": 2662 - }, - { - "epoch": 2.5882114315849254, - "grad_norm": 0.404296875, - "learning_rate": 2.2509976882990364e-07, - "loss": 1.3205, - "step": 2663 - }, - { - "epoch": 2.5891850229199624, - "grad_norm": 0.41015625, - "learning_rate": 2.2404038839808766e-07, - "loss": 1.3346, - "step": 2664 - }, - { - "epoch": 2.590158614255, - "grad_norm": 0.40234375, - "learning_rate": 2.2298338976262324e-07, - "loss": 1.3188, - "step": 2665 - }, - { - "epoch": 2.591132205590037, - "grad_norm": 0.400390625, - "learning_rate": 2.219287740296605e-07, - "loss": 1.3062, - "step": 2666 - }, - { - "epoch": 2.592105796925074, - "grad_norm": 0.39453125, - "learning_rate": 2.208765423028586e-07, - "loss": 1.3295, - "step": 2667 - }, - { - "epoch": 2.593079388260111, - "grad_norm": 0.3984375, - "learning_rate": 2.1982669568337806e-07, - "loss": 1.3132, - "step": 2668 - }, - { - "epoch": 2.594052979595148, - "grad_norm": 0.40234375, - "learning_rate": 2.18779235269887e-07, - "loss": 1.3178, - "step": 2669 - }, - { - "epoch": 2.5950265709301856, - "grad_norm": 0.396484375, - "learning_rate": 2.1773416215855407e-07, - "loss": 1.3137, - "step": 2670 - }, - { - "epoch": 2.5960001622652227, - "grad_norm": 0.39453125, - "learning_rate": 2.166914774430512e-07, - "loss": 1.3098, - "step": 2671 - }, - { - "epoch": 2.5969737536002597, - "grad_norm": 0.400390625, - "learning_rate": 2.1565118221454905e-07, - "loss": 1.3295, - "step": 2672 - }, - { - "epoch": 2.5979473449352968, - "grad_norm": 0.400390625, - "learning_rate": 2.146132775617199e-07, - "loss": 1.3178, - "step": 2673 - }, - { - "epoch": 2.598920936270334, - "grad_norm": 0.390625, - "learning_rate": 2.135777645707318e-07, - "loss": 1.3179, - "step": 2674 - }, - { - "epoch": 2.599894527605371, - "grad_norm": 0.396484375, - "learning_rate": 2.1254464432525214e-07, - "loss": 1.3161, - "step": 2675 - }, - { - "epoch": 2.600868118940408, - "grad_norm": 0.400390625, - "learning_rate": 2.1151391790644322e-07, - "loss": 1.3216, - "step": 2676 - }, - { - "epoch": 2.601841710275445, - "grad_norm": 0.40234375, - "learning_rate": 2.104855863929617e-07, - "loss": 1.3194, - "step": 2677 - }, - { - "epoch": 2.6028153016104825, - "grad_norm": 0.388671875, - "learning_rate": 2.094596508609595e-07, - "loss": 1.31, - "step": 2678 - }, - { - "epoch": 2.6037888929455195, - "grad_norm": 0.404296875, - "learning_rate": 2.0843611238407945e-07, - "loss": 1.3241, - "step": 2679 - }, - { - "epoch": 2.6047624842805566, - "grad_norm": 0.40234375, - "learning_rate": 2.0741497203345673e-07, - "loss": 1.3255, - "step": 2680 - }, - { - "epoch": 2.6057360756155936, - "grad_norm": 0.400390625, - "learning_rate": 2.063962308777176e-07, - "loss": 1.2954, - "step": 2681 - }, - { - "epoch": 2.6067096669506307, - "grad_norm": 0.3984375, - "learning_rate": 2.0537988998297565e-07, - "loss": 1.3256, - "step": 2682 - }, - { - "epoch": 2.607683258285668, - "grad_norm": 0.392578125, - "learning_rate": 2.0436595041283454e-07, - "loss": 1.3044, - "step": 2683 - }, - { - "epoch": 2.608656849620705, - "grad_norm": 0.396484375, - "learning_rate": 2.033544132283838e-07, - "loss": 1.327, - "step": 2684 - }, - { - "epoch": 2.6096304409557423, - "grad_norm": 0.3984375, - "learning_rate": 2.023452794881986e-07, - "loss": 1.3128, - "step": 2685 - }, - { - "epoch": 2.6106040322907793, - "grad_norm": 0.39453125, - "learning_rate": 2.013385502483406e-07, - "loss": 1.3102, - "step": 2686 - }, - { - "epoch": 2.6115776236258164, - "grad_norm": 0.388671875, - "learning_rate": 2.0033422656235258e-07, - "loss": 1.3093, - "step": 2687 - }, - { - "epoch": 2.6125512149608534, - "grad_norm": 0.396484375, - "learning_rate": 1.993323094812627e-07, - "loss": 1.3181, - "step": 2688 - }, - { - "epoch": 2.6135248062958905, - "grad_norm": 0.392578125, - "learning_rate": 1.9833280005357864e-07, - "loss": 1.3326, - "step": 2689 - }, - { - "epoch": 2.6144983976309275, - "grad_norm": 0.400390625, - "learning_rate": 1.973356993252884e-07, - "loss": 1.3082, - "step": 2690 - }, - { - "epoch": 2.615471988965965, - "grad_norm": 0.39453125, - "learning_rate": 1.963410083398609e-07, - "loss": 1.3226, - "step": 2691 - }, - { - "epoch": 2.616445580301002, - "grad_norm": 0.40234375, - "learning_rate": 1.9534872813824158e-07, - "loss": 1.3134, - "step": 2692 - }, - { - "epoch": 2.617419171636039, - "grad_norm": 0.388671875, - "learning_rate": 1.9435885975885443e-07, - "loss": 1.3277, - "step": 2693 - }, - { - "epoch": 2.618392762971076, - "grad_norm": 0.400390625, - "learning_rate": 1.9337140423759838e-07, - "loss": 1.3206, - "step": 2694 - }, - { - "epoch": 2.619366354306113, - "grad_norm": 0.388671875, - "learning_rate": 1.9238636260784675e-07, - "loss": 1.3258, - "step": 2695 - }, - { - "epoch": 2.6203399456411507, - "grad_norm": 0.392578125, - "learning_rate": 1.914037359004489e-07, - "loss": 1.3277, - "step": 2696 - }, - { - "epoch": 2.6213135369761877, - "grad_norm": 0.392578125, - "learning_rate": 1.9042352514372504e-07, - "loss": 1.3244, - "step": 2697 - }, - { - "epoch": 2.622287128311225, - "grad_norm": 0.39453125, - "learning_rate": 1.894457313634679e-07, - "loss": 1.296, - "step": 2698 - }, - { - "epoch": 2.623260719646262, - "grad_norm": 0.380859375, - "learning_rate": 1.8847035558294037e-07, - "loss": 1.3252, - "step": 2699 - }, - { - "epoch": 2.624234310981299, - "grad_norm": 0.392578125, - "learning_rate": 1.8749739882287566e-07, - "loss": 1.304, - "step": 2700 - }, - { - "epoch": 2.625207902316336, - "grad_norm": 0.388671875, - "learning_rate": 1.8652686210147485e-07, - "loss": 1.3261, - "step": 2701 - }, - { - "epoch": 2.626181493651373, - "grad_norm": 0.392578125, - "learning_rate": 1.8555874643440662e-07, - "loss": 1.3186, - "step": 2702 - }, - { - "epoch": 2.62715508498641, - "grad_norm": 0.38671875, - "learning_rate": 1.8459305283480528e-07, - "loss": 1.3255, - "step": 2703 - }, - { - "epoch": 2.6281286763214475, - "grad_norm": 0.390625, - "learning_rate": 1.8362978231327184e-07, - "loss": 1.3142, - "step": 2704 - }, - { - "epoch": 2.6291022676564846, - "grad_norm": 0.396484375, - "learning_rate": 1.826689358778705e-07, - "loss": 1.2996, - "step": 2705 - }, - { - "epoch": 2.6300758589915216, - "grad_norm": 0.388671875, - "learning_rate": 1.817105145341297e-07, - "loss": 1.3071, - "step": 2706 - }, - { - "epoch": 2.6310494503265587, - "grad_norm": 0.3984375, - "learning_rate": 1.807545192850385e-07, - "loss": 1.3153, - "step": 2707 - }, - { - "epoch": 2.6320230416615957, - "grad_norm": 0.384765625, - "learning_rate": 1.7980095113104835e-07, - "loss": 1.2973, - "step": 2708 - }, - { - "epoch": 2.6329966329966332, - "grad_norm": 0.400390625, - "learning_rate": 1.7884981107006981e-07, - "loss": 1.3153, - "step": 2709 - }, - { - "epoch": 2.6339702243316703, - "grad_norm": 0.396484375, - "learning_rate": 1.7790110009747368e-07, - "loss": 1.3058, - "step": 2710 - }, - { - "epoch": 2.6349438156667073, - "grad_norm": 0.396484375, - "learning_rate": 1.7695481920608716e-07, - "loss": 1.3462, - "step": 2711 - }, - { - "epoch": 2.6359174070017444, - "grad_norm": 0.390625, - "learning_rate": 1.7601096938619556e-07, - "loss": 1.2838, - "step": 2712 - }, - { - "epoch": 2.6368909983367814, - "grad_norm": 0.392578125, - "learning_rate": 1.7506955162553908e-07, - "loss": 1.2936, - "step": 2713 - }, - { - "epoch": 2.6378645896718185, - "grad_norm": 0.404296875, - "learning_rate": 1.7413056690931406e-07, - "loss": 1.3187, - "step": 2714 - }, - { - "epoch": 2.6388381810068555, - "grad_norm": 0.3984375, - "learning_rate": 1.7319401622016897e-07, - "loss": 1.3051, - "step": 2715 - }, - { - "epoch": 2.6398117723418926, - "grad_norm": 0.39453125, - "learning_rate": 1.7225990053820724e-07, - "loss": 1.3195, - "step": 2716 - }, - { - "epoch": 2.64078536367693, - "grad_norm": 0.396484375, - "learning_rate": 1.7132822084098165e-07, - "loss": 1.3136, - "step": 2717 - }, - { - "epoch": 2.641758955011967, - "grad_norm": 0.396484375, - "learning_rate": 1.7039897810349786e-07, - "loss": 1.3162, - "step": 2718 - }, - { - "epoch": 2.642732546347004, - "grad_norm": 0.400390625, - "learning_rate": 1.6947217329820937e-07, - "loss": 1.3118, - "step": 2719 - }, - { - "epoch": 2.643706137682041, - "grad_norm": 0.3984375, - "learning_rate": 1.6854780739502003e-07, - "loss": 1.3109, - "step": 2720 - }, - { - "epoch": 2.6446797290170783, - "grad_norm": 0.39453125, - "learning_rate": 1.6762588136127995e-07, - "loss": 1.3261, - "step": 2721 - }, - { - "epoch": 2.6456533203521158, - "grad_norm": 0.40234375, - "learning_rate": 1.6670639616178792e-07, - "loss": 1.3326, - "step": 2722 - }, - { - "epoch": 2.646626911687153, - "grad_norm": 0.396484375, - "learning_rate": 1.6578935275878533e-07, - "loss": 1.3104, - "step": 2723 - }, - { - "epoch": 2.64760050302219, - "grad_norm": 0.390625, - "learning_rate": 1.6487475211196118e-07, - "loss": 1.3226, - "step": 2724 - }, - { - "epoch": 2.648574094357227, - "grad_norm": 0.400390625, - "learning_rate": 1.6396259517844598e-07, - "loss": 1.3035, - "step": 2725 - }, - { - "epoch": 2.649547685692264, - "grad_norm": 0.39453125, - "learning_rate": 1.6305288291281474e-07, - "loss": 1.3106, - "step": 2726 - }, - { - "epoch": 2.650521277027301, - "grad_norm": 0.400390625, - "learning_rate": 1.6214561626708258e-07, - "loss": 1.3109, - "step": 2727 - }, - { - "epoch": 2.651494868362338, - "grad_norm": 0.388671875, - "learning_rate": 1.6124079619070614e-07, - "loss": 1.3198, - "step": 2728 - }, - { - "epoch": 2.652468459697375, - "grad_norm": 0.396484375, - "learning_rate": 1.6033842363058134e-07, - "loss": 1.3292, - "step": 2729 - }, - { - "epoch": 2.6534420510324126, - "grad_norm": 0.400390625, - "learning_rate": 1.594384995310433e-07, - "loss": 1.2973, - "step": 2730 - }, - { - "epoch": 2.6544156423674496, - "grad_norm": 0.396484375, - "learning_rate": 1.585410248338634e-07, - "loss": 1.3261, - "step": 2731 - }, - { - "epoch": 2.6553892337024867, - "grad_norm": 0.400390625, - "learning_rate": 1.5764600047825207e-07, - "loss": 1.3151, - "step": 2732 - }, - { - "epoch": 2.6563628250375237, - "grad_norm": 0.404296875, - "learning_rate": 1.567534274008531e-07, - "loss": 1.3178, - "step": 2733 - }, - { - "epoch": 2.657336416372561, - "grad_norm": 0.392578125, - "learning_rate": 1.5586330653574704e-07, - "loss": 1.3188, - "step": 2734 - }, - { - "epoch": 2.6583100077075983, - "grad_norm": 0.3984375, - "learning_rate": 1.5497563881444577e-07, - "loss": 1.3305, - "step": 2735 - }, - { - "epoch": 2.6592835990426353, - "grad_norm": 0.392578125, - "learning_rate": 1.5409042516589646e-07, - "loss": 1.3147, - "step": 2736 - }, - { - "epoch": 2.6602571903776724, - "grad_norm": 0.400390625, - "learning_rate": 1.5320766651647613e-07, - "loss": 1.3299, - "step": 2737 - }, - { - "epoch": 2.6612307817127094, - "grad_norm": 0.408203125, - "learning_rate": 1.5232736378999468e-07, - "loss": 1.345, - "step": 2738 - }, - { - "epoch": 2.6622043730477465, - "grad_norm": 0.39453125, - "learning_rate": 1.5144951790768942e-07, - "loss": 1.3179, - "step": 2739 - }, - { - "epoch": 2.6631779643827835, - "grad_norm": 0.400390625, - "learning_rate": 1.5057412978822906e-07, - "loss": 1.3291, - "step": 2740 - }, - { - "epoch": 2.6641515557178206, - "grad_norm": 0.400390625, - "learning_rate": 1.4970120034770775e-07, - "loss": 1.3238, - "step": 2741 - }, - { - "epoch": 2.6651251470528576, - "grad_norm": 0.3984375, - "learning_rate": 1.488307304996492e-07, - "loss": 1.3265, - "step": 2742 - }, - { - "epoch": 2.666098738387895, - "grad_norm": 0.39453125, - "learning_rate": 1.4796272115500082e-07, - "loss": 1.3026, - "step": 2743 - }, - { - "epoch": 2.667072329722932, - "grad_norm": 0.388671875, - "learning_rate": 1.4709717322213712e-07, - "loss": 1.3354, - "step": 2744 - }, - { - "epoch": 2.6680459210579692, - "grad_norm": 0.390625, - "learning_rate": 1.462340876068549e-07, - "loss": 1.3258, - "step": 2745 - }, - { - "epoch": 2.6690195123930063, - "grad_norm": 0.39453125, - "learning_rate": 1.453734652123756e-07, - "loss": 1.3267, - "step": 2746 - }, - { - "epoch": 2.6699931037280433, - "grad_norm": 0.40234375, - "learning_rate": 1.445153069393418e-07, - "loss": 1.3174, - "step": 2747 - }, - { - "epoch": 2.670966695063081, - "grad_norm": 0.396484375, - "learning_rate": 1.4365961368581844e-07, - "loss": 1.3072, - "step": 2748 - }, - { - "epoch": 2.671940286398118, - "grad_norm": 0.392578125, - "learning_rate": 1.428063863472895e-07, - "loss": 1.3099, - "step": 2749 - }, - { - "epoch": 2.672913877733155, - "grad_norm": 0.404296875, - "learning_rate": 1.4195562581666017e-07, - "loss": 1.3071, - "step": 2750 - }, - { - "epoch": 2.673887469068192, - "grad_norm": 0.3984375, - "learning_rate": 1.411073329842519e-07, - "loss": 1.3049, - "step": 2751 - }, - { - "epoch": 2.674861060403229, - "grad_norm": 0.390625, - "learning_rate": 1.4026150873780564e-07, - "loss": 1.3209, - "step": 2752 - }, - { - "epoch": 2.675834651738266, - "grad_norm": 0.392578125, - "learning_rate": 1.3941815396247783e-07, - "loss": 1.3175, - "step": 2753 - }, - { - "epoch": 2.676808243073303, - "grad_norm": 0.40234375, - "learning_rate": 1.3857726954084134e-07, - "loss": 1.3318, - "step": 2754 - }, - { - "epoch": 2.67778183440834, - "grad_norm": 0.400390625, - "learning_rate": 1.3773885635288308e-07, - "loss": 1.3257, - "step": 2755 - }, - { - "epoch": 2.6787554257433777, - "grad_norm": 0.392578125, - "learning_rate": 1.3690291527600458e-07, - "loss": 1.2978, - "step": 2756 - }, - { - "epoch": 2.6797290170784147, - "grad_norm": 0.404296875, - "learning_rate": 1.3606944718501908e-07, - "loss": 1.3257, - "step": 2757 - }, - { - "epoch": 2.6807026084134518, - "grad_norm": 0.388671875, - "learning_rate": 1.3523845295215332e-07, - "loss": 1.3187, - "step": 2758 - }, - { - "epoch": 2.681676199748489, - "grad_norm": 0.396484375, - "learning_rate": 1.344099334470439e-07, - "loss": 1.3108, - "step": 2759 - }, - { - "epoch": 2.682649791083526, - "grad_norm": 0.38671875, - "learning_rate": 1.335838895367389e-07, - "loss": 1.3245, - "step": 2760 - }, - { - "epoch": 2.6836233824185634, - "grad_norm": 0.396484375, - "learning_rate": 1.3276032208569407e-07, - "loss": 1.309, - "step": 2761 - }, - { - "epoch": 2.6845969737536004, - "grad_norm": 0.400390625, - "learning_rate": 1.319392319557755e-07, - "loss": 1.3319, - "step": 2762 - }, - { - "epoch": 2.6855705650886375, - "grad_norm": 0.3984375, - "learning_rate": 1.3112062000625452e-07, - "loss": 1.3149, - "step": 2763 - }, - { - "epoch": 2.6865441564236745, - "grad_norm": 0.392578125, - "learning_rate": 1.3030448709381082e-07, - "loss": 1.3122, - "step": 2764 - }, - { - "epoch": 2.6875177477587116, - "grad_norm": 0.40234375, - "learning_rate": 1.2949083407252898e-07, - "loss": 1.303, - "step": 2765 - }, - { - "epoch": 2.6884913390937486, - "grad_norm": 0.4140625, - "learning_rate": 1.2867966179389902e-07, - "loss": 1.3133, - "step": 2766 - }, - { - "epoch": 2.6894649304287856, - "grad_norm": 0.400390625, - "learning_rate": 1.278709711068138e-07, - "loss": 1.3121, - "step": 2767 - }, - { - "epoch": 2.6904385217638227, - "grad_norm": 0.39453125, - "learning_rate": 1.2706476285756997e-07, - "loss": 1.323, - "step": 2768 - }, - { - "epoch": 2.69141211309886, - "grad_norm": 0.3984375, - "learning_rate": 1.26261037889866e-07, - "loss": 1.3148, - "step": 2769 - }, - { - "epoch": 2.6923857044338972, - "grad_norm": 0.39453125, - "learning_rate": 1.2545979704480181e-07, - "loss": 1.2991, - "step": 2770 - }, - { - "epoch": 2.6933592957689343, - "grad_norm": 0.40234375, - "learning_rate": 1.2466104116087728e-07, - "loss": 1.3178, - "step": 2771 - }, - { - "epoch": 2.6943328871039713, - "grad_norm": 0.404296875, - "learning_rate": 1.2386477107399264e-07, - "loss": 1.3164, - "step": 2772 - }, - { - "epoch": 2.695306478439009, - "grad_norm": 0.39453125, - "learning_rate": 1.2307098761744547e-07, - "loss": 1.3033, - "step": 2773 - }, - { - "epoch": 2.696280069774046, - "grad_norm": 0.404296875, - "learning_rate": 1.2227969162193238e-07, - "loss": 1.3221, - "step": 2774 - }, - { - "epoch": 2.697253661109083, - "grad_norm": 0.396484375, - "learning_rate": 1.2149088391554599e-07, - "loss": 1.3038, - "step": 2775 - }, - { - "epoch": 2.69822725244412, - "grad_norm": 0.40625, - "learning_rate": 1.2070456532377483e-07, - "loss": 1.2977, - "step": 2776 - }, - { - "epoch": 2.699200843779157, - "grad_norm": 0.3984375, - "learning_rate": 1.1992073666950376e-07, - "loss": 1.3398, - "step": 2777 - }, - { - "epoch": 2.700174435114194, - "grad_norm": 0.3984375, - "learning_rate": 1.1913939877301023e-07, - "loss": 1.3146, - "step": 2778 - }, - { - "epoch": 2.701148026449231, - "grad_norm": 0.40625, - "learning_rate": 1.1836055245196598e-07, - "loss": 1.3235, - "step": 2779 - }, - { - "epoch": 2.702121617784268, - "grad_norm": 0.390625, - "learning_rate": 1.1758419852143599e-07, - "loss": 1.2857, - "step": 2780 - }, - { - "epoch": 2.7030952091193052, - "grad_norm": 0.392578125, - "learning_rate": 1.1681033779387507e-07, - "loss": 1.3169, - "step": 2781 - }, - { - "epoch": 2.7040688004543427, - "grad_norm": 0.39453125, - "learning_rate": 1.1603897107913126e-07, - "loss": 1.3133, - "step": 2782 - }, - { - "epoch": 2.7050423917893798, - "grad_norm": 0.408203125, - "learning_rate": 1.1527009918444076e-07, - "loss": 1.3187, - "step": 2783 - }, - { - "epoch": 2.706015983124417, - "grad_norm": 0.40625, - "learning_rate": 1.145037229144294e-07, - "loss": 1.3237, - "step": 2784 - }, - { - "epoch": 2.706989574459454, - "grad_norm": 0.40625, - "learning_rate": 1.137398430711123e-07, - "loss": 1.3193, - "step": 2785 - }, - { - "epoch": 2.7079631657944914, - "grad_norm": 0.404296875, - "learning_rate": 1.1297846045389e-07, - "loss": 1.3331, - "step": 2786 - }, - { - "epoch": 2.7089367571295284, - "grad_norm": 0.392578125, - "learning_rate": 1.1221957585955207e-07, - "loss": 1.2998, - "step": 2787 - }, - { - "epoch": 2.7099103484645655, - "grad_norm": 0.40625, - "learning_rate": 1.1146319008227214e-07, - "loss": 1.3189, - "step": 2788 - }, - { - "epoch": 2.7108839397996025, - "grad_norm": 0.396484375, - "learning_rate": 1.1070930391361007e-07, - "loss": 1.3314, - "step": 2789 - }, - { - "epoch": 2.7118575311346396, - "grad_norm": 0.384765625, - "learning_rate": 1.0995791814250894e-07, - "loss": 1.2824, - "step": 2790 - }, - { - "epoch": 2.7128311224696766, - "grad_norm": 0.390625, - "learning_rate": 1.0920903355529505e-07, - "loss": 1.3075, - "step": 2791 - }, - { - "epoch": 2.7138047138047137, - "grad_norm": 0.400390625, - "learning_rate": 1.0846265093567815e-07, - "loss": 1.2965, - "step": 2792 - }, - { - "epoch": 2.7147783051397507, - "grad_norm": 0.396484375, - "learning_rate": 1.0771877106474927e-07, - "loss": 1.3235, - "step": 2793 - }, - { - "epoch": 2.715751896474788, - "grad_norm": 0.400390625, - "learning_rate": 1.0697739472097956e-07, - "loss": 1.3308, - "step": 2794 - }, - { - "epoch": 2.7167254878098253, - "grad_norm": 0.3984375, - "learning_rate": 1.062385226802215e-07, - "loss": 1.338, - "step": 2795 - }, - { - "epoch": 2.7176990791448623, - "grad_norm": 0.396484375, - "learning_rate": 1.0550215571570599e-07, - "loss": 1.3218, - "step": 2796 - }, - { - "epoch": 2.7186726704798994, - "grad_norm": 0.392578125, - "learning_rate": 1.0476829459804245e-07, - "loss": 1.3114, - "step": 2797 - }, - { - "epoch": 2.7196462618149364, - "grad_norm": 0.390625, - "learning_rate": 1.0403694009521793e-07, - "loss": 1.3237, - "step": 2798 - }, - { - "epoch": 2.720619853149974, - "grad_norm": 0.39453125, - "learning_rate": 1.0330809297259714e-07, - "loss": 1.33, - "step": 2799 - }, - { - "epoch": 2.721593444485011, - "grad_norm": 0.408203125, - "learning_rate": 1.0258175399291914e-07, - "loss": 1.3131, - "step": 2800 - }, - { - "epoch": 2.722567035820048, - "grad_norm": 0.3984375, - "learning_rate": 1.0185792391629978e-07, - "loss": 1.3231, - "step": 2801 - }, - { - "epoch": 2.723540627155085, - "grad_norm": 0.404296875, - "learning_rate": 1.0113660350022786e-07, - "loss": 1.3088, - "step": 2802 - }, - { - "epoch": 2.724514218490122, - "grad_norm": 0.404296875, - "learning_rate": 1.0041779349956788e-07, - "loss": 1.3269, - "step": 2803 - }, - { - "epoch": 2.725487809825159, - "grad_norm": 0.39453125, - "learning_rate": 9.970149466655477e-08, - "loss": 1.3081, - "step": 2804 - }, - { - "epoch": 2.726461401160196, - "grad_norm": 0.39453125, - "learning_rate": 9.898770775079752e-08, - "loss": 1.3188, - "step": 2805 - }, - { - "epoch": 2.7274349924952332, - "grad_norm": 0.396484375, - "learning_rate": 9.82764334992753e-08, - "loss": 1.303, - "step": 2806 - }, - { - "epoch": 2.7284085838302707, - "grad_norm": 0.400390625, - "learning_rate": 9.75676726563382e-08, - "loss": 1.312, - "step": 2807 - }, - { - "epoch": 2.729382175165308, - "grad_norm": 0.404296875, - "learning_rate": 9.686142596370545e-08, - "loss": 1.315, - "step": 2808 - }, - { - "epoch": 2.730355766500345, - "grad_norm": 0.39453125, - "learning_rate": 9.615769416046639e-08, - "loss": 1.2908, - "step": 2809 - }, - { - "epoch": 2.731329357835382, - "grad_norm": 0.39453125, - "learning_rate": 9.54564779830769e-08, - "loss": 1.3208, - "step": 2810 - }, - { - "epoch": 2.732302949170419, - "grad_norm": 0.39453125, - "learning_rate": 9.475777816536219e-08, - "loss": 1.3289, - "step": 2811 - }, - { - "epoch": 2.7332765405054564, - "grad_norm": 0.400390625, - "learning_rate": 9.406159543851184e-08, - "loss": 1.3351, - "step": 2812 - }, - { - "epoch": 2.7342501318404935, - "grad_norm": 0.400390625, - "learning_rate": 9.336793053108361e-08, - "loss": 1.3239, - "step": 2813 - }, - { - "epoch": 2.7352237231755305, - "grad_norm": 0.388671875, - "learning_rate": 9.267678416899823e-08, - "loss": 1.3134, - "step": 2814 - }, - { - "epoch": 2.7361973145105676, - "grad_norm": 0.400390625, - "learning_rate": 9.198815707554237e-08, - "loss": 1.3196, - "step": 2815 - }, - { - "epoch": 2.7371709058456046, - "grad_norm": 0.3984375, - "learning_rate": 9.130204997136543e-08, - "loss": 1.306, - "step": 2816 - }, - { - "epoch": 2.7381444971806417, - "grad_norm": 0.390625, - "learning_rate": 9.061846357448028e-08, - "loss": 1.2892, - "step": 2817 - }, - { - "epoch": 2.7391180885156787, - "grad_norm": 0.396484375, - "learning_rate": 8.993739860026108e-08, - "loss": 1.3408, - "step": 2818 - }, - { - "epoch": 2.7400916798507158, - "grad_norm": 0.396484375, - "learning_rate": 8.925885576144377e-08, - "loss": 1.3204, - "step": 2819 - }, - { - "epoch": 2.7410652711857533, - "grad_norm": 0.39453125, - "learning_rate": 8.858283576812482e-08, - "loss": 1.3129, - "step": 2820 - }, - { - "epoch": 2.7420388625207903, - "grad_norm": 0.39453125, - "learning_rate": 8.79093393277608e-08, - "loss": 1.3181, - "step": 2821 - }, - { - "epoch": 2.7430124538558274, - "grad_norm": 0.39453125, - "learning_rate": 8.723836714516681e-08, - "loss": 1.3159, - "step": 2822 - }, - { - "epoch": 2.7439860451908644, - "grad_norm": 0.39453125, - "learning_rate": 8.656991992251674e-08, - "loss": 1.3085, - "step": 2823 - }, - { - "epoch": 2.7449596365259015, - "grad_norm": 0.3984375, - "learning_rate": 8.590399835934154e-08, - "loss": 1.3067, - "step": 2824 - }, - { - "epoch": 2.745933227860939, - "grad_norm": 0.40234375, - "learning_rate": 8.524060315253019e-08, - "loss": 1.3227, - "step": 2825 - }, - { - "epoch": 2.746906819195976, - "grad_norm": 0.39453125, - "learning_rate": 8.45797349963265e-08, - "loss": 1.3254, - "step": 2826 - }, - { - "epoch": 2.747880410531013, - "grad_norm": 0.404296875, - "learning_rate": 8.392139458233056e-08, - "loss": 1.3098, - "step": 2827 - }, - { - "epoch": 2.74885400186605, - "grad_norm": 0.3984375, - "learning_rate": 8.326558259949713e-08, - "loss": 1.3429, - "step": 2828 - }, - { - "epoch": 2.749827593201087, - "grad_norm": 0.39453125, - "learning_rate": 8.26122997341347e-08, - "loss": 1.3122, - "step": 2829 - }, - { - "epoch": 2.750801184536124, - "grad_norm": 0.396484375, - "learning_rate": 8.196154666990475e-08, - "loss": 1.3015, - "step": 2830 - }, - { - "epoch": 2.7517747758711613, - "grad_norm": 0.404296875, - "learning_rate": 8.131332408782222e-08, - "loss": 1.3588, - "step": 2831 - }, - { - "epoch": 2.7527483672061983, - "grad_norm": 0.392578125, - "learning_rate": 8.066763266625283e-08, - "loss": 1.3171, - "step": 2832 - }, - { - "epoch": 2.753721958541236, - "grad_norm": 0.396484375, - "learning_rate": 8.002447308091466e-08, - "loss": 1.3136, - "step": 2833 - }, - { - "epoch": 2.754695549876273, - "grad_norm": 0.384765625, - "learning_rate": 7.938384600487486e-08, - "loss": 1.3255, - "step": 2834 - }, - { - "epoch": 2.75566914121131, - "grad_norm": 0.396484375, - "learning_rate": 7.874575210855134e-08, - "loss": 1.2997, - "step": 2835 - }, - { - "epoch": 2.756642732546347, - "grad_norm": 0.404296875, - "learning_rate": 7.811019205971021e-08, - "loss": 1.3134, - "step": 2836 - }, - { - "epoch": 2.757616323881384, - "grad_norm": 0.40234375, - "learning_rate": 7.747716652346692e-08, - "loss": 1.3239, - "step": 2837 - }, - { - "epoch": 2.7585899152164215, - "grad_norm": 0.404296875, - "learning_rate": 7.684667616228353e-08, - "loss": 1.3154, - "step": 2838 - }, - { - "epoch": 2.7595635065514585, - "grad_norm": 0.3984375, - "learning_rate": 7.621872163596999e-08, - "loss": 1.3054, - "step": 2839 - }, - { - "epoch": 2.7605370978864956, - "grad_norm": 0.400390625, - "learning_rate": 7.559330360168148e-08, - "loss": 1.3236, - "step": 2840 - }, - { - "epoch": 2.7615106892215326, - "grad_norm": 0.39453125, - "learning_rate": 7.497042271391947e-08, - "loss": 1.2818, - "step": 2841 - }, - { - "epoch": 2.7624842805565697, - "grad_norm": 0.40625, - "learning_rate": 7.435007962452972e-08, - "loss": 1.3123, - "step": 2842 - }, - { - "epoch": 2.7634578718916067, - "grad_norm": 0.400390625, - "learning_rate": 7.373227498270353e-08, - "loss": 1.3257, - "step": 2843 - }, - { - "epoch": 2.764431463226644, - "grad_norm": 0.40234375, - "learning_rate": 7.311700943497369e-08, - "loss": 1.3013, - "step": 2844 - }, - { - "epoch": 2.765405054561681, - "grad_norm": 0.390625, - "learning_rate": 7.250428362521711e-08, - "loss": 1.3261, - "step": 2845 - }, - { - "epoch": 2.7663786458967183, - "grad_norm": 0.39453125, - "learning_rate": 7.189409819465255e-08, - "loss": 1.3097, - "step": 2846 - }, - { - "epoch": 2.7673522372317554, - "grad_norm": 0.390625, - "learning_rate": 7.128645378184085e-08, - "loss": 1.2819, - "step": 2847 - }, - { - "epoch": 2.7683258285667924, - "grad_norm": 0.396484375, - "learning_rate": 7.068135102268226e-08, - "loss": 1.3146, - "step": 2848 - }, - { - "epoch": 2.7692994199018295, - "grad_norm": 0.396484375, - "learning_rate": 7.007879055041855e-08, - "loss": 1.3027, - "step": 2849 - }, - { - "epoch": 2.7702730112368665, - "grad_norm": 0.39453125, - "learning_rate": 6.947877299563032e-08, - "loss": 1.3155, - "step": 2850 - }, - { - "epoch": 2.771246602571904, - "grad_norm": 0.39453125, - "learning_rate": 6.888129898623752e-08, - "loss": 1.3179, - "step": 2851 - }, - { - "epoch": 2.772220193906941, - "grad_norm": 0.3984375, - "learning_rate": 6.828636914749748e-08, - "loss": 1.3121, - "step": 2852 - }, - { - "epoch": 2.773193785241978, - "grad_norm": 0.396484375, - "learning_rate": 6.769398410200579e-08, - "loss": 1.3119, - "step": 2853 - }, - { - "epoch": 2.774167376577015, - "grad_norm": 0.388671875, - "learning_rate": 6.710414446969405e-08, - "loss": 1.3194, - "step": 2854 - }, - { - "epoch": 2.7751409679120522, - "grad_norm": 0.390625, - "learning_rate": 6.651685086783155e-08, - "loss": 1.3126, - "step": 2855 - }, - { - "epoch": 2.7761145592470893, - "grad_norm": 0.392578125, - "learning_rate": 6.593210391102139e-08, - "loss": 1.3027, - "step": 2856 - }, - { - "epoch": 2.7770881505821263, - "grad_norm": 0.396484375, - "learning_rate": 6.534990421120296e-08, - "loss": 1.3011, - "step": 2857 - }, - { - "epoch": 2.7780617419171634, - "grad_norm": 0.396484375, - "learning_rate": 6.477025237764889e-08, - "loss": 1.3246, - "step": 2858 - }, - { - "epoch": 2.779035333252201, - "grad_norm": 0.400390625, - "learning_rate": 6.419314901696671e-08, - "loss": 1.3045, - "step": 2859 - }, - { - "epoch": 2.780008924587238, - "grad_norm": 0.392578125, - "learning_rate": 6.361859473309556e-08, - "loss": 1.3222, - "step": 2860 - }, - { - "epoch": 2.780982515922275, - "grad_norm": 0.396484375, - "learning_rate": 6.304659012730835e-08, - "loss": 1.3308, - "step": 2861 - }, - { - "epoch": 2.781956107257312, - "grad_norm": 0.38671875, - "learning_rate": 6.247713579820847e-08, - "loss": 1.3075, - "step": 2862 - }, - { - "epoch": 2.782929698592349, - "grad_norm": 0.396484375, - "learning_rate": 6.191023234173143e-08, - "loss": 1.3197, - "step": 2863 - }, - { - "epoch": 2.7839032899273866, - "grad_norm": 0.39453125, - "learning_rate": 6.134588035114242e-08, - "loss": 1.3026, - "step": 2864 - }, - { - "epoch": 2.7848768812624236, - "grad_norm": 0.388671875, - "learning_rate": 6.078408041703732e-08, - "loss": 1.311, - "step": 2865 - }, - { - "epoch": 2.7858504725974607, - "grad_norm": 0.396484375, - "learning_rate": 6.022483312734112e-08, - "loss": 1.3102, - "step": 2866 - }, - { - "epoch": 2.7868240639324977, - "grad_norm": 0.39453125, - "learning_rate": 5.966813906730679e-08, - "loss": 1.3189, - "step": 2867 - }, - { - "epoch": 2.7877976552675348, - "grad_norm": 0.388671875, - "learning_rate": 5.9113998819515515e-08, - "loss": 1.3078, - "step": 2868 - }, - { - "epoch": 2.788771246602572, - "grad_norm": 0.400390625, - "learning_rate": 5.8562412963877044e-08, - "loss": 1.2978, - "step": 2869 - }, - { - "epoch": 2.789744837937609, - "grad_norm": 0.3984375, - "learning_rate": 5.80133820776263e-08, - "loss": 1.3239, - "step": 2870 - }, - { - "epoch": 2.790718429272646, - "grad_norm": 0.392578125, - "learning_rate": 5.74669067353259e-08, - "loss": 1.3089, - "step": 2871 - }, - { - "epoch": 2.7916920206076834, - "grad_norm": 0.390625, - "learning_rate": 5.6922987508862546e-08, - "loss": 1.294, - "step": 2872 - }, - { - "epoch": 2.7926656119427204, - "grad_norm": 0.396484375, - "learning_rate": 5.638162496744981e-08, - "loss": 1.3267, - "step": 2873 - }, - { - "epoch": 2.7936392032777575, - "grad_norm": 0.408203125, - "learning_rate": 5.5842819677624225e-08, - "loss": 1.3228, - "step": 2874 - }, - { - "epoch": 2.7946127946127945, - "grad_norm": 0.392578125, - "learning_rate": 5.530657220324615e-08, - "loss": 1.3066, - "step": 2875 - }, - { - "epoch": 2.7955863859478316, - "grad_norm": 0.39453125, - "learning_rate": 5.477288310550055e-08, - "loss": 1.302, - "step": 2876 - }, - { - "epoch": 2.796559977282869, - "grad_norm": 0.408203125, - "learning_rate": 5.424175294289374e-08, - "loss": 1.3167, - "step": 2877 - }, - { - "epoch": 2.797533568617906, - "grad_norm": 0.3984375, - "learning_rate": 5.371318227125416e-08, - "loss": 1.2813, - "step": 2878 - }, - { - "epoch": 2.798507159952943, - "grad_norm": 0.388671875, - "learning_rate": 5.318717164373266e-08, - "loss": 1.3026, - "step": 2879 - }, - { - "epoch": 2.7994807512879802, - "grad_norm": 0.3984375, - "learning_rate": 5.266372161079975e-08, - "loss": 1.3161, - "step": 2880 - }, - { - "epoch": 2.8004543426230173, - "grad_norm": 0.40625, - "learning_rate": 5.2142832720247784e-08, - "loss": 1.3258, - "step": 2881 - }, - { - "epoch": 2.8014279339580543, - "grad_norm": 0.392578125, - "learning_rate": 5.16245055171874e-08, - "loss": 1.3059, - "step": 2882 - }, - { - "epoch": 2.8024015252930914, - "grad_norm": 0.388671875, - "learning_rate": 5.110874054404941e-08, - "loss": 1.306, - "step": 2883 - }, - { - "epoch": 2.8033751166281284, - "grad_norm": 0.404296875, - "learning_rate": 5.059553834058289e-08, - "loss": 1.3173, - "step": 2884 - }, - { - "epoch": 2.804348707963166, - "grad_norm": 0.400390625, - "learning_rate": 5.008489944385464e-08, - "loss": 1.3198, - "step": 2885 - }, - { - "epoch": 2.805322299298203, - "grad_norm": 0.39453125, - "learning_rate": 4.957682438824996e-08, - "loss": 1.3263, - "step": 2886 - }, - { - "epoch": 2.80629589063324, - "grad_norm": 0.396484375, - "learning_rate": 4.9071313705469635e-08, - "loss": 1.3138, - "step": 2887 - }, - { - "epoch": 2.807269481968277, - "grad_norm": 0.39453125, - "learning_rate": 4.856836792453218e-08, - "loss": 1.298, - "step": 2888 - }, - { - "epoch": 2.808243073303314, - "grad_norm": 0.396484375, - "learning_rate": 4.806798757177128e-08, - "loss": 1.3065, - "step": 2889 - }, - { - "epoch": 2.8092166646383516, - "grad_norm": 0.40234375, - "learning_rate": 4.7570173170835846e-08, - "loss": 1.3114, - "step": 2890 - }, - { - "epoch": 2.8101902559733887, - "grad_norm": 0.390625, - "learning_rate": 4.70749252426897e-08, - "loss": 1.3133, - "step": 2891 - }, - { - "epoch": 2.8111638473084257, - "grad_norm": 0.396484375, - "learning_rate": 4.6582244305611034e-08, - "loss": 1.3155, - "step": 2892 - }, - { - "epoch": 2.8121374386434628, - "grad_norm": 0.4140625, - "learning_rate": 4.6092130875190766e-08, - "loss": 1.3006, - "step": 2893 - }, - { - "epoch": 2.8131110299785, - "grad_norm": 0.390625, - "learning_rate": 4.5604585464334436e-08, - "loss": 1.3369, - "step": 2894 - }, - { - "epoch": 2.814084621313537, - "grad_norm": 0.38671875, - "learning_rate": 4.5119608583258646e-08, - "loss": 1.3226, - "step": 2895 - }, - { - "epoch": 2.815058212648574, - "grad_norm": 0.396484375, - "learning_rate": 4.4637200739493514e-08, - "loss": 1.3146, - "step": 2896 - }, - { - "epoch": 2.816031803983611, - "grad_norm": 0.396484375, - "learning_rate": 4.415736243787882e-08, - "loss": 1.308, - "step": 2897 - }, - { - "epoch": 2.8170053953186485, - "grad_norm": 0.3984375, - "learning_rate": 4.368009418056707e-08, - "loss": 1.3103, - "step": 2898 - }, - { - "epoch": 2.8179789866536855, - "grad_norm": 0.392578125, - "learning_rate": 4.32053964670201e-08, - "loss": 1.3023, - "step": 2899 - }, - { - "epoch": 2.8189525779887226, - "grad_norm": 0.41015625, - "learning_rate": 4.273326979400999e-08, - "loss": 1.3158, - "step": 2900 - }, - { - "epoch": 2.8199261693237596, - "grad_norm": 0.404296875, - "learning_rate": 4.226371465561846e-08, - "loss": 1.3245, - "step": 2901 - }, - { - "epoch": 2.8208997606587967, - "grad_norm": 0.40625, - "learning_rate": 4.179673154323605e-08, - "loss": 1.2983, - "step": 2902 - }, - { - "epoch": 2.821873351993834, - "grad_norm": 0.400390625, - "learning_rate": 4.1332320945561e-08, - "loss": 1.3317, - "step": 2903 - }, - { - "epoch": 2.822846943328871, - "grad_norm": 0.404296875, - "learning_rate": 4.0870483348600386e-08, - "loss": 1.3029, - "step": 2904 - }, - { - "epoch": 2.8238205346639083, - "grad_norm": 0.3984375, - "learning_rate": 4.0411219235667877e-08, - "loss": 1.3209, - "step": 2905 - }, - { - "epoch": 2.8247941259989453, - "grad_norm": 0.390625, - "learning_rate": 3.9954529087384844e-08, - "loss": 1.3222, - "step": 2906 - }, - { - "epoch": 2.8257677173339824, - "grad_norm": 0.390625, - "learning_rate": 3.950041338167787e-08, - "loss": 1.3242, - "step": 2907 - }, - { - "epoch": 2.8267413086690194, - "grad_norm": 0.404296875, - "learning_rate": 3.9048872593780426e-08, - "loss": 1.3201, - "step": 2908 - }, - { - "epoch": 2.8277149000040565, - "grad_norm": 0.39453125, - "learning_rate": 3.8599907196230636e-08, - "loss": 1.3172, - "step": 2909 - }, - { - "epoch": 2.8286884913390935, - "grad_norm": 0.396484375, - "learning_rate": 3.815351765887182e-08, - "loss": 1.3079, - "step": 2910 - }, - { - "epoch": 2.829662082674131, - "grad_norm": 0.396484375, - "learning_rate": 3.770970444885114e-08, - "loss": 1.3196, - "step": 2911 - }, - { - "epoch": 2.830635674009168, - "grad_norm": 0.384765625, - "learning_rate": 3.72684680306204e-08, - "loss": 1.3144, - "step": 2912 - }, - { - "epoch": 2.831609265344205, - "grad_norm": 0.400390625, - "learning_rate": 3.682980886593412e-08, - "loss": 1.3211, - "step": 2913 - }, - { - "epoch": 2.832582856679242, - "grad_norm": 0.408203125, - "learning_rate": 3.639372741385039e-08, - "loss": 1.3257, - "step": 2914 - }, - { - "epoch": 2.833556448014279, - "grad_norm": 0.3984375, - "learning_rate": 3.596022413072886e-08, - "loss": 1.3351, - "step": 2915 - }, - { - "epoch": 2.8345300393493167, - "grad_norm": 0.396484375, - "learning_rate": 3.5529299470232206e-08, - "loss": 1.3217, - "step": 2916 - }, - { - "epoch": 2.8355036306843537, - "grad_norm": 0.3984375, - "learning_rate": 3.51009538833233e-08, - "loss": 1.3064, - "step": 2917 - }, - { - "epoch": 2.836477222019391, - "grad_norm": 0.396484375, - "learning_rate": 3.4675187818267174e-08, - "loss": 1.3091, - "step": 2918 - }, - { - "epoch": 2.837450813354428, - "grad_norm": 0.3984375, - "learning_rate": 3.4252001720628816e-08, - "loss": 1.3193, - "step": 2919 - }, - { - "epoch": 2.838424404689465, - "grad_norm": 0.392578125, - "learning_rate": 3.383139603327312e-08, - "loss": 1.3046, - "step": 2920 - }, - { - "epoch": 2.839397996024502, - "grad_norm": 0.39453125, - "learning_rate": 3.3413371196364954e-08, - "loss": 1.3227, - "step": 2921 - }, - { - "epoch": 2.840371587359539, - "grad_norm": 0.388671875, - "learning_rate": 3.299792764736798e-08, - "loss": 1.2949, - "step": 2922 - }, - { - "epoch": 2.841345178694576, - "grad_norm": 0.3984375, - "learning_rate": 3.25850658210447e-08, - "loss": 1.3157, - "step": 2923 - }, - { - "epoch": 2.8423187700296135, - "grad_norm": 0.40234375, - "learning_rate": 3.2174786149456184e-08, - "loss": 1.32, - "step": 2924 - }, - { - "epoch": 2.8432923613646506, - "grad_norm": 0.39453125, - "learning_rate": 3.176708906196063e-08, - "loss": 1.3047, - "step": 2925 - }, - { - "epoch": 2.8442659526996876, - "grad_norm": 0.39453125, - "learning_rate": 3.136197498521398e-08, - "loss": 1.3176, - "step": 2926 - }, - { - "epoch": 2.8452395440347247, - "grad_norm": 0.390625, - "learning_rate": 3.0959444343169055e-08, - "loss": 1.3141, - "step": 2927 - }, - { - "epoch": 2.8462131353697617, - "grad_norm": 0.396484375, - "learning_rate": 3.0559497557074715e-08, - "loss": 1.3156, - "step": 2928 - }, - { - "epoch": 2.847186726704799, - "grad_norm": 0.392578125, - "learning_rate": 3.016213504547616e-08, - "loss": 1.3105, - "step": 2929 - }, - { - "epoch": 2.8481603180398363, - "grad_norm": 0.400390625, - "learning_rate": 2.9767357224214365e-08, - "loss": 1.3098, - "step": 2930 - }, - { - "epoch": 2.8491339093748733, - "grad_norm": 0.39453125, - "learning_rate": 2.9375164506424693e-08, - "loss": 1.311, - "step": 2931 - }, - { - "epoch": 2.8501075007099104, - "grad_norm": 0.3984375, - "learning_rate": 2.8985557302537994e-08, - "loss": 1.3018, - "step": 2932 - }, - { - "epoch": 2.8510810920449474, - "grad_norm": 0.39453125, - "learning_rate": 2.8598536020278678e-08, - "loss": 1.3184, - "step": 2933 - }, - { - "epoch": 2.8520546833799845, - "grad_norm": 0.41015625, - "learning_rate": 2.8214101064665545e-08, - "loss": 1.3352, - "step": 2934 - }, - { - "epoch": 2.8530282747150215, - "grad_norm": 0.390625, - "learning_rate": 2.7832252838010387e-08, - "loss": 1.3224, - "step": 2935 - }, - { - "epoch": 2.8540018660500586, - "grad_norm": 0.3984375, - "learning_rate": 2.7452991739918277e-08, - "loss": 1.3408, - "step": 2936 - }, - { - "epoch": 2.854975457385096, - "grad_norm": 0.41796875, - "learning_rate": 2.7076318167286452e-08, - "loss": 1.3088, - "step": 2937 - }, - { - "epoch": 2.855949048720133, - "grad_norm": 0.388671875, - "learning_rate": 2.670223251430515e-08, - "loss": 1.298, - "step": 2938 - }, - { - "epoch": 2.85692264005517, - "grad_norm": 0.39453125, - "learning_rate": 2.63307351724551e-08, - "loss": 1.2909, - "step": 2939 - }, - { - "epoch": 2.857896231390207, - "grad_norm": 0.416015625, - "learning_rate": 2.5961826530509214e-08, - "loss": 1.3313, - "step": 2940 - }, - { - "epoch": 2.8588698227252443, - "grad_norm": 0.396484375, - "learning_rate": 2.5595506974531448e-08, - "loss": 1.3273, - "step": 2941 - }, - { - "epoch": 2.8598434140602818, - "grad_norm": 0.38671875, - "learning_rate": 2.5231776887875703e-08, - "loss": 1.3218, - "step": 2942 - }, - { - "epoch": 2.860817005395319, - "grad_norm": 0.400390625, - "learning_rate": 2.4870636651186388e-08, - "loss": 1.3012, - "step": 2943 - }, - { - "epoch": 2.861790596730356, - "grad_norm": 0.3984375, - "learning_rate": 2.451208664239757e-08, - "loss": 1.3256, - "step": 2944 - }, - { - "epoch": 2.862764188065393, - "grad_norm": 0.39453125, - "learning_rate": 2.4156127236732162e-08, - "loss": 1.3031, - "step": 2945 - }, - { - "epoch": 2.86373777940043, - "grad_norm": 0.390625, - "learning_rate": 2.3802758806702463e-08, - "loss": 1.3073, - "step": 2946 - }, - { - "epoch": 2.864711370735467, - "grad_norm": 0.39453125, - "learning_rate": 2.3451981722109608e-08, - "loss": 1.3062, - "step": 2947 - }, - { - "epoch": 2.865684962070504, - "grad_norm": 0.40625, - "learning_rate": 2.310379635004245e-08, - "loss": 1.3162, - "step": 2948 - }, - { - "epoch": 2.866658553405541, - "grad_norm": 0.390625, - "learning_rate": 2.2758203054877302e-08, - "loss": 1.338, - "step": 2949 - }, - { - "epoch": 2.8676321447405786, - "grad_norm": 0.404296875, - "learning_rate": 2.241520219827903e-08, - "loss": 1.3093, - "step": 2950 - }, - { - "epoch": 2.8686057360756156, - "grad_norm": 0.390625, - "learning_rate": 2.207479413919772e-08, - "loss": 1.3074, - "step": 2951 - }, - { - "epoch": 2.8695793274106527, - "grad_norm": 0.3984375, - "learning_rate": 2.1736979233872025e-08, - "loss": 1.3125, - "step": 2952 - }, - { - "epoch": 2.8705529187456897, - "grad_norm": 0.388671875, - "learning_rate": 2.1401757835824988e-08, - "loss": 1.3036, - "step": 2953 - }, - { - "epoch": 2.871526510080727, - "grad_norm": 0.40625, - "learning_rate": 2.1069130295867656e-08, - "loss": 1.3257, - "step": 2954 - }, - { - "epoch": 2.8725001014157643, - "grad_norm": 0.384765625, - "learning_rate": 2.073909696209436e-08, - "loss": 1.3061, - "step": 2955 - }, - { - "epoch": 2.8734736927508013, - "grad_norm": 0.392578125, - "learning_rate": 2.0411658179886053e-08, - "loss": 1.3274, - "step": 2956 - }, - { - "epoch": 2.8744472840858384, - "grad_norm": 0.392578125, - "learning_rate": 2.0086814291908064e-08, - "loss": 1.3454, - "step": 2957 - }, - { - "epoch": 2.8754208754208754, - "grad_norm": 0.3984375, - "learning_rate": 1.976456563811041e-08, - "loss": 1.3012, - "step": 2958 - }, - { - "epoch": 2.8763944667559125, - "grad_norm": 0.39453125, - "learning_rate": 1.9444912555726668e-08, - "loss": 1.3053, - "step": 2959 - }, - { - "epoch": 2.8773680580909495, - "grad_norm": 0.40234375, - "learning_rate": 1.9127855379274528e-08, - "loss": 1.2956, - "step": 2960 - }, - { - "epoch": 2.8783416494259866, - "grad_norm": 0.3828125, - "learning_rate": 1.8813394440554966e-08, - "loss": 1.292, - "step": 2961 - }, - { - "epoch": 2.8793152407610236, - "grad_norm": 0.390625, - "learning_rate": 1.850153006865224e-08, - "loss": 1.323, - "step": 2962 - }, - { - "epoch": 2.880288832096061, - "grad_norm": 0.390625, - "learning_rate": 1.8192262589932507e-08, - "loss": 1.3126, - "step": 2963 - }, - { - "epoch": 2.881262423431098, - "grad_norm": 0.3984375, - "learning_rate": 1.788559232804521e-08, - "loss": 1.3386, - "step": 2964 - }, - { - "epoch": 2.8822360147661352, - "grad_norm": 0.390625, - "learning_rate": 1.7581519603921406e-08, - "loss": 1.3285, - "step": 2965 - }, - { - "epoch": 2.8832096061011723, - "grad_norm": 0.400390625, - "learning_rate": 1.7280044735773493e-08, - "loss": 1.3347, - "step": 2966 - }, - { - "epoch": 2.8841831974362093, - "grad_norm": 0.40234375, - "learning_rate": 1.6981168039095774e-08, - "loss": 1.3068, - "step": 2967 - }, - { - "epoch": 2.885156788771247, - "grad_norm": 0.396484375, - "learning_rate": 1.668488982666361e-08, - "loss": 1.3398, - "step": 2968 - }, - { - "epoch": 2.886130380106284, - "grad_norm": 0.396484375, - "learning_rate": 1.6391210408532032e-08, - "loss": 1.3475, - "step": 2969 - }, - { - "epoch": 2.887103971441321, - "grad_norm": 0.388671875, - "learning_rate": 1.6100130092037704e-08, - "loss": 1.3005, - "step": 2970 - }, - { - "epoch": 2.888077562776358, - "grad_norm": 0.388671875, - "learning_rate": 1.5811649181796673e-08, - "loss": 1.306, - "step": 2971 - }, - { - "epoch": 2.889051154111395, - "grad_norm": 0.39453125, - "learning_rate": 1.5525767979704675e-08, - "loss": 1.3159, - "step": 2972 - }, - { - "epoch": 2.890024745446432, - "grad_norm": 0.404296875, - "learning_rate": 1.5242486784937115e-08, - "loss": 1.3084, - "step": 2973 - }, - { - "epoch": 2.890998336781469, - "grad_norm": 0.392578125, - "learning_rate": 1.4961805893948245e-08, - "loss": 1.2883, - "step": 2974 - }, - { - "epoch": 2.891971928116506, - "grad_norm": 0.40234375, - "learning_rate": 1.4683725600471155e-08, - "loss": 1.3183, - "step": 2975 - }, - { - "epoch": 2.8929455194515437, - "grad_norm": 0.392578125, - "learning_rate": 1.4408246195517506e-08, - "loss": 1.3028, - "step": 2976 - }, - { - "epoch": 2.8939191107865807, - "grad_norm": 0.404296875, - "learning_rate": 1.4135367967377244e-08, - "loss": 1.3117, - "step": 2977 - }, - { - "epoch": 2.8948927021216178, - "grad_norm": 0.392578125, - "learning_rate": 1.3865091201617498e-08, - "loss": 1.3144, - "step": 2978 - }, - { - "epoch": 2.895866293456655, - "grad_norm": 0.3984375, - "learning_rate": 1.3597416181083678e-08, - "loss": 1.3064, - "step": 2979 - }, - { - "epoch": 2.896839884791692, - "grad_norm": 0.390625, - "learning_rate": 1.3332343185898377e-08, - "loss": 1.3151, - "step": 2980 - }, - { - "epoch": 2.8978134761267293, - "grad_norm": 0.38671875, - "learning_rate": 1.3069872493461089e-08, - "loss": 1.2951, - "step": 2981 - }, - { - "epoch": 2.8987870674617664, - "grad_norm": 0.396484375, - "learning_rate": 1.2810004378447649e-08, - "loss": 1.3181, - "step": 2982 - }, - { - "epoch": 2.8997606587968034, - "grad_norm": 0.38671875, - "learning_rate": 1.255273911281052e-08, - "loss": 1.3188, - "step": 2983 - }, - { - "epoch": 2.9007342501318405, - "grad_norm": 0.40234375, - "learning_rate": 1.2298076965778782e-08, - "loss": 1.3106, - "step": 2984 - }, - { - "epoch": 2.9017078414668775, - "grad_norm": 0.40234375, - "learning_rate": 1.204601820385648e-08, - "loss": 1.3223, - "step": 2985 - }, - { - "epoch": 2.9026814328019146, - "grad_norm": 0.392578125, - "learning_rate": 1.1796563090823443e-08, - "loss": 1.2958, - "step": 2986 - }, - { - "epoch": 2.9036550241369516, - "grad_norm": 0.400390625, - "learning_rate": 1.154971188773557e-08, - "loss": 1.2908, - "step": 2987 - }, - { - "epoch": 2.9046286154719887, - "grad_norm": 0.388671875, - "learning_rate": 1.1305464852922887e-08, - "loss": 1.3144, - "step": 2988 - }, - { - "epoch": 2.905602206807026, - "grad_norm": 0.396484375, - "learning_rate": 1.1063822241990097e-08, - "loss": 1.3207, - "step": 2989 - }, - { - "epoch": 2.9065757981420632, - "grad_norm": 0.39453125, - "learning_rate": 1.0824784307817138e-08, - "loss": 1.3336, - "step": 2990 - }, - { - "epoch": 2.9075493894771003, - "grad_norm": 0.400390625, - "learning_rate": 1.058835130055752e-08, - "loss": 1.2953, - "step": 2991 - }, - { - "epoch": 2.9085229808121373, - "grad_norm": 0.392578125, - "learning_rate": 1.03545234676386e-08, - "loss": 1.3062, - "step": 2992 - }, - { - "epoch": 2.909496572147175, - "grad_norm": 0.40234375, - "learning_rate": 1.0123301053762136e-08, - "loss": 1.3272, - "step": 2993 - }, - { - "epoch": 2.910470163482212, - "grad_norm": 0.390625, - "learning_rate": 9.894684300902347e-09, - "loss": 1.3112, - "step": 2994 - }, - { - "epoch": 2.911443754817249, - "grad_norm": 0.404296875, - "learning_rate": 9.668673448307575e-09, - "loss": 1.3279, - "step": 2995 - }, - { - "epoch": 2.912417346152286, - "grad_norm": 0.39453125, - "learning_rate": 9.445268732498625e-09, - "loss": 1.3128, - "step": 2996 - }, - { - "epoch": 2.913390937487323, - "grad_norm": 0.396484375, - "learning_rate": 9.224470387268759e-09, - "loss": 1.3207, - "step": 2997 - }, - { - "epoch": 2.91436452882236, - "grad_norm": 0.400390625, - "learning_rate": 9.006278643683697e-09, - "loss": 1.3169, - "step": 2998 - }, - { - "epoch": 2.915338120157397, - "grad_norm": 0.40234375, - "learning_rate": 8.790693730082179e-09, - "loss": 1.3137, - "step": 2999 - }, - { - "epoch": 2.916311711492434, - "grad_norm": 0.39453125, - "learning_rate": 8.577715872073734e-09, - "loss": 1.3052, - "step": 3000 - }, - { - "epoch": 2.9172853028274712, - "grad_norm": 0.392578125, - "learning_rate": 8.367345292540074e-09, - "loss": 1.3251, - "step": 3001 - }, - { - "epoch": 2.9182588941625087, - "grad_norm": 0.392578125, - "learning_rate": 8.15958221163482e-09, - "loss": 1.3077, - "step": 3002 - }, - { - "epoch": 2.9192324854975458, - "grad_norm": 0.392578125, - "learning_rate": 7.95442684678238e-09, - "loss": 1.3223, - "step": 3003 - }, - { - "epoch": 2.920206076832583, - "grad_norm": 0.39453125, - "learning_rate": 7.751879412677966e-09, - "loss": 1.3218, - "step": 3004 - }, - { - "epoch": 2.92117966816762, - "grad_norm": 0.39453125, - "learning_rate": 7.551940121288126e-09, - "loss": 1.2997, - "step": 3005 - }, - { - "epoch": 2.9221532595026574, - "grad_norm": 0.390625, - "learning_rate": 7.354609181849659e-09, - "loss": 1.3248, - "step": 3006 - }, - { - "epoch": 2.9231268508376944, - "grad_norm": 0.3984375, - "learning_rate": 7.159886800869875e-09, - "loss": 1.3029, - "step": 3007 - }, - { - "epoch": 2.9241004421727315, - "grad_norm": 0.396484375, - "learning_rate": 6.967773182126048e-09, - "loss": 1.3156, - "step": 3008 - }, - { - "epoch": 2.9250740335077685, - "grad_norm": 0.39453125, - "learning_rate": 6.77826852666541e-09, - "loss": 1.32, - "step": 3009 - }, - { - "epoch": 2.9260476248428056, - "grad_norm": 0.390625, - "learning_rate": 6.591373032805437e-09, - "loss": 1.2957, - "step": 3010 - }, - { - "epoch": 2.9270212161778426, - "grad_norm": 0.390625, - "learning_rate": 6.407086896131898e-09, - "loss": 1.3003, - "step": 3011 - }, - { - "epoch": 2.9279948075128797, - "grad_norm": 0.392578125, - "learning_rate": 6.225410309501079e-09, - "loss": 1.3187, - "step": 3012 - }, - { - "epoch": 2.9289683988479167, - "grad_norm": 0.3984375, - "learning_rate": 6.046343463037563e-09, - "loss": 1.3073, - "step": 3013 - }, - { - "epoch": 2.929941990182954, - "grad_norm": 0.390625, - "learning_rate": 5.869886544135617e-09, - "loss": 1.3149, - "step": 3014 - }, - { - "epoch": 2.9309155815179913, - "grad_norm": 0.388671875, - "learning_rate": 5.696039737457526e-09, - "loss": 1.3011, - "step": 3015 - }, - { - "epoch": 2.9318891728530283, - "grad_norm": 0.38671875, - "learning_rate": 5.524803224934427e-09, - "loss": 1.3196, - "step": 3016 - }, - { - "epoch": 2.9328627641880654, - "grad_norm": 0.3984375, - "learning_rate": 5.356177185765477e-09, - "loss": 1.3142, - "step": 3017 - }, - { - "epoch": 2.9338363555231024, - "grad_norm": 0.3984375, - "learning_rate": 5.190161796418125e-09, - "loss": 1.3129, - "step": 3018 - }, - { - "epoch": 2.93480994685814, - "grad_norm": 0.390625, - "learning_rate": 5.026757230628121e-09, - "loss": 1.3309, - "step": 3019 - }, - { - "epoch": 2.935783538193177, - "grad_norm": 0.404296875, - "learning_rate": 4.865963659398676e-09, - "loss": 1.3174, - "step": 3020 - }, - { - "epoch": 2.936757129528214, - "grad_norm": 0.40625, - "learning_rate": 4.7077812510001875e-09, - "loss": 1.317, - "step": 3021 - }, - { - "epoch": 2.937730720863251, - "grad_norm": 0.40234375, - "learning_rate": 4.5522101709713496e-09, - "loss": 1.3266, - "step": 3022 - }, - { - "epoch": 2.938704312198288, - "grad_norm": 0.400390625, - "learning_rate": 4.3992505821172095e-09, - "loss": 1.3269, - "step": 3023 - }, - { - "epoch": 2.939677903533325, - "grad_norm": 0.392578125, - "learning_rate": 4.248902644510555e-09, - "loss": 1.31, - "step": 3024 - }, - { - "epoch": 2.940651494868362, - "grad_norm": 0.40625, - "learning_rate": 4.1011665154905285e-09, - "loss": 1.3415, - "step": 3025 - }, - { - "epoch": 2.9416250862033992, - "grad_norm": 0.392578125, - "learning_rate": 3.956042349663736e-09, - "loss": 1.3199, - "step": 3026 - }, - { - "epoch": 2.9425986775384367, - "grad_norm": 0.392578125, - "learning_rate": 3.813530298902579e-09, - "loss": 1.3236, - "step": 3027 - }, - { - "epoch": 2.943572268873474, - "grad_norm": 0.39453125, - "learning_rate": 3.6736305123460938e-09, - "loss": 1.315, - "step": 3028 - }, - { - "epoch": 2.944545860208511, - "grad_norm": 0.40234375, - "learning_rate": 3.5363431363999447e-09, - "loss": 1.3063, - "step": 3029 - }, - { - "epoch": 2.945519451543548, - "grad_norm": 0.392578125, - "learning_rate": 3.4016683147355954e-09, - "loss": 1.2863, - "step": 3030 - }, - { - "epoch": 2.946493042878585, - "grad_norm": 0.392578125, - "learning_rate": 3.2696061882905862e-09, - "loss": 1.3141, - "step": 3031 - }, - { - "epoch": 2.9474666342136224, - "grad_norm": 0.388671875, - "learning_rate": 3.1401568952679763e-09, - "loss": 1.3131, - "step": 3032 - }, - { - "epoch": 2.9484402255486595, - "grad_norm": 0.396484375, - "learning_rate": 3.013320571136902e-09, - "loss": 1.3477, - "step": 3033 - }, - { - "epoch": 2.9494138168836965, - "grad_norm": 0.388671875, - "learning_rate": 2.8890973486320193e-09, - "loss": 1.2948, - "step": 3034 - }, - { - "epoch": 2.9503874082187336, - "grad_norm": 0.392578125, - "learning_rate": 2.767487357752674e-09, - "loss": 1.319, - "step": 3035 - }, - { - "epoch": 2.9513609995537706, - "grad_norm": 0.40234375, - "learning_rate": 2.6484907257642856e-09, - "loss": 1.2956, - "step": 3036 - }, - { - "epoch": 2.9523345908888077, - "grad_norm": 0.3984375, - "learning_rate": 2.5321075771969627e-09, - "loss": 1.3126, - "step": 3037 - }, - { - "epoch": 2.9533081822238447, - "grad_norm": 0.3984375, - "learning_rate": 2.418338033846057e-09, - "loss": 1.2997, - "step": 3038 - }, - { - "epoch": 2.9542817735588818, - "grad_norm": 0.396484375, - "learning_rate": 2.3071822147716083e-09, - "loss": 1.3128, - "step": 3039 - }, - { - "epoch": 2.9552553648939193, - "grad_norm": 0.390625, - "learning_rate": 2.198640236298344e-09, - "loss": 1.3263, - "step": 3040 - }, - { - "epoch": 2.9562289562289563, - "grad_norm": 0.40625, - "learning_rate": 2.092712212015402e-09, - "loss": 1.2956, - "step": 3041 - }, - { - "epoch": 2.9572025475639934, - "grad_norm": 0.400390625, - "learning_rate": 1.989398252777164e-09, - "loss": 1.3155, - "step": 3042 - }, - { - "epoch": 2.9581761388990304, - "grad_norm": 0.412109375, - "learning_rate": 1.8886984667015883e-09, - "loss": 1.3278, - "step": 3043 - }, - { - "epoch": 2.9591497302340675, - "grad_norm": 0.396484375, - "learning_rate": 1.7906129591713228e-09, - "loss": 1.3078, - "step": 3044 - }, - { - "epoch": 2.960123321569105, - "grad_norm": 0.400390625, - "learning_rate": 1.695141832833147e-09, - "loss": 1.3291, - "step": 3045 - }, - { - "epoch": 2.961096912904142, - "grad_norm": 0.390625, - "learning_rate": 1.6022851875974189e-09, - "loss": 1.338, - "step": 3046 - }, - { - "epoch": 2.962070504239179, - "grad_norm": 0.40234375, - "learning_rate": 1.5120431206391839e-09, - "loss": 1.3142, - "step": 3047 - }, - { - "epoch": 2.963044095574216, - "grad_norm": 0.3984375, - "learning_rate": 1.4244157263967884e-09, - "loss": 1.3227, - "step": 3048 - }, - { - "epoch": 2.964017686909253, - "grad_norm": 0.40234375, - "learning_rate": 1.3394030965724335e-09, - "loss": 1.2864, - "step": 3049 - }, - { - "epoch": 2.96499127824429, - "grad_norm": 0.39453125, - "learning_rate": 1.2570053201318987e-09, - "loss": 1.3075, - "step": 3050 - }, - { - "epoch": 2.9659648695793273, - "grad_norm": 0.40234375, - "learning_rate": 1.177222483305096e-09, - "loss": 1.3226, - "step": 3051 - }, - { - "epoch": 2.9669384609143643, - "grad_norm": 0.400390625, - "learning_rate": 1.100054669584405e-09, - "loss": 1.3121, - "step": 3052 - }, - { - "epoch": 2.967912052249402, - "grad_norm": 0.396484375, - "learning_rate": 1.0255019597266158e-09, - "loss": 1.3362, - "step": 3053 - }, - { - "epoch": 2.968885643584439, - "grad_norm": 0.40234375, - "learning_rate": 9.535644317507086e-10, - "loss": 1.3209, - "step": 3054 - }, - { - "epoch": 2.969859234919476, - "grad_norm": 0.404296875, - "learning_rate": 8.842421609397966e-10, - "loss": 1.3304, - "step": 3055 - }, - { - "epoch": 2.970832826254513, - "grad_norm": 0.40234375, - "learning_rate": 8.17535219839738e-10, - "loss": 1.3378, - "step": 3056 - }, - { - "epoch": 2.97180641758955, - "grad_norm": 0.40234375, - "learning_rate": 7.534436782594135e-10, - "loss": 1.3223, - "step": 3057 - }, - { - "epoch": 2.9727800089245875, - "grad_norm": 0.40625, - "learning_rate": 6.919676032710044e-10, - "loss": 1.327, - "step": 3058 - }, - { - "epoch": 2.9737536002596245, - "grad_norm": 0.3984375, - "learning_rate": 6.331070592088817e-10, - "loss": 1.3237, - "step": 3059 - }, - { - "epoch": 2.9747271915946616, - "grad_norm": 0.40234375, - "learning_rate": 5.768621076712721e-10, - "loss": 1.3338, - "step": 3060 - }, - { - "epoch": 2.9757007829296986, - "grad_norm": 0.39453125, - "learning_rate": 5.232328075180371e-10, - "loss": 1.3052, - "step": 3061 - }, - { - "epoch": 2.9766743742647357, - "grad_norm": 0.39453125, - "learning_rate": 4.722192148728932e-10, - "loss": 1.3246, - "step": 3062 - }, - { - "epoch": 2.9776479655997727, - "grad_norm": 0.40234375, - "learning_rate": 4.2382138312119237e-10, - "loss": 1.32, - "step": 3063 - }, - { - "epoch": 2.97862155693481, - "grad_norm": 0.396484375, - "learning_rate": 3.7803936291186395e-10, - "loss": 1.3072, - "step": 3064 - }, - { - "epoch": 2.979595148269847, - "grad_norm": 0.41015625, - "learning_rate": 3.3487320215547235e-10, - "loss": 1.3114, - "step": 3065 - }, - { - "epoch": 2.9805687396048843, - "grad_norm": 0.392578125, - "learning_rate": 2.9432294602560476e-10, - "loss": 1.313, - "step": 3066 - }, - { - "epoch": 2.9815423309399214, - "grad_norm": 0.396484375, - "learning_rate": 2.563886369583157e-10, - "loss": 1.3014, - "step": 3067 - }, - { - "epoch": 2.9825159222749584, - "grad_norm": 0.41015625, - "learning_rate": 2.2107031465185002e-10, - "loss": 1.3306, - "step": 3068 - }, - { - "epoch": 2.9834895136099955, - "grad_norm": 0.40234375, - "learning_rate": 1.8836801606664233e-10, - "loss": 1.3305, - "step": 3069 - }, - { - "epoch": 2.9844631049450325, - "grad_norm": 0.40625, - "learning_rate": 1.5828177542642765e-10, - "loss": 1.318, - "step": 3070 - }, - { - "epoch": 2.98543669628007, - "grad_norm": 0.400390625, - "learning_rate": 1.3081162421574312e-10, - "loss": 1.3064, - "step": 3071 - }, - { - "epoch": 2.986410287615107, - "grad_norm": 0.39453125, - "learning_rate": 1.059575911824262e-10, - "loss": 1.2982, - "step": 3072 - }, - { - "epoch": 2.987383878950144, - "grad_norm": 0.400390625, - "learning_rate": 8.37197023365044e-11, - "loss": 1.3013, - "step": 3073 - }, - { - "epoch": 2.988357470285181, - "grad_norm": 0.400390625, - "learning_rate": 6.40979809499176e-11, - "loss": 1.3008, - "step": 3074 - }, - { - "epoch": 2.989331061620218, - "grad_norm": 0.388671875, - "learning_rate": 4.7092447556518204e-11, - "loss": 1.3004, - "step": 3075 - }, - { - "epoch": 2.9903046529552553, - "grad_norm": 0.390625, - "learning_rate": 3.270311995262621e-11, - "loss": 1.3092, - "step": 3076 - }, - { - "epoch": 2.9912782442902923, - "grad_norm": 0.388671875, - "learning_rate": 2.0930013197306697e-11, - "loss": 1.297, - "step": 3077 - }, - { - "epoch": 2.9922518356253294, - "grad_norm": 0.396484375, - "learning_rate": 1.1773139610427031e-11, - "loss": 1.3032, - "step": 3078 - }, - { - "epoch": 2.993225426960367, - "grad_norm": 0.388671875, - "learning_rate": 5.232508775154799e-12, - "loss": 1.3081, - "step": 3079 - }, - { - "epoch": 2.994199018295404, - "grad_norm": 0.40234375, - "learning_rate": 1.308127536014947e-12, - "loss": 1.3196, - "step": 3080 - }, - { - "epoch": 2.995172609630441, - "grad_norm": 0.39453125, - "learning_rate": 0.0, - "loss": 1.3089, - "step": 3081 - } - ], - "logging_steps": 1, - "max_steps": 3081, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 1027, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 2.829516122582994e+19, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -}