{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21271221366941864, "eval_steps": 1000, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7726017805784885e-05, "grad_norm": 636.0, "learning_rate": 2.9411764705882356e-07, "loss": 14.7402, "step": 1 }, { "epoch": 3.545203561156977e-05, "grad_norm": 620.0, "learning_rate": 5.882352941176471e-07, "loss": 13.8226, "step": 2 }, { "epoch": 7.090407122313954e-05, "grad_norm": 648.0, "learning_rate": 1.1764705882352942e-06, "loss": 14.3719, "step": 4 }, { "epoch": 0.00010635610683470932, "grad_norm": 556.0, "learning_rate": 1.7647058823529412e-06, "loss": 13.7975, "step": 6 }, { "epoch": 0.00014180814244627908, "grad_norm": 672.0, "learning_rate": 2.3529411764705885e-06, "loss": 14.2378, "step": 8 }, { "epoch": 0.00017726017805784887, "grad_norm": 552.0, "learning_rate": 2.9411764705882355e-06, "loss": 13.8208, "step": 10 }, { "epoch": 0.00021271221366941863, "grad_norm": 492.0, "learning_rate": 3.5294117647058825e-06, "loss": 12.2904, "step": 12 }, { "epoch": 0.0002481642492809884, "grad_norm": 450.0, "learning_rate": 4.11764705882353e-06, "loss": 12.2658, "step": 14 }, { "epoch": 0.00028361628489255816, "grad_norm": 338.0, "learning_rate": 4.705882352941177e-06, "loss": 10.4614, "step": 16 }, { "epoch": 0.00031906832050412795, "grad_norm": 209.0, "learning_rate": 5.294117647058824e-06, "loss": 8.4051, "step": 18 }, { "epoch": 0.00035452035611569774, "grad_norm": 125.5, "learning_rate": 5.882352941176471e-06, "loss": 7.5553, "step": 20 }, { "epoch": 0.00038997239172726747, "grad_norm": 90.0, "learning_rate": 6.470588235294119e-06, "loss": 7.013, "step": 22 }, { "epoch": 0.00042542442733883726, "grad_norm": 81.0, "learning_rate": 7.058823529411765e-06, "loss": 6.7415, "step": 24 }, { "epoch": 0.00046087646295040705, "grad_norm": 103.0, "learning_rate": 7.647058823529413e-06, "loss": 6.0122, "step": 26 }, { "epoch": 0.0004963284985619768, "grad_norm": 108.0, "learning_rate": 8.23529411764706e-06, "loss": 5.1164, "step": 28 }, { "epoch": 0.0005317805341735466, "grad_norm": 68.5, "learning_rate": 8.823529411764707e-06, "loss": 4.0865, "step": 30 }, { "epoch": 0.0005672325697851163, "grad_norm": 70.0, "learning_rate": 9.411764705882354e-06, "loss": 3.4705, "step": 32 }, { "epoch": 0.0006026846053966862, "grad_norm": 56.0, "learning_rate": 1e-05, "loss": 2.6308, "step": 34 }, { "epoch": 0.0006381366410082559, "grad_norm": 19.5, "learning_rate": 1.0588235294117648e-05, "loss": 2.1415, "step": 36 }, { "epoch": 0.0006735886766198256, "grad_norm": 13.4375, "learning_rate": 1.1176470588235295e-05, "loss": 1.9987, "step": 38 }, { "epoch": 0.0007090407122313955, "grad_norm": 13.625, "learning_rate": 1.1764705882352942e-05, "loss": 1.8561, "step": 40 }, { "epoch": 0.0007444927478429652, "grad_norm": 10.875, "learning_rate": 1.2352941176470589e-05, "loss": 1.8186, "step": 42 }, { "epoch": 0.0007799447834545349, "grad_norm": 6.8125, "learning_rate": 1.2941176470588238e-05, "loss": 1.7927, "step": 44 }, { "epoch": 0.0008153968190661048, "grad_norm": 8.4375, "learning_rate": 1.3529411764705883e-05, "loss": 1.6379, "step": 46 }, { "epoch": 0.0008508488546776745, "grad_norm": 5.5, "learning_rate": 1.411764705882353e-05, "loss": 1.5964, "step": 48 }, { "epoch": 0.0008863008902892443, "grad_norm": 5.3125, "learning_rate": 1.4705882352941177e-05, "loss": 1.4906, "step": 50 }, { "epoch": 0.0009217529259008141, "grad_norm": 5.59375, "learning_rate": 1.5294117647058826e-05, "loss": 1.4529, "step": 52 }, { "epoch": 0.0009572049615123838, "grad_norm": 4.46875, "learning_rate": 1.588235294117647e-05, "loss": 1.4715, "step": 54 }, { "epoch": 0.0009926569971239537, "grad_norm": 4.46875, "learning_rate": 1.647058823529412e-05, "loss": 1.3749, "step": 56 }, { "epoch": 0.0010281090327355234, "grad_norm": 3.0625, "learning_rate": 1.7058823529411767e-05, "loss": 1.3041, "step": 58 }, { "epoch": 0.0010635610683470932, "grad_norm": 3.015625, "learning_rate": 1.7647058823529414e-05, "loss": 1.2544, "step": 60 }, { "epoch": 0.0010990131039586629, "grad_norm": 3.40625, "learning_rate": 1.8235294117647057e-05, "loss": 1.3137, "step": 62 }, { "epoch": 0.0011344651395702326, "grad_norm": 3.15625, "learning_rate": 1.8823529411764708e-05, "loss": 1.2781, "step": 64 }, { "epoch": 0.0011699171751818026, "grad_norm": 3.40625, "learning_rate": 1.9411764705882355e-05, "loss": 1.2188, "step": 66 }, { "epoch": 0.0012053692107933723, "grad_norm": 3.984375, "learning_rate": 2e-05, "loss": 1.2637, "step": 68 }, { "epoch": 0.001240821246404942, "grad_norm": 3.625, "learning_rate": 2.058823529411765e-05, "loss": 1.2316, "step": 70 }, { "epoch": 0.0012762732820165118, "grad_norm": 4.0625, "learning_rate": 2.1176470588235296e-05, "loss": 1.1779, "step": 72 }, { "epoch": 0.0013117253176280815, "grad_norm": 3.28125, "learning_rate": 2.1764705882352943e-05, "loss": 1.214, "step": 74 }, { "epoch": 0.0013471773532396513, "grad_norm": 3.4375, "learning_rate": 2.235294117647059e-05, "loss": 1.3211, "step": 76 }, { "epoch": 0.0013826293888512212, "grad_norm": 3.765625, "learning_rate": 2.2941176470588237e-05, "loss": 1.1991, "step": 78 }, { "epoch": 0.001418081424462791, "grad_norm": 3.15625, "learning_rate": 2.3529411764705884e-05, "loss": 1.234, "step": 80 }, { "epoch": 0.0014535334600743607, "grad_norm": 3.09375, "learning_rate": 2.411764705882353e-05, "loss": 1.2325, "step": 82 }, { "epoch": 0.0014889854956859304, "grad_norm": 3.796875, "learning_rate": 2.4705882352941178e-05, "loss": 1.2462, "step": 84 }, { "epoch": 0.0015244375312975001, "grad_norm": 3.21875, "learning_rate": 2.5294117647058825e-05, "loss": 1.1721, "step": 86 }, { "epoch": 0.0015598895669090699, "grad_norm": 5.9375, "learning_rate": 2.5882352941176475e-05, "loss": 1.2239, "step": 88 }, { "epoch": 0.0015953416025206398, "grad_norm": 3.3125, "learning_rate": 2.647058823529412e-05, "loss": 1.1905, "step": 90 }, { "epoch": 0.0016307936381322096, "grad_norm": 2.953125, "learning_rate": 2.7058823529411766e-05, "loss": 1.1956, "step": 92 }, { "epoch": 0.0016662456737437793, "grad_norm": 3.71875, "learning_rate": 2.7647058823529416e-05, "loss": 1.1939, "step": 94 }, { "epoch": 0.001701697709355349, "grad_norm": 3.625, "learning_rate": 2.823529411764706e-05, "loss": 1.1435, "step": 96 }, { "epoch": 0.0017371497449669188, "grad_norm": 4.4375, "learning_rate": 2.8823529411764703e-05, "loss": 1.1985, "step": 98 }, { "epoch": 0.0017726017805784885, "grad_norm": 3.890625, "learning_rate": 2.9411764705882354e-05, "loss": 1.1163, "step": 100 }, { "epoch": 0.0018080538161900585, "grad_norm": 3.921875, "learning_rate": 3e-05, "loss": 1.1596, "step": 102 }, { "epoch": 0.0018435058518016282, "grad_norm": 3.828125, "learning_rate": 3.058823529411765e-05, "loss": 1.2044, "step": 104 }, { "epoch": 0.001878957887413198, "grad_norm": 3.390625, "learning_rate": 3.11764705882353e-05, "loss": 1.177, "step": 106 }, { "epoch": 0.0019144099230247677, "grad_norm": 4.40625, "learning_rate": 3.176470588235294e-05, "loss": 1.099, "step": 108 }, { "epoch": 0.0019498619586363374, "grad_norm": 3.703125, "learning_rate": 3.235294117647059e-05, "loss": 1.0945, "step": 110 }, { "epoch": 0.0019853139942479074, "grad_norm": 3.84375, "learning_rate": 3.294117647058824e-05, "loss": 1.1515, "step": 112 }, { "epoch": 0.002020766029859477, "grad_norm": 4.0, "learning_rate": 3.352941176470588e-05, "loss": 1.1436, "step": 114 }, { "epoch": 0.002056218065471047, "grad_norm": 4.5, "learning_rate": 3.411764705882353e-05, "loss": 1.1481, "step": 116 }, { "epoch": 0.0020916701010826166, "grad_norm": 3.6875, "learning_rate": 3.470588235294118e-05, "loss": 1.1345, "step": 118 }, { "epoch": 0.0021271221366941863, "grad_norm": 4.75, "learning_rate": 3.529411764705883e-05, "loss": 1.1136, "step": 120 }, { "epoch": 0.002162574172305756, "grad_norm": 3.203125, "learning_rate": 3.5882352941176474e-05, "loss": 1.1009, "step": 122 }, { "epoch": 0.0021980262079173258, "grad_norm": 3.640625, "learning_rate": 3.6470588235294114e-05, "loss": 1.1125, "step": 124 }, { "epoch": 0.0022334782435288955, "grad_norm": 3.5625, "learning_rate": 3.705882352941177e-05, "loss": 1.1209, "step": 126 }, { "epoch": 0.0022689302791404652, "grad_norm": 4.5, "learning_rate": 3.7647058823529415e-05, "loss": 1.1285, "step": 128 }, { "epoch": 0.002304382314752035, "grad_norm": 4.0, "learning_rate": 3.8235294117647055e-05, "loss": 1.1407, "step": 130 }, { "epoch": 0.002339834350363605, "grad_norm": 3.78125, "learning_rate": 3.882352941176471e-05, "loss": 1.1664, "step": 132 }, { "epoch": 0.002375286385975175, "grad_norm": 3.53125, "learning_rate": 3.9411764705882356e-05, "loss": 1.1043, "step": 134 }, { "epoch": 0.0024107384215867446, "grad_norm": 3.71875, "learning_rate": 4e-05, "loss": 1.1204, "step": 136 }, { "epoch": 0.0024461904571983144, "grad_norm": 3.546875, "learning_rate": 4.058823529411765e-05, "loss": 1.0994, "step": 138 }, { "epoch": 0.002481642492809884, "grad_norm": 3.640625, "learning_rate": 4.11764705882353e-05, "loss": 1.0434, "step": 140 }, { "epoch": 0.002517094528421454, "grad_norm": 3.515625, "learning_rate": 4.1764705882352944e-05, "loss": 1.0621, "step": 142 }, { "epoch": 0.0025525465640330236, "grad_norm": 3.6875, "learning_rate": 4.235294117647059e-05, "loss": 1.1062, "step": 144 }, { "epoch": 0.0025879985996445933, "grad_norm": 3.40625, "learning_rate": 4.294117647058823e-05, "loss": 1.0814, "step": 146 }, { "epoch": 0.002623450635256163, "grad_norm": 3.203125, "learning_rate": 4.3529411764705885e-05, "loss": 1.0785, "step": 148 }, { "epoch": 0.0026589026708677328, "grad_norm": 3.484375, "learning_rate": 4.411764705882353e-05, "loss": 1.0745, "step": 150 }, { "epoch": 0.0026943547064793025, "grad_norm": 3.796875, "learning_rate": 4.470588235294118e-05, "loss": 1.1046, "step": 152 }, { "epoch": 0.0027298067420908722, "grad_norm": 3.734375, "learning_rate": 4.5294117647058826e-05, "loss": 1.0708, "step": 154 }, { "epoch": 0.0027652587777024424, "grad_norm": 3.84375, "learning_rate": 4.588235294117647e-05, "loss": 1.1357, "step": 156 }, { "epoch": 0.002800710813314012, "grad_norm": 3.859375, "learning_rate": 4.647058823529412e-05, "loss": 1.1327, "step": 158 }, { "epoch": 0.002836162848925582, "grad_norm": 3.6875, "learning_rate": 4.705882352941177e-05, "loss": 1.06, "step": 160 }, { "epoch": 0.0028716148845371516, "grad_norm": 3.640625, "learning_rate": 4.7647058823529414e-05, "loss": 1.0476, "step": 162 }, { "epoch": 0.0029070669201487214, "grad_norm": 5.15625, "learning_rate": 4.823529411764706e-05, "loss": 1.0745, "step": 164 }, { "epoch": 0.002942518955760291, "grad_norm": 3.171875, "learning_rate": 4.882352941176471e-05, "loss": 1.0779, "step": 166 }, { "epoch": 0.002977970991371861, "grad_norm": 3.90625, "learning_rate": 4.9411764705882355e-05, "loss": 1.1239, "step": 168 }, { "epoch": 0.0030134230269834306, "grad_norm": 3.59375, "learning_rate": 5e-05, "loss": 1.0765, "step": 170 }, { "epoch": 0.0030488750625950003, "grad_norm": 5.0625, "learning_rate": 4.999999984400261e-05, "loss": 1.0344, "step": 172 }, { "epoch": 0.00308432709820657, "grad_norm": 3.8125, "learning_rate": 4.999999937601042e-05, "loss": 1.1249, "step": 174 }, { "epoch": 0.0031197791338181398, "grad_norm": 3.9375, "learning_rate": 4.9999998596023444e-05, "loss": 1.0789, "step": 176 }, { "epoch": 0.0031552311694297095, "grad_norm": 3.46875, "learning_rate": 4.9999997504041694e-05, "loss": 1.0495, "step": 178 }, { "epoch": 0.0031906832050412797, "grad_norm": 4.28125, "learning_rate": 4.999999610006519e-05, "loss": 1.0348, "step": 180 }, { "epoch": 0.0032261352406528494, "grad_norm": 3.640625, "learning_rate": 4.999999438409393e-05, "loss": 1.0471, "step": 182 }, { "epoch": 0.003261587276264419, "grad_norm": 3.546875, "learning_rate": 4.9999992356127956e-05, "loss": 1.0496, "step": 184 }, { "epoch": 0.003297039311875989, "grad_norm": 4.125, "learning_rate": 4.9999990016167286e-05, "loss": 1.0611, "step": 186 }, { "epoch": 0.0033324913474875586, "grad_norm": 3.328125, "learning_rate": 4.999998736421194e-05, "loss": 1.0915, "step": 188 }, { "epoch": 0.0033679433830991284, "grad_norm": 3.78125, "learning_rate": 4.999998440026197e-05, "loss": 1.0573, "step": 190 }, { "epoch": 0.003403395418710698, "grad_norm": 3.9375, "learning_rate": 4.999998112431738e-05, "loss": 1.0416, "step": 192 }, { "epoch": 0.003438847454322268, "grad_norm": 3.703125, "learning_rate": 4.999997753637825e-05, "loss": 1.0162, "step": 194 }, { "epoch": 0.0034742994899338376, "grad_norm": 3.046875, "learning_rate": 4.999997363644461e-05, "loss": 1.004, "step": 196 }, { "epoch": 0.0035097515255454073, "grad_norm": 4.65625, "learning_rate": 4.99999694245165e-05, "loss": 1.0658, "step": 198 }, { "epoch": 0.003545203561156977, "grad_norm": 3.65625, "learning_rate": 4.9999964900593975e-05, "loss": 0.9414, "step": 200 }, { "epoch": 0.0035806555967685468, "grad_norm": 3.8125, "learning_rate": 4.9999960064677104e-05, "loss": 1.0537, "step": 202 }, { "epoch": 0.003616107632380117, "grad_norm": 4.8125, "learning_rate": 4.9999954916765934e-05, "loss": 1.0669, "step": 204 }, { "epoch": 0.0036515596679916867, "grad_norm": 3.703125, "learning_rate": 4.999994945686053e-05, "loss": 0.9318, "step": 206 }, { "epoch": 0.0036870117036032564, "grad_norm": 3.71875, "learning_rate": 4.999994368496097e-05, "loss": 1.0402, "step": 208 }, { "epoch": 0.003722463739214826, "grad_norm": 4.40625, "learning_rate": 4.999993760106732e-05, "loss": 1.0629, "step": 210 }, { "epoch": 0.003757915774826396, "grad_norm": 3.796875, "learning_rate": 4.999993120517965e-05, "loss": 1.0532, "step": 212 }, { "epoch": 0.0037933678104379656, "grad_norm": 3.3125, "learning_rate": 4.999992449729806e-05, "loss": 1.0498, "step": 214 }, { "epoch": 0.0038288198460495354, "grad_norm": 3.28125, "learning_rate": 4.9999917477422594e-05, "loss": 0.9999, "step": 216 }, { "epoch": 0.003864271881661105, "grad_norm": 3.859375, "learning_rate": 4.9999910145553386e-05, "loss": 1.0786, "step": 218 }, { "epoch": 0.003899723917272675, "grad_norm": 3.46875, "learning_rate": 4.99999025016905e-05, "loss": 1.0175, "step": 220 }, { "epoch": 0.0039351759528842446, "grad_norm": 3.640625, "learning_rate": 4.9999894545834034e-05, "loss": 1.0597, "step": 222 }, { "epoch": 0.003970627988495815, "grad_norm": 3.8125, "learning_rate": 4.99998862779841e-05, "loss": 0.9901, "step": 224 }, { "epoch": 0.004006080024107384, "grad_norm": 3.625, "learning_rate": 4.9999877698140783e-05, "loss": 1.053, "step": 226 }, { "epoch": 0.004041532059718954, "grad_norm": 3.640625, "learning_rate": 4.999986880630421e-05, "loss": 1.0277, "step": 228 }, { "epoch": 0.0040769840953305235, "grad_norm": 3.140625, "learning_rate": 4.9999859602474474e-05, "loss": 0.9884, "step": 230 }, { "epoch": 0.004112436130942094, "grad_norm": 3.375, "learning_rate": 4.999985008665169e-05, "loss": 0.9866, "step": 232 }, { "epoch": 0.004147888166553663, "grad_norm": 3.234375, "learning_rate": 4.9999840258835994e-05, "loss": 0.976, "step": 234 }, { "epoch": 0.004183340202165233, "grad_norm": 3.328125, "learning_rate": 4.99998301190275e-05, "loss": 1.0003, "step": 236 }, { "epoch": 0.0042187922377768024, "grad_norm": 3.40625, "learning_rate": 4.999981966722633e-05, "loss": 1.0255, "step": 238 }, { "epoch": 0.004254244273388373, "grad_norm": 3.140625, "learning_rate": 4.999980890343262e-05, "loss": 1.0533, "step": 240 }, { "epoch": 0.004289696308999943, "grad_norm": 3.15625, "learning_rate": 4.99997978276465e-05, "loss": 1.0061, "step": 242 }, { "epoch": 0.004325148344611512, "grad_norm": 3.625, "learning_rate": 4.999978643986811e-05, "loss": 1.0097, "step": 244 }, { "epoch": 0.004360600380223082, "grad_norm": 3.4375, "learning_rate": 4.9999774740097597e-05, "loss": 0.996, "step": 246 }, { "epoch": 0.0043960524158346516, "grad_norm": 3.6875, "learning_rate": 4.9999762728335094e-05, "loss": 0.9974, "step": 248 }, { "epoch": 0.004431504451446222, "grad_norm": 3.1875, "learning_rate": 4.999975040458076e-05, "loss": 0.9845, "step": 250 }, { "epoch": 0.004466956487057791, "grad_norm": 3.46875, "learning_rate": 4.999973776883475e-05, "loss": 1.0087, "step": 252 }, { "epoch": 0.004502408522669361, "grad_norm": 4.1875, "learning_rate": 4.9999724821097226e-05, "loss": 1.0533, "step": 254 }, { "epoch": 0.0045378605582809305, "grad_norm": 3.984375, "learning_rate": 4.999971156136833e-05, "loss": 1.0019, "step": 256 }, { "epoch": 0.004573312593892501, "grad_norm": 4.40625, "learning_rate": 4.999969798964825e-05, "loss": 1.0433, "step": 258 }, { "epoch": 0.00460876462950407, "grad_norm": 3.34375, "learning_rate": 4.999968410593715e-05, "loss": 0.9849, "step": 260 }, { "epoch": 0.00464421666511564, "grad_norm": 3.453125, "learning_rate": 4.999966991023519e-05, "loss": 0.9957, "step": 262 }, { "epoch": 0.00467966870072721, "grad_norm": 3.296875, "learning_rate": 4.999965540254257e-05, "loss": 0.9741, "step": 264 }, { "epoch": 0.00471512073633878, "grad_norm": 3.140625, "learning_rate": 4.999964058285944e-05, "loss": 1.0172, "step": 266 }, { "epoch": 0.00475057277195035, "grad_norm": 4.0625, "learning_rate": 4.9999625451186014e-05, "loss": 1.0139, "step": 268 }, { "epoch": 0.004786024807561919, "grad_norm": 3.875, "learning_rate": 4.999961000752247e-05, "loss": 1.0714, "step": 270 }, { "epoch": 0.004821476843173489, "grad_norm": 3.640625, "learning_rate": 4.999959425186899e-05, "loss": 1.044, "step": 272 }, { "epoch": 0.0048569288787850586, "grad_norm": 3.796875, "learning_rate": 4.999957818422579e-05, "loss": 0.9591, "step": 274 }, { "epoch": 0.004892380914396629, "grad_norm": 4.09375, "learning_rate": 4.999956180459306e-05, "loss": 0.9922, "step": 276 }, { "epoch": 0.004927832950008198, "grad_norm": 3.546875, "learning_rate": 4.9999545112971004e-05, "loss": 1.0256, "step": 278 }, { "epoch": 0.004963284985619768, "grad_norm": 4.03125, "learning_rate": 4.9999528109359825e-05, "loss": 1.0298, "step": 280 }, { "epoch": 0.0049987370212313375, "grad_norm": 3.546875, "learning_rate": 4.999951079375976e-05, "loss": 1.0022, "step": 282 }, { "epoch": 0.005034189056842908, "grad_norm": 3.9375, "learning_rate": 4.9999493166170994e-05, "loss": 1.0502, "step": 284 }, { "epoch": 0.005069641092454477, "grad_norm": 3.625, "learning_rate": 4.999947522659376e-05, "loss": 1.0071, "step": 286 }, { "epoch": 0.005105093128066047, "grad_norm": 3.34375, "learning_rate": 4.999945697502829e-05, "loss": 1.0428, "step": 288 }, { "epoch": 0.005140545163677617, "grad_norm": 3.46875, "learning_rate": 4.9999438411474794e-05, "loss": 1.0481, "step": 290 }, { "epoch": 0.005175997199289187, "grad_norm": 3.265625, "learning_rate": 4.999941953593352e-05, "loss": 1.0106, "step": 292 }, { "epoch": 0.005211449234900757, "grad_norm": 3.1875, "learning_rate": 4.99994003484047e-05, "loss": 0.9525, "step": 294 }, { "epoch": 0.005246901270512326, "grad_norm": 3.4375, "learning_rate": 4.9999380848888566e-05, "loss": 0.9956, "step": 296 }, { "epoch": 0.005282353306123896, "grad_norm": 3.609375, "learning_rate": 4.9999361037385366e-05, "loss": 1.002, "step": 298 }, { "epoch": 0.0053178053417354655, "grad_norm": 4.1875, "learning_rate": 4.9999340913895347e-05, "loss": 1.0014, "step": 300 }, { "epoch": 0.005353257377347036, "grad_norm": 3.640625, "learning_rate": 4.9999320478418766e-05, "loss": 1.0703, "step": 302 }, { "epoch": 0.005388709412958605, "grad_norm": 3.40625, "learning_rate": 4.999929973095586e-05, "loss": 0.9922, "step": 304 }, { "epoch": 0.005424161448570175, "grad_norm": 3.3125, "learning_rate": 4.999927867150691e-05, "loss": 1.0016, "step": 306 }, { "epoch": 0.0054596134841817445, "grad_norm": 3.3125, "learning_rate": 4.999925730007217e-05, "loss": 1.0145, "step": 308 }, { "epoch": 0.005495065519793315, "grad_norm": 3.453125, "learning_rate": 4.999923561665191e-05, "loss": 1.0087, "step": 310 }, { "epoch": 0.005530517555404885, "grad_norm": 3.234375, "learning_rate": 4.999921362124639e-05, "loss": 0.9935, "step": 312 }, { "epoch": 0.005565969591016454, "grad_norm": 3.28125, "learning_rate": 4.9999191313855884e-05, "loss": 1.013, "step": 314 }, { "epoch": 0.005601421626628024, "grad_norm": 3.28125, "learning_rate": 4.999916869448069e-05, "loss": 0.9903, "step": 316 }, { "epoch": 0.005636873662239594, "grad_norm": 3.265625, "learning_rate": 4.999914576312107e-05, "loss": 0.9984, "step": 318 }, { "epoch": 0.005672325697851164, "grad_norm": 3.5625, "learning_rate": 4.999912251977732e-05, "loss": 1.0084, "step": 320 }, { "epoch": 0.005707777733462733, "grad_norm": 3.390625, "learning_rate": 4.999909896444972e-05, "loss": 0.9749, "step": 322 }, { "epoch": 0.005743229769074303, "grad_norm": 3.609375, "learning_rate": 4.9999075097138584e-05, "loss": 1.0102, "step": 324 }, { "epoch": 0.0057786818046858725, "grad_norm": 3.453125, "learning_rate": 4.9999050917844194e-05, "loss": 0.9708, "step": 326 }, { "epoch": 0.005814133840297443, "grad_norm": 3.453125, "learning_rate": 4.9999026426566854e-05, "loss": 1.0564, "step": 328 }, { "epoch": 0.005849585875909012, "grad_norm": 3.609375, "learning_rate": 4.9999001623306876e-05, "loss": 0.9852, "step": 330 }, { "epoch": 0.005885037911520582, "grad_norm": 3.8125, "learning_rate": 4.999897650806455e-05, "loss": 0.9472, "step": 332 }, { "epoch": 0.0059204899471321515, "grad_norm": 3.59375, "learning_rate": 4.9998951080840214e-05, "loss": 0.9868, "step": 334 }, { "epoch": 0.005955941982743722, "grad_norm": 3.546875, "learning_rate": 4.9998925341634187e-05, "loss": 1.0065, "step": 336 }, { "epoch": 0.005991394018355292, "grad_norm": 3.40625, "learning_rate": 4.999889929044676e-05, "loss": 1.008, "step": 338 }, { "epoch": 0.006026846053966861, "grad_norm": 3.8125, "learning_rate": 4.9998872927278284e-05, "loss": 1.0051, "step": 340 }, { "epoch": 0.006062298089578431, "grad_norm": 3.640625, "learning_rate": 4.9998846252129086e-05, "loss": 0.9748, "step": 342 }, { "epoch": 0.006097750125190001, "grad_norm": 3.53125, "learning_rate": 4.999881926499949e-05, "loss": 0.9838, "step": 344 }, { "epoch": 0.006133202160801571, "grad_norm": 3.25, "learning_rate": 4.999879196588983e-05, "loss": 0.9938, "step": 346 }, { "epoch": 0.00616865419641314, "grad_norm": 3.1875, "learning_rate": 4.999876435480045e-05, "loss": 0.9663, "step": 348 }, { "epoch": 0.00620410623202471, "grad_norm": 3.75, "learning_rate": 4.9998736431731715e-05, "loss": 0.963, "step": 350 }, { "epoch": 0.0062395582676362795, "grad_norm": 4.09375, "learning_rate": 4.9998708196683945e-05, "loss": 0.9871, "step": 352 }, { "epoch": 0.00627501030324785, "grad_norm": 3.625, "learning_rate": 4.999867964965751e-05, "loss": 0.9627, "step": 354 }, { "epoch": 0.006310462338859419, "grad_norm": 3.421875, "learning_rate": 4.999865079065275e-05, "loss": 1.0292, "step": 356 }, { "epoch": 0.006345914374470989, "grad_norm": 3.234375, "learning_rate": 4.9998621619670036e-05, "loss": 0.9694, "step": 358 }, { "epoch": 0.006381366410082559, "grad_norm": 3.40625, "learning_rate": 4.9998592136709746e-05, "loss": 0.9795, "step": 360 }, { "epoch": 0.006416818445694129, "grad_norm": 3.6875, "learning_rate": 4.999856234177221e-05, "loss": 0.9889, "step": 362 }, { "epoch": 0.006452270481305699, "grad_norm": 3.3125, "learning_rate": 4.999853223485784e-05, "loss": 0.9322, "step": 364 }, { "epoch": 0.006487722516917268, "grad_norm": 3.515625, "learning_rate": 4.999850181596699e-05, "loss": 1.0098, "step": 366 }, { "epoch": 0.006523174552528838, "grad_norm": 3.734375, "learning_rate": 4.9998471085100045e-05, "loss": 0.9977, "step": 368 }, { "epoch": 0.006558626588140408, "grad_norm": 3.28125, "learning_rate": 4.999844004225739e-05, "loss": 0.9989, "step": 370 }, { "epoch": 0.006594078623751978, "grad_norm": 3.046875, "learning_rate": 4.9998408687439405e-05, "loss": 0.9484, "step": 372 }, { "epoch": 0.006629530659363547, "grad_norm": 2.921875, "learning_rate": 4.999837702064649e-05, "loss": 0.9547, "step": 374 }, { "epoch": 0.006664982694975117, "grad_norm": 3.5, "learning_rate": 4.999834504187904e-05, "loss": 1.0299, "step": 376 }, { "epoch": 0.0067004347305866865, "grad_norm": 3.171875, "learning_rate": 4.999831275113744e-05, "loss": 1.0, "step": 378 }, { "epoch": 0.006735886766198257, "grad_norm": 3.453125, "learning_rate": 4.9998280148422104e-05, "loss": 1.0383, "step": 380 }, { "epoch": 0.006771338801809826, "grad_norm": 3.5625, "learning_rate": 4.999824723373345e-05, "loss": 0.9557, "step": 382 }, { "epoch": 0.006806790837421396, "grad_norm": 3.0625, "learning_rate": 4.999821400707186e-05, "loss": 0.9847, "step": 384 }, { "epoch": 0.006842242873032966, "grad_norm": 3.28125, "learning_rate": 4.9998180468437786e-05, "loss": 0.9431, "step": 386 }, { "epoch": 0.006877694908644536, "grad_norm": 3.9375, "learning_rate": 4.9998146617831616e-05, "loss": 1.0244, "step": 388 }, { "epoch": 0.006913146944256106, "grad_norm": 2.75, "learning_rate": 4.999811245525378e-05, "loss": 0.9302, "step": 390 }, { "epoch": 0.006948598979867675, "grad_norm": 3.078125, "learning_rate": 4.9998077980704716e-05, "loss": 0.9856, "step": 392 }, { "epoch": 0.006984051015479245, "grad_norm": 2.90625, "learning_rate": 4.999804319418484e-05, "loss": 0.978, "step": 394 }, { "epoch": 0.007019503051090815, "grad_norm": 3.71875, "learning_rate": 4.999800809569459e-05, "loss": 1.0063, "step": 396 }, { "epoch": 0.007054955086702385, "grad_norm": 3.453125, "learning_rate": 4.999797268523441e-05, "loss": 1.0083, "step": 398 }, { "epoch": 0.007090407122313954, "grad_norm": 3.53125, "learning_rate": 4.9997936962804736e-05, "loss": 1.0052, "step": 400 }, { "epoch": 0.007125859157925524, "grad_norm": 3.21875, "learning_rate": 4.999790092840602e-05, "loss": 0.9596, "step": 402 }, { "epoch": 0.0071613111935370935, "grad_norm": 3.015625, "learning_rate": 4.99978645820387e-05, "loss": 0.899, "step": 404 }, { "epoch": 0.007196763229148664, "grad_norm": 3.21875, "learning_rate": 4.999782792370323e-05, "loss": 0.9701, "step": 406 }, { "epoch": 0.007232215264760234, "grad_norm": 3.046875, "learning_rate": 4.999779095340009e-05, "loss": 0.9746, "step": 408 }, { "epoch": 0.007267667300371803, "grad_norm": 2.984375, "learning_rate": 4.999775367112972e-05, "loss": 1.0319, "step": 410 }, { "epoch": 0.007303119335983373, "grad_norm": 3.5625, "learning_rate": 4.999771607689259e-05, "loss": 0.9785, "step": 412 }, { "epoch": 0.007338571371594943, "grad_norm": 3.296875, "learning_rate": 4.9997678170689164e-05, "loss": 0.968, "step": 414 }, { "epoch": 0.007374023407206513, "grad_norm": 3.671875, "learning_rate": 4.999763995251993e-05, "loss": 0.9797, "step": 416 }, { "epoch": 0.007409475442818082, "grad_norm": 3.125, "learning_rate": 4.999760142238535e-05, "loss": 1.0253, "step": 418 }, { "epoch": 0.007444927478429652, "grad_norm": 3.34375, "learning_rate": 4.9997562580285914e-05, "loss": 0.9627, "step": 420 }, { "epoch": 0.007480379514041222, "grad_norm": 3.125, "learning_rate": 4.999752342622211e-05, "loss": 0.9922, "step": 422 }, { "epoch": 0.007515831549652792, "grad_norm": 3.53125, "learning_rate": 4.9997483960194405e-05, "loss": 0.973, "step": 424 }, { "epoch": 0.007551283585264361, "grad_norm": 3.296875, "learning_rate": 4.9997444182203316e-05, "loss": 0.9356, "step": 426 }, { "epoch": 0.007586735620875931, "grad_norm": 3.390625, "learning_rate": 4.9997404092249336e-05, "loss": 0.9873, "step": 428 }, { "epoch": 0.0076221876564875005, "grad_norm": 3.25, "learning_rate": 4.999736369033295e-05, "loss": 0.952, "step": 430 }, { "epoch": 0.007657639692099071, "grad_norm": 3.6875, "learning_rate": 4.999732297645467e-05, "loss": 0.9977, "step": 432 }, { "epoch": 0.007693091727710641, "grad_norm": 3.578125, "learning_rate": 4.999728195061502e-05, "loss": 1.0054, "step": 434 }, { "epoch": 0.00772854376332221, "grad_norm": 3.328125, "learning_rate": 4.999724061281448e-05, "loss": 0.9507, "step": 436 }, { "epoch": 0.00776399579893378, "grad_norm": 3.0625, "learning_rate": 4.9997198963053595e-05, "loss": 0.9529, "step": 438 }, { "epoch": 0.00779944783454535, "grad_norm": 2.96875, "learning_rate": 4.999715700133287e-05, "loss": 1.0338, "step": 440 }, { "epoch": 0.00783489987015692, "grad_norm": 3.296875, "learning_rate": 4.999711472765283e-05, "loss": 0.9645, "step": 442 }, { "epoch": 0.007870351905768489, "grad_norm": 3.3125, "learning_rate": 4.999707214201401e-05, "loss": 0.9345, "step": 444 }, { "epoch": 0.007905803941380058, "grad_norm": 3.40625, "learning_rate": 4.999702924441693e-05, "loss": 0.9372, "step": 446 }, { "epoch": 0.00794125597699163, "grad_norm": 3.578125, "learning_rate": 4.999698603486214e-05, "loss": 0.9688, "step": 448 }, { "epoch": 0.007976708012603199, "grad_norm": 3.296875, "learning_rate": 4.999694251335016e-05, "loss": 0.9641, "step": 450 }, { "epoch": 0.008012160048214768, "grad_norm": 3.125, "learning_rate": 4.9996898679881556e-05, "loss": 0.9635, "step": 452 }, { "epoch": 0.008047612083826337, "grad_norm": 3.515625, "learning_rate": 4.999685453445685e-05, "loss": 0.9986, "step": 454 }, { "epoch": 0.008083064119437908, "grad_norm": 3.484375, "learning_rate": 4.999681007707662e-05, "loss": 0.9567, "step": 456 }, { "epoch": 0.008118516155049478, "grad_norm": 3.09375, "learning_rate": 4.9996765307741394e-05, "loss": 0.8916, "step": 458 }, { "epoch": 0.008153968190661047, "grad_norm": 2.6875, "learning_rate": 4.999672022645175e-05, "loss": 0.9889, "step": 460 }, { "epoch": 0.008189420226272618, "grad_norm": 2.953125, "learning_rate": 4.999667483320825e-05, "loss": 0.9913, "step": 462 }, { "epoch": 0.008224872261884187, "grad_norm": 2.921875, "learning_rate": 4.999662912801144e-05, "loss": 0.9699, "step": 464 }, { "epoch": 0.008260324297495757, "grad_norm": 3.0625, "learning_rate": 4.999658311086191e-05, "loss": 0.9943, "step": 466 }, { "epoch": 0.008295776333107326, "grad_norm": 3.203125, "learning_rate": 4.9996536781760236e-05, "loss": 0.9744, "step": 468 }, { "epoch": 0.008331228368718897, "grad_norm": 3.625, "learning_rate": 4.999649014070698e-05, "loss": 0.9958, "step": 470 }, { "epoch": 0.008366680404330466, "grad_norm": 2.875, "learning_rate": 4.999644318770274e-05, "loss": 0.9638, "step": 472 }, { "epoch": 0.008402132439942036, "grad_norm": 3.28125, "learning_rate": 4.99963959227481e-05, "loss": 0.9533, "step": 474 }, { "epoch": 0.008437584475553605, "grad_norm": 3.171875, "learning_rate": 4.999634834584363e-05, "loss": 0.9881, "step": 476 }, { "epoch": 0.008473036511165176, "grad_norm": 2.890625, "learning_rate": 4.999630045698995e-05, "loss": 0.9871, "step": 478 }, { "epoch": 0.008508488546776745, "grad_norm": 2.90625, "learning_rate": 4.9996252256187646e-05, "loss": 0.9489, "step": 480 }, { "epoch": 0.008543940582388315, "grad_norm": 3.046875, "learning_rate": 4.999620374343732e-05, "loss": 0.9673, "step": 482 }, { "epoch": 0.008579392617999886, "grad_norm": 3.21875, "learning_rate": 4.999615491873957e-05, "loss": 0.9671, "step": 484 }, { "epoch": 0.008614844653611455, "grad_norm": 3.5, "learning_rate": 4.999610578209502e-05, "loss": 0.9086, "step": 486 }, { "epoch": 0.008650296689223024, "grad_norm": 3.390625, "learning_rate": 4.9996056333504275e-05, "loss": 0.9864, "step": 488 }, { "epoch": 0.008685748724834593, "grad_norm": 3.0625, "learning_rate": 4.999600657296796e-05, "loss": 0.9359, "step": 490 }, { "epoch": 0.008721200760446165, "grad_norm": 3.34375, "learning_rate": 4.9995956500486676e-05, "loss": 0.9322, "step": 492 }, { "epoch": 0.008756652796057734, "grad_norm": 3.375, "learning_rate": 4.999590611606107e-05, "loss": 0.9402, "step": 494 }, { "epoch": 0.008792104831669303, "grad_norm": 3.109375, "learning_rate": 4.9995855419691765e-05, "loss": 0.9617, "step": 496 }, { "epoch": 0.008827556867280872, "grad_norm": 3.28125, "learning_rate": 4.999580441137938e-05, "loss": 0.9414, "step": 498 }, { "epoch": 0.008863008902892443, "grad_norm": 3.46875, "learning_rate": 4.999575309112456e-05, "loss": 0.9602, "step": 500 }, { "epoch": 0.008898460938504013, "grad_norm": 2.859375, "learning_rate": 4.999570145892796e-05, "loss": 0.9457, "step": 502 }, { "epoch": 0.008933912974115582, "grad_norm": 3.1875, "learning_rate": 4.999564951479021e-05, "loss": 1.0017, "step": 504 }, { "epoch": 0.008969365009727153, "grad_norm": 3.421875, "learning_rate": 4.9995597258711954e-05, "loss": 0.9839, "step": 506 }, { "epoch": 0.009004817045338722, "grad_norm": 3.296875, "learning_rate": 4.999554469069386e-05, "loss": 0.9671, "step": 508 }, { "epoch": 0.009040269080950292, "grad_norm": 3.125, "learning_rate": 4.9995491810736564e-05, "loss": 0.9471, "step": 510 }, { "epoch": 0.009075721116561861, "grad_norm": 3.5, "learning_rate": 4.999543861884074e-05, "loss": 0.9322, "step": 512 }, { "epoch": 0.009111173152173432, "grad_norm": 3.5, "learning_rate": 4.9995385115007055e-05, "loss": 0.9606, "step": 514 }, { "epoch": 0.009146625187785001, "grad_norm": 3.03125, "learning_rate": 4.999533129923616e-05, "loss": 0.965, "step": 516 }, { "epoch": 0.00918207722339657, "grad_norm": 3.140625, "learning_rate": 4.999527717152874e-05, "loss": 1.0181, "step": 518 }, { "epoch": 0.00921752925900814, "grad_norm": 3.09375, "learning_rate": 4.999522273188547e-05, "loss": 0.9654, "step": 520 }, { "epoch": 0.009252981294619711, "grad_norm": 3.109375, "learning_rate": 4.9995167980307024e-05, "loss": 1.0039, "step": 522 }, { "epoch": 0.00928843333023128, "grad_norm": 3.0, "learning_rate": 4.9995112916794084e-05, "loss": 0.9443, "step": 524 }, { "epoch": 0.00932388536584285, "grad_norm": 3.609375, "learning_rate": 4.999505754134734e-05, "loss": 0.9385, "step": 526 }, { "epoch": 0.00935933740145442, "grad_norm": 3.125, "learning_rate": 4.999500185396749e-05, "loss": 0.9158, "step": 528 }, { "epoch": 0.00939478943706599, "grad_norm": 2.859375, "learning_rate": 4.999494585465523e-05, "loss": 0.9583, "step": 530 }, { "epoch": 0.00943024147267756, "grad_norm": 3.03125, "learning_rate": 4.999488954341124e-05, "loss": 0.9829, "step": 532 }, { "epoch": 0.009465693508289129, "grad_norm": 3.1875, "learning_rate": 4.999483292023624e-05, "loss": 0.9174, "step": 534 }, { "epoch": 0.0095011455439007, "grad_norm": 3.359375, "learning_rate": 4.9994775985130925e-05, "loss": 0.9316, "step": 536 }, { "epoch": 0.009536597579512269, "grad_norm": 3.109375, "learning_rate": 4.999471873809602e-05, "loss": 1.006, "step": 538 }, { "epoch": 0.009572049615123838, "grad_norm": 3.15625, "learning_rate": 4.999466117913223e-05, "loss": 1.008, "step": 540 }, { "epoch": 0.009607501650735407, "grad_norm": 3.359375, "learning_rate": 4.999460330824027e-05, "loss": 0.9878, "step": 542 }, { "epoch": 0.009642953686346978, "grad_norm": 3.0625, "learning_rate": 4.999454512542087e-05, "loss": 0.9953, "step": 544 }, { "epoch": 0.009678405721958548, "grad_norm": 3.0625, "learning_rate": 4.9994486630674744e-05, "loss": 0.9758, "step": 546 }, { "epoch": 0.009713857757570117, "grad_norm": 2.84375, "learning_rate": 4.999442782400264e-05, "loss": 0.9526, "step": 548 }, { "epoch": 0.009749309793181686, "grad_norm": 3.4375, "learning_rate": 4.999436870540528e-05, "loss": 0.9034, "step": 550 }, { "epoch": 0.009784761828793257, "grad_norm": 3.40625, "learning_rate": 4.999430927488341e-05, "loss": 0.9539, "step": 552 }, { "epoch": 0.009820213864404827, "grad_norm": 3.078125, "learning_rate": 4.999424953243776e-05, "loss": 0.9947, "step": 554 }, { "epoch": 0.009855665900016396, "grad_norm": 3.625, "learning_rate": 4.999418947806908e-05, "loss": 0.9475, "step": 556 }, { "epoch": 0.009891117935627967, "grad_norm": 3.34375, "learning_rate": 4.999412911177813e-05, "loss": 0.9541, "step": 558 }, { "epoch": 0.009926569971239536, "grad_norm": 3.75, "learning_rate": 4.999406843356564e-05, "loss": 0.9163, "step": 560 }, { "epoch": 0.009962022006851106, "grad_norm": 3.28125, "learning_rate": 4.99940074434324e-05, "loss": 0.932, "step": 562 }, { "epoch": 0.009997474042462675, "grad_norm": 3.734375, "learning_rate": 4.9993946141379145e-05, "loss": 0.9848, "step": 564 }, { "epoch": 0.010032926078074246, "grad_norm": 3.5625, "learning_rate": 4.999388452740664e-05, "loss": 0.9937, "step": 566 }, { "epoch": 0.010068378113685815, "grad_norm": 3.515625, "learning_rate": 4.999382260151567e-05, "loss": 0.9689, "step": 568 }, { "epoch": 0.010103830149297385, "grad_norm": 3.21875, "learning_rate": 4.9993760363706996e-05, "loss": 0.9488, "step": 570 }, { "epoch": 0.010139282184908954, "grad_norm": 3.15625, "learning_rate": 4.9993697813981404e-05, "loss": 0.9778, "step": 572 }, { "epoch": 0.010174734220520525, "grad_norm": 3.109375, "learning_rate": 4.999363495233966e-05, "loss": 0.9597, "step": 574 }, { "epoch": 0.010210186256132094, "grad_norm": 3.125, "learning_rate": 4.9993571778782565e-05, "loss": 0.968, "step": 576 }, { "epoch": 0.010245638291743664, "grad_norm": 3.25, "learning_rate": 4.9993508293310905e-05, "loss": 0.9588, "step": 578 }, { "epoch": 0.010281090327355235, "grad_norm": 2.734375, "learning_rate": 4.999344449592546e-05, "loss": 0.9334, "step": 580 }, { "epoch": 0.010316542362966804, "grad_norm": 3.25, "learning_rate": 4.999338038662703e-05, "loss": 0.975, "step": 582 }, { "epoch": 0.010351994398578373, "grad_norm": 3.28125, "learning_rate": 4.999331596541643e-05, "loss": 0.9033, "step": 584 }, { "epoch": 0.010387446434189943, "grad_norm": 3.609375, "learning_rate": 4.999325123229444e-05, "loss": 0.9773, "step": 586 }, { "epoch": 0.010422898469801514, "grad_norm": 3.234375, "learning_rate": 4.9993186187261884e-05, "loss": 0.9611, "step": 588 }, { "epoch": 0.010458350505413083, "grad_norm": 3.203125, "learning_rate": 4.999312083031957e-05, "loss": 0.9743, "step": 590 }, { "epoch": 0.010493802541024652, "grad_norm": 3.109375, "learning_rate": 4.999305516146832e-05, "loss": 0.9143, "step": 592 }, { "epoch": 0.010529254576636221, "grad_norm": 3.265625, "learning_rate": 4.999298918070894e-05, "loss": 0.9472, "step": 594 }, { "epoch": 0.010564706612247792, "grad_norm": 3.140625, "learning_rate": 4.999292288804227e-05, "loss": 0.934, "step": 596 }, { "epoch": 0.010600158647859362, "grad_norm": 3.109375, "learning_rate": 4.999285628346912e-05, "loss": 0.9666, "step": 598 }, { "epoch": 0.010635610683470931, "grad_norm": 2.765625, "learning_rate": 4.999278936699033e-05, "loss": 0.9855, "step": 600 }, { "epoch": 0.010671062719082502, "grad_norm": 3.109375, "learning_rate": 4.999272213860674e-05, "loss": 0.9246, "step": 602 }, { "epoch": 0.010706514754694071, "grad_norm": 3.578125, "learning_rate": 4.999265459831917e-05, "loss": 0.9739, "step": 604 }, { "epoch": 0.01074196679030564, "grad_norm": 3.015625, "learning_rate": 4.999258674612849e-05, "loss": 0.993, "step": 606 }, { "epoch": 0.01077741882591721, "grad_norm": 3.359375, "learning_rate": 4.999251858203553e-05, "loss": 0.9682, "step": 608 }, { "epoch": 0.010812870861528781, "grad_norm": 3.109375, "learning_rate": 4.9992450106041135e-05, "loss": 0.9582, "step": 610 }, { "epoch": 0.01084832289714035, "grad_norm": 2.921875, "learning_rate": 4.999238131814617e-05, "loss": 0.9464, "step": 612 }, { "epoch": 0.01088377493275192, "grad_norm": 3.078125, "learning_rate": 4.99923122183515e-05, "loss": 0.908, "step": 614 }, { "epoch": 0.010919226968363489, "grad_norm": 3.171875, "learning_rate": 4.999224280665798e-05, "loss": 0.9571, "step": 616 }, { "epoch": 0.01095467900397506, "grad_norm": 3.015625, "learning_rate": 4.9992173083066466e-05, "loss": 0.9992, "step": 618 }, { "epoch": 0.01099013103958663, "grad_norm": 3.0, "learning_rate": 4.999210304757784e-05, "loss": 0.9529, "step": 620 }, { "epoch": 0.011025583075198199, "grad_norm": 3.03125, "learning_rate": 4.999203270019298e-05, "loss": 0.9182, "step": 622 }, { "epoch": 0.01106103511080977, "grad_norm": 3.140625, "learning_rate": 4.9991962040912756e-05, "loss": 0.9034, "step": 624 }, { "epoch": 0.011096487146421339, "grad_norm": 3.140625, "learning_rate": 4.999189106973804e-05, "loss": 0.9601, "step": 626 }, { "epoch": 0.011131939182032908, "grad_norm": 3.59375, "learning_rate": 4.999181978666974e-05, "loss": 0.9431, "step": 628 }, { "epoch": 0.011167391217644478, "grad_norm": 2.71875, "learning_rate": 4.999174819170873e-05, "loss": 0.8976, "step": 630 }, { "epoch": 0.011202843253256049, "grad_norm": 3.03125, "learning_rate": 4.999167628485591e-05, "loss": 0.9586, "step": 632 }, { "epoch": 0.011238295288867618, "grad_norm": 2.78125, "learning_rate": 4.999160406611218e-05, "loss": 0.933, "step": 634 }, { "epoch": 0.011273747324479187, "grad_norm": 3.21875, "learning_rate": 4.999153153547842e-05, "loss": 0.9944, "step": 636 }, { "epoch": 0.011309199360090757, "grad_norm": 3.4375, "learning_rate": 4.999145869295557e-05, "loss": 0.9273, "step": 638 }, { "epoch": 0.011344651395702328, "grad_norm": 3.25, "learning_rate": 4.999138553854451e-05, "loss": 0.8649, "step": 640 }, { "epoch": 0.011380103431313897, "grad_norm": 3.28125, "learning_rate": 4.999131207224617e-05, "loss": 0.9654, "step": 642 }, { "epoch": 0.011415555466925466, "grad_norm": 2.890625, "learning_rate": 4.999123829406146e-05, "loss": 0.9769, "step": 644 }, { "epoch": 0.011451007502537035, "grad_norm": 3.0625, "learning_rate": 4.9991164203991295e-05, "loss": 0.9016, "step": 646 }, { "epoch": 0.011486459538148606, "grad_norm": 3.53125, "learning_rate": 4.999108980203662e-05, "loss": 0.9064, "step": 648 }, { "epoch": 0.011521911573760176, "grad_norm": 2.953125, "learning_rate": 4.999101508819833e-05, "loss": 0.8923, "step": 650 }, { "epoch": 0.011557363609371745, "grad_norm": 3.140625, "learning_rate": 4.9990940062477386e-05, "loss": 0.9613, "step": 652 }, { "epoch": 0.011592815644983316, "grad_norm": 3.421875, "learning_rate": 4.999086472487472e-05, "loss": 1.0074, "step": 654 }, { "epoch": 0.011628267680594885, "grad_norm": 3.015625, "learning_rate": 4.9990789075391264e-05, "loss": 0.9456, "step": 656 }, { "epoch": 0.011663719716206455, "grad_norm": 3.046875, "learning_rate": 4.9990713114027966e-05, "loss": 0.9861, "step": 658 }, { "epoch": 0.011699171751818024, "grad_norm": 3.25, "learning_rate": 4.9990636840785775e-05, "loss": 0.9855, "step": 660 }, { "epoch": 0.011734623787429595, "grad_norm": 3.234375, "learning_rate": 4.999056025566564e-05, "loss": 0.9234, "step": 662 }, { "epoch": 0.011770075823041164, "grad_norm": 3.015625, "learning_rate": 4.9990483358668514e-05, "loss": 0.9737, "step": 664 }, { "epoch": 0.011805527858652734, "grad_norm": 2.859375, "learning_rate": 4.999040614979536e-05, "loss": 0.9562, "step": 666 }, { "epoch": 0.011840979894264303, "grad_norm": 3.125, "learning_rate": 4.999032862904715e-05, "loss": 0.9459, "step": 668 }, { "epoch": 0.011876431929875874, "grad_norm": 3.03125, "learning_rate": 4.999025079642484e-05, "loss": 0.9167, "step": 670 }, { "epoch": 0.011911883965487443, "grad_norm": 2.9375, "learning_rate": 4.999017265192941e-05, "loss": 0.9068, "step": 672 }, { "epoch": 0.011947336001099013, "grad_norm": 2.9375, "learning_rate": 4.999009419556182e-05, "loss": 0.9414, "step": 674 }, { "epoch": 0.011982788036710584, "grad_norm": 3.59375, "learning_rate": 4.999001542732307e-05, "loss": 0.9365, "step": 676 }, { "epoch": 0.012018240072322153, "grad_norm": 3.359375, "learning_rate": 4.9989936347214125e-05, "loss": 0.9686, "step": 678 }, { "epoch": 0.012053692107933722, "grad_norm": 3.65625, "learning_rate": 4.9989856955235985e-05, "loss": 0.9942, "step": 680 }, { "epoch": 0.012089144143545292, "grad_norm": 3.453125, "learning_rate": 4.998977725138964e-05, "loss": 0.9682, "step": 682 }, { "epoch": 0.012124596179156863, "grad_norm": 3.390625, "learning_rate": 4.998969723567607e-05, "loss": 0.9719, "step": 684 }, { "epoch": 0.012160048214768432, "grad_norm": 2.96875, "learning_rate": 4.998961690809628e-05, "loss": 0.901, "step": 686 }, { "epoch": 0.012195500250380001, "grad_norm": 2.984375, "learning_rate": 4.998953626865128e-05, "loss": 0.9027, "step": 688 }, { "epoch": 0.01223095228599157, "grad_norm": 3.09375, "learning_rate": 4.9989455317342076e-05, "loss": 0.943, "step": 690 }, { "epoch": 0.012266404321603142, "grad_norm": 3.59375, "learning_rate": 4.9989374054169676e-05, "loss": 0.9789, "step": 692 }, { "epoch": 0.01230185635721471, "grad_norm": 3.125, "learning_rate": 4.998929247913509e-05, "loss": 0.9397, "step": 694 }, { "epoch": 0.01233730839282628, "grad_norm": 2.84375, "learning_rate": 4.998921059223933e-05, "loss": 0.9447, "step": 696 }, { "epoch": 0.012372760428437851, "grad_norm": 3.390625, "learning_rate": 4.9989128393483445e-05, "loss": 0.946, "step": 698 }, { "epoch": 0.01240821246404942, "grad_norm": 2.734375, "learning_rate": 4.9989045882868426e-05, "loss": 0.9301, "step": 700 }, { "epoch": 0.01244366449966099, "grad_norm": 3.0625, "learning_rate": 4.998896306039533e-05, "loss": 0.9113, "step": 702 }, { "epoch": 0.012479116535272559, "grad_norm": 2.734375, "learning_rate": 4.9988879926065174e-05, "loss": 0.9572, "step": 704 }, { "epoch": 0.01251456857088413, "grad_norm": 3.453125, "learning_rate": 4.998879647987901e-05, "loss": 0.9825, "step": 706 }, { "epoch": 0.0125500206064957, "grad_norm": 3.265625, "learning_rate": 4.998871272183786e-05, "loss": 0.9766, "step": 708 }, { "epoch": 0.012585472642107269, "grad_norm": 2.875, "learning_rate": 4.998862865194278e-05, "loss": 0.9208, "step": 710 }, { "epoch": 0.012620924677718838, "grad_norm": 2.96875, "learning_rate": 4.998854427019483e-05, "loss": 0.9375, "step": 712 }, { "epoch": 0.012656376713330409, "grad_norm": 3.140625, "learning_rate": 4.998845957659504e-05, "loss": 0.9579, "step": 714 }, { "epoch": 0.012691828748941978, "grad_norm": 3.328125, "learning_rate": 4.9988374571144484e-05, "loss": 0.9579, "step": 716 }, { "epoch": 0.012727280784553548, "grad_norm": 3.078125, "learning_rate": 4.9988289253844214e-05, "loss": 0.8884, "step": 718 }, { "epoch": 0.012762732820165119, "grad_norm": 3.0, "learning_rate": 4.998820362469531e-05, "loss": 0.9132, "step": 720 }, { "epoch": 0.012798184855776688, "grad_norm": 3.109375, "learning_rate": 4.998811768369882e-05, "loss": 0.958, "step": 722 }, { "epoch": 0.012833636891388257, "grad_norm": 3.0625, "learning_rate": 4.998803143085583e-05, "loss": 0.9573, "step": 724 }, { "epoch": 0.012869088926999827, "grad_norm": 3.328125, "learning_rate": 4.9987944866167405e-05, "loss": 0.9226, "step": 726 }, { "epoch": 0.012904540962611398, "grad_norm": 3.328125, "learning_rate": 4.998785798963464e-05, "loss": 0.9214, "step": 728 }, { "epoch": 0.012939992998222967, "grad_norm": 3.328125, "learning_rate": 4.9987770801258617e-05, "loss": 0.9307, "step": 730 }, { "epoch": 0.012975445033834536, "grad_norm": 3.078125, "learning_rate": 4.998768330104041e-05, "loss": 0.9398, "step": 732 }, { "epoch": 0.013010897069446106, "grad_norm": 3.03125, "learning_rate": 4.998759548898112e-05, "loss": 0.9196, "step": 734 }, { "epoch": 0.013046349105057677, "grad_norm": 2.921875, "learning_rate": 4.998750736508184e-05, "loss": 0.9937, "step": 736 }, { "epoch": 0.013081801140669246, "grad_norm": 3.265625, "learning_rate": 4.998741892934368e-05, "loss": 0.963, "step": 738 }, { "epoch": 0.013117253176280815, "grad_norm": 3.6875, "learning_rate": 4.998733018176774e-05, "loss": 0.9119, "step": 740 }, { "epoch": 0.013152705211892384, "grad_norm": 3.234375, "learning_rate": 4.9987241122355116e-05, "loss": 0.861, "step": 742 }, { "epoch": 0.013188157247503956, "grad_norm": 3.4375, "learning_rate": 4.9987151751106934e-05, "loss": 0.9322, "step": 744 }, { "epoch": 0.013223609283115525, "grad_norm": 3.8125, "learning_rate": 4.998706206802429e-05, "loss": 0.9604, "step": 746 }, { "epoch": 0.013259061318727094, "grad_norm": 3.125, "learning_rate": 4.9986972073108326e-05, "loss": 0.9062, "step": 748 }, { "epoch": 0.013294513354338665, "grad_norm": 3.1875, "learning_rate": 4.998688176636015e-05, "loss": 0.9155, "step": 750 }, { "epoch": 0.013329965389950234, "grad_norm": 3.375, "learning_rate": 4.99867911477809e-05, "loss": 0.897, "step": 752 }, { "epoch": 0.013365417425561804, "grad_norm": 2.953125, "learning_rate": 4.9986700217371694e-05, "loss": 0.9338, "step": 754 }, { "epoch": 0.013400869461173373, "grad_norm": 3.171875, "learning_rate": 4.998660897513367e-05, "loss": 0.9416, "step": 756 }, { "epoch": 0.013436321496784944, "grad_norm": 3.125, "learning_rate": 4.9986517421067986e-05, "loss": 0.9024, "step": 758 }, { "epoch": 0.013471773532396513, "grad_norm": 3.09375, "learning_rate": 4.9986425555175766e-05, "loss": 0.9484, "step": 760 }, { "epoch": 0.013507225568008083, "grad_norm": 2.953125, "learning_rate": 4.998633337745815e-05, "loss": 0.8972, "step": 762 }, { "epoch": 0.013542677603619652, "grad_norm": 3.390625, "learning_rate": 4.99862408879163e-05, "loss": 0.9608, "step": 764 }, { "epoch": 0.013578129639231223, "grad_norm": 3.375, "learning_rate": 4.998614808655137e-05, "loss": 0.9107, "step": 766 }, { "epoch": 0.013613581674842792, "grad_norm": 3.109375, "learning_rate": 4.9986054973364516e-05, "loss": 0.9088, "step": 768 }, { "epoch": 0.013649033710454362, "grad_norm": 3.234375, "learning_rate": 4.99859615483569e-05, "loss": 0.9374, "step": 770 }, { "epoch": 0.013684485746065933, "grad_norm": 3.125, "learning_rate": 4.9985867811529685e-05, "loss": 0.9202, "step": 772 }, { "epoch": 0.013719937781677502, "grad_norm": 2.71875, "learning_rate": 4.998577376288405e-05, "loss": 0.9546, "step": 774 }, { "epoch": 0.013755389817289071, "grad_norm": 2.953125, "learning_rate": 4.998567940242116e-05, "loss": 0.9569, "step": 776 }, { "epoch": 0.01379084185290064, "grad_norm": 3.0, "learning_rate": 4.9985584730142185e-05, "loss": 0.9551, "step": 778 }, { "epoch": 0.013826293888512212, "grad_norm": 2.78125, "learning_rate": 4.998548974604833e-05, "loss": 0.9098, "step": 780 }, { "epoch": 0.013861745924123781, "grad_norm": 3.15625, "learning_rate": 4.998539445014077e-05, "loss": 0.9948, "step": 782 }, { "epoch": 0.01389719795973535, "grad_norm": 3.125, "learning_rate": 4.998529884242068e-05, "loss": 0.9789, "step": 784 }, { "epoch": 0.01393264999534692, "grad_norm": 3.390625, "learning_rate": 4.998520292288927e-05, "loss": 0.9182, "step": 786 }, { "epoch": 0.01396810203095849, "grad_norm": 2.921875, "learning_rate": 4.998510669154773e-05, "loss": 0.9248, "step": 788 }, { "epoch": 0.01400355406657006, "grad_norm": 3.125, "learning_rate": 4.998501014839726e-05, "loss": 0.9428, "step": 790 }, { "epoch": 0.01403900610218163, "grad_norm": 2.953125, "learning_rate": 4.998491329343907e-05, "loss": 0.9488, "step": 792 }, { "epoch": 0.0140744581377932, "grad_norm": 3.234375, "learning_rate": 4.998481612667437e-05, "loss": 0.9789, "step": 794 }, { "epoch": 0.01410991017340477, "grad_norm": 3.28125, "learning_rate": 4.9984718648104365e-05, "loss": 0.9332, "step": 796 }, { "epoch": 0.014145362209016339, "grad_norm": 3.203125, "learning_rate": 4.998462085773027e-05, "loss": 0.9086, "step": 798 }, { "epoch": 0.014180814244627908, "grad_norm": 3.375, "learning_rate": 4.998452275555332e-05, "loss": 0.9826, "step": 800 }, { "epoch": 0.01421626628023948, "grad_norm": 3.09375, "learning_rate": 4.9984424341574724e-05, "loss": 0.903, "step": 802 }, { "epoch": 0.014251718315851048, "grad_norm": 3.984375, "learning_rate": 4.998432561579572e-05, "loss": 0.9578, "step": 804 }, { "epoch": 0.014287170351462618, "grad_norm": 3.171875, "learning_rate": 4.998422657821753e-05, "loss": 0.9535, "step": 806 }, { "epoch": 0.014322622387074187, "grad_norm": 3.375, "learning_rate": 4.99841272288414e-05, "loss": 0.945, "step": 808 }, { "epoch": 0.014358074422685758, "grad_norm": 2.890625, "learning_rate": 4.998402756766857e-05, "loss": 0.9676, "step": 810 }, { "epoch": 0.014393526458297327, "grad_norm": 2.859375, "learning_rate": 4.9983927594700275e-05, "loss": 0.874, "step": 812 }, { "epoch": 0.014428978493908897, "grad_norm": 3.125, "learning_rate": 4.998382730993777e-05, "loss": 0.935, "step": 814 }, { "epoch": 0.014464430529520468, "grad_norm": 2.71875, "learning_rate": 4.99837267133823e-05, "loss": 0.9059, "step": 816 }, { "epoch": 0.014499882565132037, "grad_norm": 3.0625, "learning_rate": 4.998362580503513e-05, "loss": 0.9293, "step": 818 }, { "epoch": 0.014535334600743606, "grad_norm": 3.3125, "learning_rate": 4.998352458489751e-05, "loss": 1.0035, "step": 820 }, { "epoch": 0.014570786636355176, "grad_norm": 3.4375, "learning_rate": 4.99834230529707e-05, "loss": 0.9548, "step": 822 }, { "epoch": 0.014606238671966747, "grad_norm": 3.171875, "learning_rate": 4.998332120925598e-05, "loss": 0.9474, "step": 824 }, { "epoch": 0.014641690707578316, "grad_norm": 2.984375, "learning_rate": 4.9983219053754627e-05, "loss": 0.9112, "step": 826 }, { "epoch": 0.014677142743189885, "grad_norm": 3.109375, "learning_rate": 4.9983116586467896e-05, "loss": 0.9462, "step": 828 }, { "epoch": 0.014712594778801455, "grad_norm": 3.09375, "learning_rate": 4.998301380739706e-05, "loss": 0.9302, "step": 830 }, { "epoch": 0.014748046814413026, "grad_norm": 2.734375, "learning_rate": 4.998291071654343e-05, "loss": 0.9114, "step": 832 }, { "epoch": 0.014783498850024595, "grad_norm": 2.765625, "learning_rate": 4.9982807313908273e-05, "loss": 0.9189, "step": 834 }, { "epoch": 0.014818950885636164, "grad_norm": 3.0625, "learning_rate": 4.998270359949289e-05, "loss": 0.9278, "step": 836 }, { "epoch": 0.014854402921247734, "grad_norm": 3.28125, "learning_rate": 4.998259957329856e-05, "loss": 0.9242, "step": 838 }, { "epoch": 0.014889854956859305, "grad_norm": 2.9375, "learning_rate": 4.99824952353266e-05, "loss": 0.9375, "step": 840 }, { "epoch": 0.014925306992470874, "grad_norm": 3.078125, "learning_rate": 4.9982390585578295e-05, "loss": 0.9858, "step": 842 }, { "epoch": 0.014960759028082443, "grad_norm": 3.34375, "learning_rate": 4.9982285624054956e-05, "loss": 0.9225, "step": 844 }, { "epoch": 0.014996211063694014, "grad_norm": 2.8125, "learning_rate": 4.9982180350757905e-05, "loss": 0.9209, "step": 846 }, { "epoch": 0.015031663099305584, "grad_norm": 3.453125, "learning_rate": 4.998207476568845e-05, "loss": 0.9432, "step": 848 }, { "epoch": 0.015067115134917153, "grad_norm": 3.28125, "learning_rate": 4.99819688688479e-05, "loss": 0.9355, "step": 850 }, { "epoch": 0.015102567170528722, "grad_norm": 2.96875, "learning_rate": 4.998186266023758e-05, "loss": 0.9108, "step": 852 }, { "epoch": 0.015138019206140293, "grad_norm": 3.171875, "learning_rate": 4.998175613985882e-05, "loss": 0.9086, "step": 854 }, { "epoch": 0.015173471241751862, "grad_norm": 2.90625, "learning_rate": 4.998164930771294e-05, "loss": 0.8956, "step": 856 }, { "epoch": 0.015208923277363432, "grad_norm": 3.125, "learning_rate": 4.998154216380129e-05, "loss": 0.9252, "step": 858 }, { "epoch": 0.015244375312975001, "grad_norm": 3.28125, "learning_rate": 4.9981434708125184e-05, "loss": 0.9134, "step": 860 }, { "epoch": 0.015279827348586572, "grad_norm": 3.015625, "learning_rate": 4.9981326940685985e-05, "loss": 0.9403, "step": 862 }, { "epoch": 0.015315279384198141, "grad_norm": 2.9375, "learning_rate": 4.998121886148503e-05, "loss": 0.8918, "step": 864 }, { "epoch": 0.01535073141980971, "grad_norm": 3.3125, "learning_rate": 4.998111047052366e-05, "loss": 0.9087, "step": 866 }, { "epoch": 0.015386183455421282, "grad_norm": 3.625, "learning_rate": 4.9981001767803245e-05, "loss": 0.9514, "step": 868 }, { "epoch": 0.015421635491032851, "grad_norm": 3.296875, "learning_rate": 4.998089275332513e-05, "loss": 0.9317, "step": 870 }, { "epoch": 0.01545708752664442, "grad_norm": 3.40625, "learning_rate": 4.998078342709067e-05, "loss": 0.874, "step": 872 }, { "epoch": 0.01549253956225599, "grad_norm": 3.1875, "learning_rate": 4.9980673789101234e-05, "loss": 0.9233, "step": 874 }, { "epoch": 0.01552799159786756, "grad_norm": 3.046875, "learning_rate": 4.99805638393582e-05, "loss": 0.9601, "step": 876 }, { "epoch": 0.01556344363347913, "grad_norm": 3.078125, "learning_rate": 4.998045357786293e-05, "loss": 0.9901, "step": 878 }, { "epoch": 0.0155988956690907, "grad_norm": 2.703125, "learning_rate": 4.9980343004616795e-05, "loss": 0.9001, "step": 880 }, { "epoch": 0.01563434770470227, "grad_norm": 3.15625, "learning_rate": 4.998023211962119e-05, "loss": 0.9135, "step": 882 }, { "epoch": 0.01566979974031384, "grad_norm": 2.59375, "learning_rate": 4.998012092287749e-05, "loss": 0.9025, "step": 884 }, { "epoch": 0.015705251775925407, "grad_norm": 2.84375, "learning_rate": 4.998000941438709e-05, "loss": 0.9301, "step": 886 }, { "epoch": 0.015740703811536978, "grad_norm": 3.015625, "learning_rate": 4.997989759415136e-05, "loss": 0.9281, "step": 888 }, { "epoch": 0.01577615584714855, "grad_norm": 2.921875, "learning_rate": 4.997978546217172e-05, "loss": 0.8869, "step": 890 }, { "epoch": 0.015811607882760117, "grad_norm": 3.375, "learning_rate": 4.9979673018449555e-05, "loss": 0.9427, "step": 892 }, { "epoch": 0.015847059918371688, "grad_norm": 2.984375, "learning_rate": 4.9979560262986284e-05, "loss": 0.9285, "step": 894 }, { "epoch": 0.01588251195398326, "grad_norm": 3.265625, "learning_rate": 4.9979447195783304e-05, "loss": 0.9488, "step": 896 }, { "epoch": 0.015917963989594826, "grad_norm": 3.171875, "learning_rate": 4.997933381684202e-05, "loss": 0.9196, "step": 898 }, { "epoch": 0.015953416025206398, "grad_norm": 3.140625, "learning_rate": 4.997922012616385e-05, "loss": 0.9429, "step": 900 }, { "epoch": 0.01598886806081797, "grad_norm": 3.0625, "learning_rate": 4.997910612375022e-05, "loss": 0.9284, "step": 902 }, { "epoch": 0.016024320096429536, "grad_norm": 3.09375, "learning_rate": 4.997899180960255e-05, "loss": 0.9539, "step": 904 }, { "epoch": 0.016059772132041107, "grad_norm": 3.359375, "learning_rate": 4.997887718372226e-05, "loss": 0.9394, "step": 906 }, { "epoch": 0.016095224167652675, "grad_norm": 3.765625, "learning_rate": 4.997876224611079e-05, "loss": 0.9208, "step": 908 }, { "epoch": 0.016130676203264246, "grad_norm": 3.5, "learning_rate": 4.9978646996769563e-05, "loss": 0.9476, "step": 910 }, { "epoch": 0.016166128238875817, "grad_norm": 2.890625, "learning_rate": 4.997853143570003e-05, "loss": 0.9158, "step": 912 }, { "epoch": 0.016201580274487384, "grad_norm": 3.046875, "learning_rate": 4.997841556290362e-05, "loss": 0.9036, "step": 914 }, { "epoch": 0.016237032310098955, "grad_norm": 2.984375, "learning_rate": 4.99782993783818e-05, "loss": 0.8826, "step": 916 }, { "epoch": 0.016272484345710526, "grad_norm": 2.765625, "learning_rate": 4.997818288213599e-05, "loss": 0.906, "step": 918 }, { "epoch": 0.016307936381322094, "grad_norm": 3.109375, "learning_rate": 4.997806607416767e-05, "loss": 0.9132, "step": 920 }, { "epoch": 0.016343388416933665, "grad_norm": 2.84375, "learning_rate": 4.997794895447829e-05, "loss": 0.9249, "step": 922 }, { "epoch": 0.016378840452545236, "grad_norm": 3.46875, "learning_rate": 4.9977831523069305e-05, "loss": 0.942, "step": 924 }, { "epoch": 0.016414292488156804, "grad_norm": 3.25, "learning_rate": 4.9977713779942195e-05, "loss": 0.9376, "step": 926 }, { "epoch": 0.016449744523768375, "grad_norm": 3.15625, "learning_rate": 4.99775957250984e-05, "loss": 0.8812, "step": 928 }, { "epoch": 0.016485196559379942, "grad_norm": 3.234375, "learning_rate": 4.997747735853943e-05, "loss": 0.9415, "step": 930 }, { "epoch": 0.016520648594991513, "grad_norm": 3.3125, "learning_rate": 4.997735868026674e-05, "loss": 0.9229, "step": 932 }, { "epoch": 0.016556100630603084, "grad_norm": 2.984375, "learning_rate": 4.9977239690281816e-05, "loss": 0.9193, "step": 934 }, { "epoch": 0.016591552666214652, "grad_norm": 2.859375, "learning_rate": 4.9977120388586144e-05, "loss": 0.9391, "step": 936 }, { "epoch": 0.016627004701826223, "grad_norm": 3.1875, "learning_rate": 4.99770007751812e-05, "loss": 0.8891, "step": 938 }, { "epoch": 0.016662456737437794, "grad_norm": 2.828125, "learning_rate": 4.99768808500685e-05, "loss": 0.9539, "step": 940 }, { "epoch": 0.01669790877304936, "grad_norm": 3.140625, "learning_rate": 4.997676061324953e-05, "loss": 0.9205, "step": 942 }, { "epoch": 0.016733360808660933, "grad_norm": 3.25, "learning_rate": 4.997664006472579e-05, "loss": 0.9477, "step": 944 }, { "epoch": 0.016768812844272504, "grad_norm": 2.96875, "learning_rate": 4.997651920449878e-05, "loss": 0.9004, "step": 946 }, { "epoch": 0.01680426487988407, "grad_norm": 2.96875, "learning_rate": 4.9976398032570015e-05, "loss": 0.8733, "step": 948 }, { "epoch": 0.016839716915495642, "grad_norm": 3.015625, "learning_rate": 4.9976276548941005e-05, "loss": 0.9192, "step": 950 }, { "epoch": 0.01687516895110721, "grad_norm": 2.96875, "learning_rate": 4.997615475361327e-05, "loss": 0.9484, "step": 952 }, { "epoch": 0.01691062098671878, "grad_norm": 3.078125, "learning_rate": 4.997603264658832e-05, "loss": 0.8731, "step": 954 }, { "epoch": 0.016946073022330352, "grad_norm": 3.65625, "learning_rate": 4.997591022786768e-05, "loss": 0.9159, "step": 956 }, { "epoch": 0.01698152505794192, "grad_norm": 2.953125, "learning_rate": 4.997578749745288e-05, "loss": 0.9644, "step": 958 }, { "epoch": 0.01701697709355349, "grad_norm": 3.0, "learning_rate": 4.997566445534547e-05, "loss": 0.9076, "step": 960 }, { "epoch": 0.01705242912916506, "grad_norm": 2.6875, "learning_rate": 4.997554110154696e-05, "loss": 0.9178, "step": 962 }, { "epoch": 0.01708788116477663, "grad_norm": 3.34375, "learning_rate": 4.99754174360589e-05, "loss": 0.935, "step": 964 }, { "epoch": 0.0171233332003882, "grad_norm": 2.65625, "learning_rate": 4.997529345888284e-05, "loss": 0.9035, "step": 966 }, { "epoch": 0.01715878523599977, "grad_norm": 3.046875, "learning_rate": 4.9975169170020306e-05, "loss": 0.8921, "step": 968 }, { "epoch": 0.01719423727161134, "grad_norm": 3.3125, "learning_rate": 4.997504456947287e-05, "loss": 0.8597, "step": 970 }, { "epoch": 0.01722968930722291, "grad_norm": 3.703125, "learning_rate": 4.997491965724208e-05, "loss": 0.9268, "step": 972 }, { "epoch": 0.017265141342834477, "grad_norm": 3.21875, "learning_rate": 4.99747944333295e-05, "loss": 0.9444, "step": 974 }, { "epoch": 0.01730059337844605, "grad_norm": 3.40625, "learning_rate": 4.997466889773668e-05, "loss": 0.9268, "step": 976 }, { "epoch": 0.01733604541405762, "grad_norm": 2.96875, "learning_rate": 4.997454305046519e-05, "loss": 0.9425, "step": 978 }, { "epoch": 0.017371497449669187, "grad_norm": 3.0, "learning_rate": 4.9974416891516615e-05, "loss": 0.9128, "step": 980 }, { "epoch": 0.017406949485280758, "grad_norm": 3.140625, "learning_rate": 4.9974290420892514e-05, "loss": 0.9078, "step": 982 }, { "epoch": 0.01744240152089233, "grad_norm": 3.203125, "learning_rate": 4.9974163638594475e-05, "loss": 0.9496, "step": 984 }, { "epoch": 0.017477853556503897, "grad_norm": 2.9375, "learning_rate": 4.9974036544624063e-05, "loss": 0.9128, "step": 986 }, { "epoch": 0.017513305592115468, "grad_norm": 3.046875, "learning_rate": 4.997390913898289e-05, "loss": 0.8829, "step": 988 }, { "epoch": 0.01754875762772704, "grad_norm": 2.71875, "learning_rate": 4.997378142167253e-05, "loss": 0.8951, "step": 990 }, { "epoch": 0.017584209663338606, "grad_norm": 2.875, "learning_rate": 4.997365339269457e-05, "loss": 0.9845, "step": 992 }, { "epoch": 0.017619661698950177, "grad_norm": 3.34375, "learning_rate": 4.997352505205062e-05, "loss": 0.8814, "step": 994 }, { "epoch": 0.017655113734561745, "grad_norm": 3.546875, "learning_rate": 4.997339639974229e-05, "loss": 0.9382, "step": 996 }, { "epoch": 0.017690565770173316, "grad_norm": 2.609375, "learning_rate": 4.9973267435771165e-05, "loss": 0.9112, "step": 998 }, { "epoch": 0.017726017805784887, "grad_norm": 3.03125, "learning_rate": 4.9973138160138865e-05, "loss": 0.886, "step": 1000 }, { "epoch": 0.017761469841396454, "grad_norm": 2.875, "learning_rate": 4.9973008572847e-05, "loss": 0.9003, "step": 1002 }, { "epoch": 0.017796921877008025, "grad_norm": 2.921875, "learning_rate": 4.9972878673897194e-05, "loss": 0.9066, "step": 1004 }, { "epoch": 0.017832373912619597, "grad_norm": 2.765625, "learning_rate": 4.997274846329106e-05, "loss": 0.8871, "step": 1006 }, { "epoch": 0.017867825948231164, "grad_norm": 2.6875, "learning_rate": 4.9972617941030236e-05, "loss": 0.8963, "step": 1008 }, { "epoch": 0.017903277983842735, "grad_norm": 3.15625, "learning_rate": 4.9972487107116336e-05, "loss": 0.8911, "step": 1010 }, { "epoch": 0.017938730019454306, "grad_norm": 3.265625, "learning_rate": 4.9972355961550995e-05, "loss": 0.9564, "step": 1012 }, { "epoch": 0.017974182055065874, "grad_norm": 3.40625, "learning_rate": 4.997222450433585e-05, "loss": 0.9578, "step": 1014 }, { "epoch": 0.018009634090677445, "grad_norm": 3.046875, "learning_rate": 4.9972092735472556e-05, "loss": 0.9335, "step": 1016 }, { "epoch": 0.018045086126289012, "grad_norm": 3.34375, "learning_rate": 4.997196065496274e-05, "loss": 0.9272, "step": 1018 }, { "epoch": 0.018080538161900583, "grad_norm": 2.921875, "learning_rate": 4.997182826280805e-05, "loss": 0.9273, "step": 1020 }, { "epoch": 0.018115990197512154, "grad_norm": 2.796875, "learning_rate": 4.9971695559010155e-05, "loss": 0.8782, "step": 1022 }, { "epoch": 0.018151442233123722, "grad_norm": 3.03125, "learning_rate": 4.997156254357069e-05, "loss": 0.935, "step": 1024 }, { "epoch": 0.018186894268735293, "grad_norm": 3.671875, "learning_rate": 4.997142921649134e-05, "loss": 0.9054, "step": 1026 }, { "epoch": 0.018222346304346864, "grad_norm": 3.25, "learning_rate": 4.997129557777375e-05, "loss": 0.911, "step": 1028 }, { "epoch": 0.01825779833995843, "grad_norm": 2.71875, "learning_rate": 4.9971161627419585e-05, "loss": 0.9384, "step": 1030 }, { "epoch": 0.018293250375570003, "grad_norm": 3.390625, "learning_rate": 4.9971027365430526e-05, "loss": 0.9084, "step": 1032 }, { "epoch": 0.018328702411181574, "grad_norm": 2.921875, "learning_rate": 4.997089279180825e-05, "loss": 0.8903, "step": 1034 }, { "epoch": 0.01836415444679314, "grad_norm": 2.8125, "learning_rate": 4.997075790655443e-05, "loss": 0.9273, "step": 1036 }, { "epoch": 0.018399606482404712, "grad_norm": 2.65625, "learning_rate": 4.997062270967075e-05, "loss": 0.9002, "step": 1038 }, { "epoch": 0.01843505851801628, "grad_norm": 3.21875, "learning_rate": 4.9970487201158903e-05, "loss": 0.887, "step": 1040 }, { "epoch": 0.01847051055362785, "grad_norm": 3.265625, "learning_rate": 4.997035138102058e-05, "loss": 0.9246, "step": 1042 }, { "epoch": 0.018505962589239422, "grad_norm": 2.90625, "learning_rate": 4.997021524925747e-05, "loss": 0.9138, "step": 1044 }, { "epoch": 0.01854141462485099, "grad_norm": 3.140625, "learning_rate": 4.997007880587127e-05, "loss": 0.9084, "step": 1046 }, { "epoch": 0.01857686666046256, "grad_norm": 3.28125, "learning_rate": 4.99699420508637e-05, "loss": 0.9483, "step": 1048 }, { "epoch": 0.01861231869607413, "grad_norm": 3.09375, "learning_rate": 4.996980498423644e-05, "loss": 0.9521, "step": 1050 }, { "epoch": 0.0186477707316857, "grad_norm": 3.109375, "learning_rate": 4.996966760599122e-05, "loss": 0.9273, "step": 1052 }, { "epoch": 0.01868322276729727, "grad_norm": 2.875, "learning_rate": 4.996952991612975e-05, "loss": 0.8884, "step": 1054 }, { "epoch": 0.01871867480290884, "grad_norm": 2.640625, "learning_rate": 4.996939191465375e-05, "loss": 0.9442, "step": 1056 }, { "epoch": 0.01875412683852041, "grad_norm": 2.890625, "learning_rate": 4.9969253601564935e-05, "loss": 0.9197, "step": 1058 }, { "epoch": 0.01878957887413198, "grad_norm": 3.265625, "learning_rate": 4.996911497686503e-05, "loss": 0.9621, "step": 1060 }, { "epoch": 0.018825030909743547, "grad_norm": 2.953125, "learning_rate": 4.9968976040555785e-05, "loss": 0.9149, "step": 1062 }, { "epoch": 0.01886048294535512, "grad_norm": 3.96875, "learning_rate": 4.9968836792638904e-05, "loss": 0.9447, "step": 1064 }, { "epoch": 0.01889593498096669, "grad_norm": 3.046875, "learning_rate": 4.996869723311616e-05, "loss": 0.9461, "step": 1066 }, { "epoch": 0.018931387016578257, "grad_norm": 2.671875, "learning_rate": 4.996855736198926e-05, "loss": 0.9095, "step": 1068 }, { "epoch": 0.018966839052189828, "grad_norm": 3.21875, "learning_rate": 4.996841717925997e-05, "loss": 0.9161, "step": 1070 }, { "epoch": 0.0190022910878014, "grad_norm": 3.03125, "learning_rate": 4.996827668493003e-05, "loss": 0.9484, "step": 1072 }, { "epoch": 0.019037743123412967, "grad_norm": 3.421875, "learning_rate": 4.99681358790012e-05, "loss": 0.9372, "step": 1074 }, { "epoch": 0.019073195159024538, "grad_norm": 2.65625, "learning_rate": 4.996799476147524e-05, "loss": 0.9262, "step": 1076 }, { "epoch": 0.019108647194636105, "grad_norm": 3.234375, "learning_rate": 4.99678533323539e-05, "loss": 0.9125, "step": 1078 }, { "epoch": 0.019144099230247676, "grad_norm": 2.828125, "learning_rate": 4.996771159163895e-05, "loss": 0.8768, "step": 1080 }, { "epoch": 0.019179551265859247, "grad_norm": 3.046875, "learning_rate": 4.996756953933216e-05, "loss": 0.9753, "step": 1082 }, { "epoch": 0.019215003301470815, "grad_norm": 2.953125, "learning_rate": 4.996742717543531e-05, "loss": 0.8453, "step": 1084 }, { "epoch": 0.019250455337082386, "grad_norm": 3.140625, "learning_rate": 4.996728449995016e-05, "loss": 0.96, "step": 1086 }, { "epoch": 0.019285907372693957, "grad_norm": 3.34375, "learning_rate": 4.99671415128785e-05, "loss": 0.9682, "step": 1088 }, { "epoch": 0.019321359408305525, "grad_norm": 3.640625, "learning_rate": 4.996699821422212e-05, "loss": 0.922, "step": 1090 }, { "epoch": 0.019356811443917096, "grad_norm": 3.125, "learning_rate": 4.99668546039828e-05, "loss": 0.8806, "step": 1092 }, { "epoch": 0.019392263479528667, "grad_norm": 3.046875, "learning_rate": 4.996671068216233e-05, "loss": 0.8965, "step": 1094 }, { "epoch": 0.019427715515140234, "grad_norm": 2.890625, "learning_rate": 4.996656644876252e-05, "loss": 0.9415, "step": 1096 }, { "epoch": 0.019463167550751805, "grad_norm": 2.9375, "learning_rate": 4.996642190378515e-05, "loss": 0.9455, "step": 1098 }, { "epoch": 0.019498619586363373, "grad_norm": 3.375, "learning_rate": 4.996627704723205e-05, "loss": 0.9358, "step": 1100 }, { "epoch": 0.019534071621974944, "grad_norm": 3.359375, "learning_rate": 4.9966131879105003e-05, "loss": 0.9367, "step": 1102 }, { "epoch": 0.019569523657586515, "grad_norm": 3.0, "learning_rate": 4.996598639940583e-05, "loss": 0.9078, "step": 1104 }, { "epoch": 0.019604975693198082, "grad_norm": 3.4375, "learning_rate": 4.996584060813635e-05, "loss": 0.9562, "step": 1106 }, { "epoch": 0.019640427728809653, "grad_norm": 3.421875, "learning_rate": 4.996569450529838e-05, "loss": 0.9156, "step": 1108 }, { "epoch": 0.019675879764421225, "grad_norm": 3.109375, "learning_rate": 4.996554809089373e-05, "loss": 0.9195, "step": 1110 }, { "epoch": 0.019711331800032792, "grad_norm": 2.84375, "learning_rate": 4.9965401364924254e-05, "loss": 0.8973, "step": 1112 }, { "epoch": 0.019746783835644363, "grad_norm": 3.375, "learning_rate": 4.996525432739176e-05, "loss": 0.9351, "step": 1114 }, { "epoch": 0.019782235871255934, "grad_norm": 3.15625, "learning_rate": 4.99651069782981e-05, "loss": 0.9017, "step": 1116 }, { "epoch": 0.0198176879068675, "grad_norm": 2.90625, "learning_rate": 4.99649593176451e-05, "loss": 0.9318, "step": 1118 }, { "epoch": 0.019853139942479073, "grad_norm": 3.25, "learning_rate": 4.996481134543461e-05, "loss": 0.8764, "step": 1120 }, { "epoch": 0.01988859197809064, "grad_norm": 3.390625, "learning_rate": 4.996466306166847e-05, "loss": 0.9343, "step": 1122 }, { "epoch": 0.01992404401370221, "grad_norm": 3.265625, "learning_rate": 4.996451446634854e-05, "loss": 0.9393, "step": 1124 }, { "epoch": 0.019959496049313782, "grad_norm": 2.75, "learning_rate": 4.996436555947667e-05, "loss": 0.9263, "step": 1126 }, { "epoch": 0.01999494808492535, "grad_norm": 3.25, "learning_rate": 4.996421634105471e-05, "loss": 0.9262, "step": 1128 }, { "epoch": 0.02003040012053692, "grad_norm": 3.265625, "learning_rate": 4.996406681108453e-05, "loss": 0.9101, "step": 1130 }, { "epoch": 0.020065852156148492, "grad_norm": 3.046875, "learning_rate": 4.9963916969568e-05, "loss": 0.9742, "step": 1132 }, { "epoch": 0.02010130419176006, "grad_norm": 3.046875, "learning_rate": 4.996376681650698e-05, "loss": 0.9335, "step": 1134 }, { "epoch": 0.02013675622737163, "grad_norm": 3.34375, "learning_rate": 4.996361635190336e-05, "loss": 0.9756, "step": 1136 }, { "epoch": 0.0201722082629832, "grad_norm": 3.4375, "learning_rate": 4.9963465575759006e-05, "loss": 0.9531, "step": 1138 }, { "epoch": 0.02020766029859477, "grad_norm": 2.78125, "learning_rate": 4.9963314488075795e-05, "loss": 0.8895, "step": 1140 }, { "epoch": 0.02024311233420634, "grad_norm": 2.75, "learning_rate": 4.996316308885562e-05, "loss": 0.8957, "step": 1142 }, { "epoch": 0.020278564369817908, "grad_norm": 2.96875, "learning_rate": 4.9963011378100376e-05, "loss": 0.9058, "step": 1144 }, { "epoch": 0.02031401640542948, "grad_norm": 3.0, "learning_rate": 4.996285935581194e-05, "loss": 0.9273, "step": 1146 }, { "epoch": 0.02034946844104105, "grad_norm": 3.5625, "learning_rate": 4.996270702199223e-05, "loss": 0.9218, "step": 1148 }, { "epoch": 0.020384920476652617, "grad_norm": 2.921875, "learning_rate": 4.996255437664312e-05, "loss": 0.9251, "step": 1150 }, { "epoch": 0.02042037251226419, "grad_norm": 3.109375, "learning_rate": 4.996240141976654e-05, "loss": 0.892, "step": 1152 }, { "epoch": 0.02045582454787576, "grad_norm": 3.203125, "learning_rate": 4.996224815136439e-05, "loss": 0.9465, "step": 1154 }, { "epoch": 0.020491276583487327, "grad_norm": 3.1875, "learning_rate": 4.996209457143858e-05, "loss": 0.9364, "step": 1156 }, { "epoch": 0.020526728619098898, "grad_norm": 3.03125, "learning_rate": 4.996194067999103e-05, "loss": 0.9111, "step": 1158 }, { "epoch": 0.02056218065471047, "grad_norm": 3.546875, "learning_rate": 4.996178647702366e-05, "loss": 0.8519, "step": 1160 }, { "epoch": 0.020597632690322037, "grad_norm": 2.890625, "learning_rate": 4.996163196253839e-05, "loss": 0.976, "step": 1162 }, { "epoch": 0.020633084725933608, "grad_norm": 2.8125, "learning_rate": 4.996147713653716e-05, "loss": 0.8858, "step": 1164 }, { "epoch": 0.020668536761545175, "grad_norm": 2.875, "learning_rate": 4.9961321999021886e-05, "loss": 0.926, "step": 1166 }, { "epoch": 0.020703988797156746, "grad_norm": 3.0, "learning_rate": 4.996116654999452e-05, "loss": 0.9177, "step": 1168 }, { "epoch": 0.020739440832768317, "grad_norm": 2.859375, "learning_rate": 4.996101078945699e-05, "loss": 0.9003, "step": 1170 }, { "epoch": 0.020774892868379885, "grad_norm": 2.71875, "learning_rate": 4.9960854717411243e-05, "loss": 0.9368, "step": 1172 }, { "epoch": 0.020810344903991456, "grad_norm": 2.890625, "learning_rate": 4.9960698333859234e-05, "loss": 0.9016, "step": 1174 }, { "epoch": 0.020845796939603027, "grad_norm": 3.1875, "learning_rate": 4.9960541638802903e-05, "loss": 0.8804, "step": 1176 }, { "epoch": 0.020881248975214595, "grad_norm": 3.109375, "learning_rate": 4.9960384632244216e-05, "loss": 0.9707, "step": 1178 }, { "epoch": 0.020916701010826166, "grad_norm": 2.859375, "learning_rate": 4.9960227314185124e-05, "loss": 0.8634, "step": 1180 }, { "epoch": 0.020952153046437737, "grad_norm": 2.84375, "learning_rate": 4.99600696846276e-05, "loss": 0.9332, "step": 1182 }, { "epoch": 0.020987605082049304, "grad_norm": 3.125, "learning_rate": 4.99599117435736e-05, "loss": 0.9643, "step": 1184 }, { "epoch": 0.021023057117660875, "grad_norm": 3.421875, "learning_rate": 4.9959753491025095e-05, "loss": 0.888, "step": 1186 }, { "epoch": 0.021058509153272443, "grad_norm": 2.859375, "learning_rate": 4.9959594926984074e-05, "loss": 0.9664, "step": 1188 }, { "epoch": 0.021093961188884014, "grad_norm": 3.109375, "learning_rate": 4.99594360514525e-05, "loss": 0.9423, "step": 1190 }, { "epoch": 0.021129413224495585, "grad_norm": 2.796875, "learning_rate": 4.995927686443237e-05, "loss": 0.8912, "step": 1192 }, { "epoch": 0.021164865260107153, "grad_norm": 3.359375, "learning_rate": 4.9959117365925654e-05, "loss": 0.9002, "step": 1194 }, { "epoch": 0.021200317295718724, "grad_norm": 3.296875, "learning_rate": 4.995895755593436e-05, "loss": 0.9246, "step": 1196 }, { "epoch": 0.021235769331330295, "grad_norm": 2.90625, "learning_rate": 4.995879743446047e-05, "loss": 0.8685, "step": 1198 }, { "epoch": 0.021271221366941862, "grad_norm": 2.8125, "learning_rate": 4.995863700150599e-05, "loss": 0.9362, "step": 1200 }, { "epoch": 0.021306673402553433, "grad_norm": 2.8125, "learning_rate": 4.9958476257072914e-05, "loss": 0.9406, "step": 1202 }, { "epoch": 0.021342125438165004, "grad_norm": 3.0, "learning_rate": 4.995831520116326e-05, "loss": 0.9096, "step": 1204 }, { "epoch": 0.021377577473776572, "grad_norm": 3.296875, "learning_rate": 4.9958153833779027e-05, "loss": 0.8959, "step": 1206 }, { "epoch": 0.021413029509388143, "grad_norm": 2.828125, "learning_rate": 4.995799215492223e-05, "loss": 0.8913, "step": 1208 }, { "epoch": 0.02144848154499971, "grad_norm": 3.203125, "learning_rate": 4.9957830164594893e-05, "loss": 0.9452, "step": 1210 }, { "epoch": 0.02148393358061128, "grad_norm": 2.84375, "learning_rate": 4.995766786279903e-05, "loss": 0.9203, "step": 1212 }, { "epoch": 0.021519385616222853, "grad_norm": 2.59375, "learning_rate": 4.9957505249536676e-05, "loss": 0.9122, "step": 1214 }, { "epoch": 0.02155483765183442, "grad_norm": 2.640625, "learning_rate": 4.995734232480985e-05, "loss": 0.8905, "step": 1216 }, { "epoch": 0.02159028968744599, "grad_norm": 2.859375, "learning_rate": 4.995717908862059e-05, "loss": 0.9156, "step": 1218 }, { "epoch": 0.021625741723057562, "grad_norm": 3.03125, "learning_rate": 4.995701554097094e-05, "loss": 0.9158, "step": 1220 }, { "epoch": 0.02166119375866913, "grad_norm": 2.96875, "learning_rate": 4.995685168186293e-05, "loss": 0.8903, "step": 1222 }, { "epoch": 0.0216966457942807, "grad_norm": 3.453125, "learning_rate": 4.9956687511298604e-05, "loss": 0.9174, "step": 1224 }, { "epoch": 0.021732097829892272, "grad_norm": 3.234375, "learning_rate": 4.995652302928002e-05, "loss": 0.9353, "step": 1226 }, { "epoch": 0.02176754986550384, "grad_norm": 2.8125, "learning_rate": 4.995635823580922e-05, "loss": 0.9103, "step": 1228 }, { "epoch": 0.02180300190111541, "grad_norm": 3.390625, "learning_rate": 4.9956193130888276e-05, "loss": 0.9258, "step": 1230 }, { "epoch": 0.021838453936726978, "grad_norm": 3.1875, "learning_rate": 4.995602771451924e-05, "loss": 0.8866, "step": 1232 }, { "epoch": 0.02187390597233855, "grad_norm": 3.1875, "learning_rate": 4.9955861986704175e-05, "loss": 0.9572, "step": 1234 }, { "epoch": 0.02190935800795012, "grad_norm": 2.9375, "learning_rate": 4.9955695947445145e-05, "loss": 0.9017, "step": 1236 }, { "epoch": 0.021944810043561688, "grad_norm": 3.046875, "learning_rate": 4.995552959674423e-05, "loss": 0.9137, "step": 1238 }, { "epoch": 0.02198026207917326, "grad_norm": 2.875, "learning_rate": 4.995536293460351e-05, "loss": 0.8681, "step": 1240 }, { "epoch": 0.02201571411478483, "grad_norm": 3.34375, "learning_rate": 4.995519596102506e-05, "loss": 0.9348, "step": 1242 }, { "epoch": 0.022051166150396397, "grad_norm": 2.90625, "learning_rate": 4.995502867601095e-05, "loss": 0.9005, "step": 1244 }, { "epoch": 0.02208661818600797, "grad_norm": 3.296875, "learning_rate": 4.995486107956329e-05, "loss": 0.9338, "step": 1246 }, { "epoch": 0.02212207022161954, "grad_norm": 2.90625, "learning_rate": 4.995469317168415e-05, "loss": 0.8978, "step": 1248 }, { "epoch": 0.022157522257231107, "grad_norm": 2.828125, "learning_rate": 4.9954524952375646e-05, "loss": 0.8685, "step": 1250 }, { "epoch": 0.022192974292842678, "grad_norm": 2.75, "learning_rate": 4.995435642163987e-05, "loss": 0.9427, "step": 1252 }, { "epoch": 0.022228426328454245, "grad_norm": 3.015625, "learning_rate": 4.995418757947892e-05, "loss": 0.915, "step": 1254 }, { "epoch": 0.022263878364065817, "grad_norm": 2.71875, "learning_rate": 4.99540184258949e-05, "loss": 0.8801, "step": 1256 }, { "epoch": 0.022299330399677388, "grad_norm": 3.265625, "learning_rate": 4.995384896088994e-05, "loss": 0.9515, "step": 1258 }, { "epoch": 0.022334782435288955, "grad_norm": 2.96875, "learning_rate": 4.995367918446613e-05, "loss": 0.9193, "step": 1260 }, { "epoch": 0.022370234470900526, "grad_norm": 2.9375, "learning_rate": 4.995350909662561e-05, "loss": 0.9507, "step": 1262 }, { "epoch": 0.022405686506512097, "grad_norm": 2.953125, "learning_rate": 4.995333869737049e-05, "loss": 0.8967, "step": 1264 }, { "epoch": 0.022441138542123665, "grad_norm": 2.671875, "learning_rate": 4.9953167986702905e-05, "loss": 0.9278, "step": 1266 }, { "epoch": 0.022476590577735236, "grad_norm": 2.984375, "learning_rate": 4.9952996964624976e-05, "loss": 0.8715, "step": 1268 }, { "epoch": 0.022512042613346807, "grad_norm": 3.734375, "learning_rate": 4.995282563113885e-05, "loss": 0.8882, "step": 1270 }, { "epoch": 0.022547494648958374, "grad_norm": 2.90625, "learning_rate": 4.9952653986246646e-05, "loss": 0.9051, "step": 1272 }, { "epoch": 0.022582946684569945, "grad_norm": 3.109375, "learning_rate": 4.995248202995052e-05, "loss": 0.9401, "step": 1274 }, { "epoch": 0.022618398720181513, "grad_norm": 2.671875, "learning_rate": 4.9952309762252624e-05, "loss": 0.8906, "step": 1276 }, { "epoch": 0.022653850755793084, "grad_norm": 3.078125, "learning_rate": 4.995213718315509e-05, "loss": 0.9692, "step": 1278 }, { "epoch": 0.022689302791404655, "grad_norm": 3.140625, "learning_rate": 4.995196429266009e-05, "loss": 0.8744, "step": 1280 }, { "epoch": 0.022724754827016223, "grad_norm": 3.296875, "learning_rate": 4.995179109076976e-05, "loss": 0.8542, "step": 1282 }, { "epoch": 0.022760206862627794, "grad_norm": 2.8125, "learning_rate": 4.9951617577486285e-05, "loss": 0.8996, "step": 1284 }, { "epoch": 0.022795658898239365, "grad_norm": 2.953125, "learning_rate": 4.995144375281182e-05, "loss": 0.9078, "step": 1286 }, { "epoch": 0.022831110933850932, "grad_norm": 3.03125, "learning_rate": 4.9951269616748534e-05, "loss": 0.9386, "step": 1288 }, { "epoch": 0.022866562969462503, "grad_norm": 3.46875, "learning_rate": 4.995109516929859e-05, "loss": 0.8791, "step": 1290 }, { "epoch": 0.02290201500507407, "grad_norm": 2.84375, "learning_rate": 4.995092041046419e-05, "loss": 0.8965, "step": 1292 }, { "epoch": 0.022937467040685642, "grad_norm": 2.90625, "learning_rate": 4.995074534024748e-05, "loss": 0.9523, "step": 1294 }, { "epoch": 0.022972919076297213, "grad_norm": 2.78125, "learning_rate": 4.9950569958650684e-05, "loss": 0.9555, "step": 1296 }, { "epoch": 0.02300837111190878, "grad_norm": 3.546875, "learning_rate": 4.995039426567596e-05, "loss": 0.8951, "step": 1298 }, { "epoch": 0.02304382314752035, "grad_norm": 2.90625, "learning_rate": 4.995021826132552e-05, "loss": 0.8788, "step": 1300 }, { "epoch": 0.023079275183131923, "grad_norm": 3.09375, "learning_rate": 4.995004194560155e-05, "loss": 0.9304, "step": 1302 }, { "epoch": 0.02311472721874349, "grad_norm": 3.203125, "learning_rate": 4.9949865318506254e-05, "loss": 0.9267, "step": 1304 }, { "epoch": 0.02315017925435506, "grad_norm": 2.96875, "learning_rate": 4.994968838004184e-05, "loss": 0.8883, "step": 1306 }, { "epoch": 0.023185631289966632, "grad_norm": 2.890625, "learning_rate": 4.99495111302105e-05, "loss": 0.9201, "step": 1308 }, { "epoch": 0.0232210833255782, "grad_norm": 2.984375, "learning_rate": 4.9949333569014464e-05, "loss": 0.9868, "step": 1310 }, { "epoch": 0.02325653536118977, "grad_norm": 2.703125, "learning_rate": 4.994915569645594e-05, "loss": 0.8611, "step": 1312 }, { "epoch": 0.02329198739680134, "grad_norm": 2.65625, "learning_rate": 4.994897751253715e-05, "loss": 0.9253, "step": 1314 }, { "epoch": 0.02332743943241291, "grad_norm": 2.953125, "learning_rate": 4.9948799017260325e-05, "loss": 0.9014, "step": 1316 }, { "epoch": 0.02336289146802448, "grad_norm": 2.765625, "learning_rate": 4.994862021062767e-05, "loss": 0.9189, "step": 1318 }, { "epoch": 0.023398343503636048, "grad_norm": 3.390625, "learning_rate": 4.994844109264145e-05, "loss": 0.8979, "step": 1320 }, { "epoch": 0.02343379553924762, "grad_norm": 3.015625, "learning_rate": 4.994826166330386e-05, "loss": 0.8681, "step": 1322 }, { "epoch": 0.02346924757485919, "grad_norm": 2.671875, "learning_rate": 4.994808192261718e-05, "loss": 0.8864, "step": 1324 }, { "epoch": 0.023504699610470758, "grad_norm": 3.046875, "learning_rate": 4.994790187058363e-05, "loss": 0.9201, "step": 1326 }, { "epoch": 0.02354015164608233, "grad_norm": 3.0625, "learning_rate": 4.994772150720545e-05, "loss": 0.8995, "step": 1328 }, { "epoch": 0.0235756036816939, "grad_norm": 3.109375, "learning_rate": 4.9947540832484904e-05, "loss": 0.8978, "step": 1330 }, { "epoch": 0.023611055717305467, "grad_norm": 2.875, "learning_rate": 4.994735984642426e-05, "loss": 0.903, "step": 1332 }, { "epoch": 0.02364650775291704, "grad_norm": 2.875, "learning_rate": 4.9947178549025745e-05, "loss": 0.907, "step": 1334 }, { "epoch": 0.023681959788528606, "grad_norm": 2.984375, "learning_rate": 4.9946996940291644e-05, "loss": 0.8918, "step": 1336 }, { "epoch": 0.023717411824140177, "grad_norm": 2.984375, "learning_rate": 4.9946815020224215e-05, "loss": 0.8664, "step": 1338 }, { "epoch": 0.023752863859751748, "grad_norm": 3.265625, "learning_rate": 4.994663278882573e-05, "loss": 0.9018, "step": 1340 }, { "epoch": 0.023788315895363316, "grad_norm": 2.921875, "learning_rate": 4.994645024609847e-05, "loss": 0.875, "step": 1342 }, { "epoch": 0.023823767930974887, "grad_norm": 3.125, "learning_rate": 4.9946267392044696e-05, "loss": 0.9201, "step": 1344 }, { "epoch": 0.023859219966586458, "grad_norm": 3.234375, "learning_rate": 4.994608422666671e-05, "loss": 0.929, "step": 1346 }, { "epoch": 0.023894672002198025, "grad_norm": 3.34375, "learning_rate": 4.994590074996679e-05, "loss": 0.929, "step": 1348 }, { "epoch": 0.023930124037809596, "grad_norm": 2.859375, "learning_rate": 4.994571696194722e-05, "loss": 0.9022, "step": 1350 }, { "epoch": 0.023965576073421167, "grad_norm": 2.984375, "learning_rate": 4.99455328626103e-05, "loss": 0.8865, "step": 1352 }, { "epoch": 0.024001028109032735, "grad_norm": 2.84375, "learning_rate": 4.994534845195832e-05, "loss": 0.8835, "step": 1354 }, { "epoch": 0.024036480144644306, "grad_norm": 2.9375, "learning_rate": 4.99451637299936e-05, "loss": 0.9338, "step": 1356 }, { "epoch": 0.024071932180255873, "grad_norm": 3.0625, "learning_rate": 4.9944978696718416e-05, "loss": 0.9045, "step": 1358 }, { "epoch": 0.024107384215867445, "grad_norm": 2.625, "learning_rate": 4.99447933521351e-05, "loss": 0.9218, "step": 1360 }, { "epoch": 0.024142836251479016, "grad_norm": 3.09375, "learning_rate": 4.994460769624596e-05, "loss": 0.9163, "step": 1362 }, { "epoch": 0.024178288287090583, "grad_norm": 3.09375, "learning_rate": 4.994442172905331e-05, "loss": 0.8869, "step": 1364 }, { "epoch": 0.024213740322702154, "grad_norm": 3.046875, "learning_rate": 4.994423545055948e-05, "loss": 0.9001, "step": 1366 }, { "epoch": 0.024249192358313725, "grad_norm": 3.015625, "learning_rate": 4.994404886076678e-05, "loss": 0.9176, "step": 1368 }, { "epoch": 0.024284644393925293, "grad_norm": 3.109375, "learning_rate": 4.994386195967754e-05, "loss": 0.9031, "step": 1370 }, { "epoch": 0.024320096429536864, "grad_norm": 2.90625, "learning_rate": 4.994367474729411e-05, "loss": 0.8798, "step": 1372 }, { "epoch": 0.024355548465148435, "grad_norm": 3.40625, "learning_rate": 4.994348722361881e-05, "loss": 0.8774, "step": 1374 }, { "epoch": 0.024391000500760002, "grad_norm": 3.15625, "learning_rate": 4.9943299388653984e-05, "loss": 0.9432, "step": 1376 }, { "epoch": 0.024426452536371573, "grad_norm": 3.390625, "learning_rate": 4.9943111242401974e-05, "loss": 0.9392, "step": 1378 }, { "epoch": 0.02446190457198314, "grad_norm": 3.109375, "learning_rate": 4.994292278486514e-05, "loss": 0.8717, "step": 1380 }, { "epoch": 0.024497356607594712, "grad_norm": 3.046875, "learning_rate": 4.994273401604582e-05, "loss": 0.8945, "step": 1382 }, { "epoch": 0.024532808643206283, "grad_norm": 3.140625, "learning_rate": 4.994254493594637e-05, "loss": 0.9413, "step": 1384 }, { "epoch": 0.02456826067881785, "grad_norm": 2.625, "learning_rate": 4.994235554456916e-05, "loss": 0.889, "step": 1386 }, { "epoch": 0.02460371271442942, "grad_norm": 3.125, "learning_rate": 4.9942165841916545e-05, "loss": 0.9052, "step": 1388 }, { "epoch": 0.024639164750040993, "grad_norm": 2.796875, "learning_rate": 4.99419758279909e-05, "loss": 0.9182, "step": 1390 }, { "epoch": 0.02467461678565256, "grad_norm": 2.90625, "learning_rate": 4.9941785502794586e-05, "loss": 0.8578, "step": 1392 }, { "epoch": 0.02471006882126413, "grad_norm": 2.828125, "learning_rate": 4.994159486632999e-05, "loss": 0.8931, "step": 1394 }, { "epoch": 0.024745520856875702, "grad_norm": 3.21875, "learning_rate": 4.994140391859947e-05, "loss": 0.9515, "step": 1396 }, { "epoch": 0.02478097289248727, "grad_norm": 3.34375, "learning_rate": 4.994121265960544e-05, "loss": 0.9105, "step": 1398 }, { "epoch": 0.02481642492809884, "grad_norm": 3.171875, "learning_rate": 4.994102108935027e-05, "loss": 0.9351, "step": 1400 }, { "epoch": 0.02485187696371041, "grad_norm": 2.546875, "learning_rate": 4.9940829207836346e-05, "loss": 0.8385, "step": 1402 }, { "epoch": 0.02488732899932198, "grad_norm": 2.90625, "learning_rate": 4.994063701506607e-05, "loss": 0.8899, "step": 1404 }, { "epoch": 0.02492278103493355, "grad_norm": 2.875, "learning_rate": 4.994044451104184e-05, "loss": 0.9206, "step": 1406 }, { "epoch": 0.024958233070545118, "grad_norm": 3.015625, "learning_rate": 4.994025169576605e-05, "loss": 0.9093, "step": 1408 }, { "epoch": 0.02499368510615669, "grad_norm": 3.046875, "learning_rate": 4.9940058569241125e-05, "loss": 0.9178, "step": 1410 }, { "epoch": 0.02502913714176826, "grad_norm": 2.8125, "learning_rate": 4.993986513146945e-05, "loss": 0.8901, "step": 1412 }, { "epoch": 0.025064589177379828, "grad_norm": 2.796875, "learning_rate": 4.993967138245347e-05, "loss": 0.8781, "step": 1414 }, { "epoch": 0.0251000412129914, "grad_norm": 2.765625, "learning_rate": 4.9939477322195574e-05, "loss": 0.9092, "step": 1416 }, { "epoch": 0.02513549324860297, "grad_norm": 3.375, "learning_rate": 4.9939282950698195e-05, "loss": 0.9014, "step": 1418 }, { "epoch": 0.025170945284214537, "grad_norm": 3.53125, "learning_rate": 4.9939088267963763e-05, "loss": 0.8958, "step": 1420 }, { "epoch": 0.02520639731982611, "grad_norm": 3.078125, "learning_rate": 4.9938893273994706e-05, "loss": 0.918, "step": 1422 }, { "epoch": 0.025241849355437676, "grad_norm": 2.859375, "learning_rate": 4.9938697968793454e-05, "loss": 0.8702, "step": 1424 }, { "epoch": 0.025277301391049247, "grad_norm": 3.765625, "learning_rate": 4.9938502352362435e-05, "loss": 0.9012, "step": 1426 }, { "epoch": 0.025312753426660818, "grad_norm": 2.765625, "learning_rate": 4.9938306424704114e-05, "loss": 0.8525, "step": 1428 }, { "epoch": 0.025348205462272386, "grad_norm": 2.984375, "learning_rate": 4.993811018582092e-05, "loss": 0.909, "step": 1430 }, { "epoch": 0.025383657497883957, "grad_norm": 3.15625, "learning_rate": 4.993791363571531e-05, "loss": 0.8888, "step": 1432 }, { "epoch": 0.025419109533495528, "grad_norm": 2.96875, "learning_rate": 4.993771677438972e-05, "loss": 0.9036, "step": 1434 }, { "epoch": 0.025454561569107095, "grad_norm": 3.046875, "learning_rate": 4.993751960184663e-05, "loss": 0.8765, "step": 1436 }, { "epoch": 0.025490013604718666, "grad_norm": 3.546875, "learning_rate": 4.993732211808848e-05, "loss": 0.9343, "step": 1438 }, { "epoch": 0.025525465640330237, "grad_norm": 3.203125, "learning_rate": 4.993712432311775e-05, "loss": 0.8502, "step": 1440 }, { "epoch": 0.025560917675941805, "grad_norm": 3.046875, "learning_rate": 4.9936926216936905e-05, "loss": 0.9446, "step": 1442 }, { "epoch": 0.025596369711553376, "grad_norm": 3.015625, "learning_rate": 4.993672779954841e-05, "loss": 0.9214, "step": 1444 }, { "epoch": 0.025631821747164944, "grad_norm": 2.640625, "learning_rate": 4.993652907095475e-05, "loss": 0.8599, "step": 1446 }, { "epoch": 0.025667273782776515, "grad_norm": 2.9375, "learning_rate": 4.99363300311584e-05, "loss": 0.9155, "step": 1448 }, { "epoch": 0.025702725818388086, "grad_norm": 2.90625, "learning_rate": 4.993613068016184e-05, "loss": 0.9058, "step": 1450 }, { "epoch": 0.025738177853999653, "grad_norm": 2.9375, "learning_rate": 4.993593101796756e-05, "loss": 0.9138, "step": 1452 }, { "epoch": 0.025773629889611224, "grad_norm": 2.765625, "learning_rate": 4.993573104457806e-05, "loss": 0.888, "step": 1454 }, { "epoch": 0.025809081925222795, "grad_norm": 2.828125, "learning_rate": 4.993553075999584e-05, "loss": 0.9234, "step": 1456 }, { "epoch": 0.025844533960834363, "grad_norm": 3.015625, "learning_rate": 4.9935330164223376e-05, "loss": 0.91, "step": 1458 }, { "epoch": 0.025879985996445934, "grad_norm": 2.890625, "learning_rate": 4.993512925726319e-05, "loss": 0.9006, "step": 1460 }, { "epoch": 0.025915438032057505, "grad_norm": 2.890625, "learning_rate": 4.993492803911778e-05, "loss": 0.8324, "step": 1462 }, { "epoch": 0.025950890067669072, "grad_norm": 2.734375, "learning_rate": 4.993472650978968e-05, "loss": 0.8797, "step": 1464 }, { "epoch": 0.025986342103280644, "grad_norm": 2.859375, "learning_rate": 4.993452466928137e-05, "loss": 0.932, "step": 1466 }, { "epoch": 0.02602179413889221, "grad_norm": 3.203125, "learning_rate": 4.993432251759538e-05, "loss": 0.8812, "step": 1468 }, { "epoch": 0.026057246174503782, "grad_norm": 3.078125, "learning_rate": 4.993412005473425e-05, "loss": 0.9186, "step": 1470 }, { "epoch": 0.026092698210115353, "grad_norm": 3.359375, "learning_rate": 4.993391728070049e-05, "loss": 0.9155, "step": 1472 }, { "epoch": 0.02612815024572692, "grad_norm": 3.0625, "learning_rate": 4.993371419549664e-05, "loss": 0.8935, "step": 1474 }, { "epoch": 0.026163602281338492, "grad_norm": 2.875, "learning_rate": 4.9933510799125224e-05, "loss": 0.8947, "step": 1476 }, { "epoch": 0.026199054316950063, "grad_norm": 3.078125, "learning_rate": 4.9933307091588796e-05, "loss": 0.8763, "step": 1478 }, { "epoch": 0.02623450635256163, "grad_norm": 2.875, "learning_rate": 4.993310307288988e-05, "loss": 0.8889, "step": 1480 }, { "epoch": 0.0262699583881732, "grad_norm": 2.65625, "learning_rate": 4.993289874303103e-05, "loss": 0.8904, "step": 1482 }, { "epoch": 0.02630541042378477, "grad_norm": 3.328125, "learning_rate": 4.993269410201481e-05, "loss": 0.9644, "step": 1484 }, { "epoch": 0.02634086245939634, "grad_norm": 2.84375, "learning_rate": 4.993248914984375e-05, "loss": 0.9051, "step": 1486 }, { "epoch": 0.02637631449500791, "grad_norm": 2.8125, "learning_rate": 4.9932283886520413e-05, "loss": 0.9146, "step": 1488 }, { "epoch": 0.02641176653061948, "grad_norm": 2.609375, "learning_rate": 4.993207831204738e-05, "loss": 0.8888, "step": 1490 }, { "epoch": 0.02644721856623105, "grad_norm": 2.90625, "learning_rate": 4.9931872426427196e-05, "loss": 0.9044, "step": 1492 }, { "epoch": 0.02648267060184262, "grad_norm": 2.875, "learning_rate": 4.9931666229662435e-05, "loss": 0.955, "step": 1494 }, { "epoch": 0.02651812263745419, "grad_norm": 3.046875, "learning_rate": 4.993145972175567e-05, "loss": 0.9151, "step": 1496 }, { "epoch": 0.02655357467306576, "grad_norm": 3.09375, "learning_rate": 4.993125290270949e-05, "loss": 0.917, "step": 1498 }, { "epoch": 0.02658902670867733, "grad_norm": 3.25, "learning_rate": 4.993104577252646e-05, "loss": 0.9137, "step": 1500 }, { "epoch": 0.026624478744288898, "grad_norm": 2.71875, "learning_rate": 4.993083833120917e-05, "loss": 0.9038, "step": 1502 }, { "epoch": 0.02665993077990047, "grad_norm": 2.765625, "learning_rate": 4.993063057876022e-05, "loss": 0.9221, "step": 1504 }, { "epoch": 0.026695382815512037, "grad_norm": 3.3125, "learning_rate": 4.993042251518218e-05, "loss": 0.8918, "step": 1506 }, { "epoch": 0.026730834851123608, "grad_norm": 3.15625, "learning_rate": 4.993021414047767e-05, "loss": 0.9025, "step": 1508 }, { "epoch": 0.02676628688673518, "grad_norm": 3.0625, "learning_rate": 4.9930005454649276e-05, "loss": 0.8823, "step": 1510 }, { "epoch": 0.026801738922346746, "grad_norm": 2.96875, "learning_rate": 4.9929796457699606e-05, "loss": 0.8694, "step": 1512 }, { "epoch": 0.026837190957958317, "grad_norm": 3.03125, "learning_rate": 4.9929587149631265e-05, "loss": 0.9034, "step": 1514 }, { "epoch": 0.026872642993569888, "grad_norm": 3.171875, "learning_rate": 4.9929377530446876e-05, "loss": 0.9044, "step": 1516 }, { "epoch": 0.026908095029181456, "grad_norm": 2.859375, "learning_rate": 4.992916760014904e-05, "loss": 0.9068, "step": 1518 }, { "epoch": 0.026943547064793027, "grad_norm": 2.609375, "learning_rate": 4.992895735874039e-05, "loss": 0.8867, "step": 1520 }, { "epoch": 0.026978999100404598, "grad_norm": 2.90625, "learning_rate": 4.9928746806223545e-05, "loss": 0.8914, "step": 1522 }, { "epoch": 0.027014451136016165, "grad_norm": 3.046875, "learning_rate": 4.992853594260114e-05, "loss": 0.9118, "step": 1524 }, { "epoch": 0.027049903171627736, "grad_norm": 2.65625, "learning_rate": 4.992832476787579e-05, "loss": 0.8973, "step": 1526 }, { "epoch": 0.027085355207239304, "grad_norm": 2.828125, "learning_rate": 4.992811328205013e-05, "loss": 0.9082, "step": 1528 }, { "epoch": 0.027120807242850875, "grad_norm": 2.765625, "learning_rate": 4.992790148512682e-05, "loss": 0.8552, "step": 1530 }, { "epoch": 0.027156259278462446, "grad_norm": 3.25, "learning_rate": 4.992768937710849e-05, "loss": 0.9192, "step": 1532 }, { "epoch": 0.027191711314074014, "grad_norm": 2.984375, "learning_rate": 4.992747695799779e-05, "loss": 0.9207, "step": 1534 }, { "epoch": 0.027227163349685585, "grad_norm": 2.703125, "learning_rate": 4.992726422779737e-05, "loss": 0.9069, "step": 1536 }, { "epoch": 0.027262615385297156, "grad_norm": 2.890625, "learning_rate": 4.9927051186509876e-05, "loss": 0.8553, "step": 1538 }, { "epoch": 0.027298067420908723, "grad_norm": 2.96875, "learning_rate": 4.992683783413798e-05, "loss": 0.9094, "step": 1540 }, { "epoch": 0.027333519456520294, "grad_norm": 2.921875, "learning_rate": 4.9926624170684345e-05, "loss": 0.9098, "step": 1542 }, { "epoch": 0.027368971492131865, "grad_norm": 2.921875, "learning_rate": 4.9926410196151625e-05, "loss": 0.8634, "step": 1544 }, { "epoch": 0.027404423527743433, "grad_norm": 3.484375, "learning_rate": 4.99261959105425e-05, "loss": 0.9041, "step": 1546 }, { "epoch": 0.027439875563355004, "grad_norm": 2.953125, "learning_rate": 4.992598131385964e-05, "loss": 0.8923, "step": 1548 }, { "epoch": 0.02747532759896657, "grad_norm": 2.921875, "learning_rate": 4.992576640610572e-05, "loss": 0.9115, "step": 1550 }, { "epoch": 0.027510779634578143, "grad_norm": 2.5625, "learning_rate": 4.992555118728344e-05, "loss": 0.8475, "step": 1552 }, { "epoch": 0.027546231670189714, "grad_norm": 3.1875, "learning_rate": 4.992533565739547e-05, "loss": 0.9031, "step": 1554 }, { "epoch": 0.02758168370580128, "grad_norm": 3.0625, "learning_rate": 4.99251198164445e-05, "loss": 0.8947, "step": 1556 }, { "epoch": 0.027617135741412852, "grad_norm": 2.625, "learning_rate": 4.992490366443322e-05, "loss": 0.8605, "step": 1558 }, { "epoch": 0.027652587777024423, "grad_norm": 3.0625, "learning_rate": 4.992468720136434e-05, "loss": 0.8773, "step": 1560 }, { "epoch": 0.02768803981263599, "grad_norm": 3.046875, "learning_rate": 4.9924470427240556e-05, "loss": 0.9447, "step": 1562 }, { "epoch": 0.027723491848247562, "grad_norm": 2.484375, "learning_rate": 4.992425334206457e-05, "loss": 0.9087, "step": 1564 }, { "epoch": 0.027758943883859133, "grad_norm": 2.890625, "learning_rate": 4.992403594583909e-05, "loss": 0.9347, "step": 1566 }, { "epoch": 0.0277943959194707, "grad_norm": 2.96875, "learning_rate": 4.9923818238566844e-05, "loss": 0.894, "step": 1568 }, { "epoch": 0.02782984795508227, "grad_norm": 3.25, "learning_rate": 4.9923600220250526e-05, "loss": 0.855, "step": 1570 }, { "epoch": 0.02786529999069384, "grad_norm": 3.1875, "learning_rate": 4.9923381890892874e-05, "loss": 0.8794, "step": 1572 }, { "epoch": 0.02790075202630541, "grad_norm": 2.78125, "learning_rate": 4.9923163250496606e-05, "loss": 0.8898, "step": 1574 }, { "epoch": 0.02793620406191698, "grad_norm": 3.25, "learning_rate": 4.992294429906445e-05, "loss": 0.9033, "step": 1576 }, { "epoch": 0.02797165609752855, "grad_norm": 2.703125, "learning_rate": 4.9922725036599146e-05, "loss": 0.9061, "step": 1578 }, { "epoch": 0.02800710813314012, "grad_norm": 2.9375, "learning_rate": 4.992250546310342e-05, "loss": 0.8935, "step": 1580 }, { "epoch": 0.02804256016875169, "grad_norm": 2.890625, "learning_rate": 4.992228557858002e-05, "loss": 0.8952, "step": 1582 }, { "epoch": 0.02807801220436326, "grad_norm": 2.984375, "learning_rate": 4.992206538303168e-05, "loss": 0.8433, "step": 1584 }, { "epoch": 0.02811346423997483, "grad_norm": 2.859375, "learning_rate": 4.992184487646116e-05, "loss": 0.8904, "step": 1586 }, { "epoch": 0.0281489162755864, "grad_norm": 2.921875, "learning_rate": 4.992162405887121e-05, "loss": 0.8927, "step": 1588 }, { "epoch": 0.028184368311197968, "grad_norm": 3.046875, "learning_rate": 4.992140293026458e-05, "loss": 0.908, "step": 1590 }, { "epoch": 0.02821982034680954, "grad_norm": 3.109375, "learning_rate": 4.992118149064403e-05, "loss": 0.8945, "step": 1592 }, { "epoch": 0.028255272382421107, "grad_norm": 3.0625, "learning_rate": 4.9920959740012326e-05, "loss": 0.9162, "step": 1594 }, { "epoch": 0.028290724418032678, "grad_norm": 3.078125, "learning_rate": 4.9920737678372234e-05, "loss": 0.8884, "step": 1596 }, { "epoch": 0.02832617645364425, "grad_norm": 3.0625, "learning_rate": 4.9920515305726526e-05, "loss": 0.8824, "step": 1598 }, { "epoch": 0.028361628489255816, "grad_norm": 3.0625, "learning_rate": 4.992029262207798e-05, "loss": 0.8814, "step": 1600 }, { "epoch": 0.028397080524867387, "grad_norm": 2.921875, "learning_rate": 4.9920069627429375e-05, "loss": 0.9012, "step": 1602 }, { "epoch": 0.02843253256047896, "grad_norm": 3.28125, "learning_rate": 4.991984632178349e-05, "loss": 0.9315, "step": 1604 }, { "epoch": 0.028467984596090526, "grad_norm": 2.734375, "learning_rate": 4.9919622705143106e-05, "loss": 0.9475, "step": 1606 }, { "epoch": 0.028503436631702097, "grad_norm": 3.125, "learning_rate": 4.991939877751103e-05, "loss": 0.9051, "step": 1608 }, { "epoch": 0.028538888667313668, "grad_norm": 3.078125, "learning_rate": 4.991917453889004e-05, "loss": 0.887, "step": 1610 }, { "epoch": 0.028574340702925236, "grad_norm": 3.03125, "learning_rate": 4.991894998928295e-05, "loss": 0.9247, "step": 1612 }, { "epoch": 0.028609792738536807, "grad_norm": 3.078125, "learning_rate": 4.991872512869256e-05, "loss": 0.9195, "step": 1614 }, { "epoch": 0.028645244774148374, "grad_norm": 3.078125, "learning_rate": 4.9918499957121654e-05, "loss": 0.9334, "step": 1616 }, { "epoch": 0.028680696809759945, "grad_norm": 2.84375, "learning_rate": 4.991827447457307e-05, "loss": 0.8793, "step": 1618 }, { "epoch": 0.028716148845371516, "grad_norm": 2.6875, "learning_rate": 4.991804868104961e-05, "loss": 0.9034, "step": 1620 }, { "epoch": 0.028751600880983084, "grad_norm": 3.0625, "learning_rate": 4.991782257655408e-05, "loss": 0.9069, "step": 1622 }, { "epoch": 0.028787052916594655, "grad_norm": 2.9375, "learning_rate": 4.991759616108933e-05, "loss": 0.8551, "step": 1624 }, { "epoch": 0.028822504952206226, "grad_norm": 2.9375, "learning_rate": 4.991736943465816e-05, "loss": 0.9543, "step": 1626 }, { "epoch": 0.028857956987817793, "grad_norm": 2.546875, "learning_rate": 4.991714239726342e-05, "loss": 0.9071, "step": 1628 }, { "epoch": 0.028893409023429364, "grad_norm": 3.125, "learning_rate": 4.991691504890792e-05, "loss": 0.9125, "step": 1630 }, { "epoch": 0.028928861059040935, "grad_norm": 3.09375, "learning_rate": 4.991668738959452e-05, "loss": 0.9668, "step": 1632 }, { "epoch": 0.028964313094652503, "grad_norm": 3.015625, "learning_rate": 4.991645941932604e-05, "loss": 0.8547, "step": 1634 }, { "epoch": 0.028999765130264074, "grad_norm": 2.921875, "learning_rate": 4.9916231138105354e-05, "loss": 0.8627, "step": 1636 }, { "epoch": 0.02903521716587564, "grad_norm": 2.84375, "learning_rate": 4.991600254593527e-05, "loss": 0.8693, "step": 1638 }, { "epoch": 0.029070669201487213, "grad_norm": 2.8125, "learning_rate": 4.9915773642818684e-05, "loss": 0.8523, "step": 1640 }, { "epoch": 0.029106121237098784, "grad_norm": 2.9375, "learning_rate": 4.991554442875842e-05, "loss": 0.8958, "step": 1642 }, { "epoch": 0.02914157327271035, "grad_norm": 3.03125, "learning_rate": 4.991531490375736e-05, "loss": 0.8493, "step": 1644 }, { "epoch": 0.029177025308321922, "grad_norm": 3.078125, "learning_rate": 4.9915085067818355e-05, "loss": 0.9503, "step": 1646 }, { "epoch": 0.029212477343933493, "grad_norm": 2.90625, "learning_rate": 4.9914854920944276e-05, "loss": 0.9112, "step": 1648 }, { "epoch": 0.02924792937954506, "grad_norm": 3.125, "learning_rate": 4.9914624463138e-05, "loss": 0.9116, "step": 1650 }, { "epoch": 0.029283381415156632, "grad_norm": 3.265625, "learning_rate": 4.991439369440239e-05, "loss": 0.899, "step": 1652 }, { "epoch": 0.029318833450768203, "grad_norm": 2.90625, "learning_rate": 4.9914162614740355e-05, "loss": 0.8528, "step": 1654 }, { "epoch": 0.02935428548637977, "grad_norm": 3.15625, "learning_rate": 4.991393122415475e-05, "loss": 0.9004, "step": 1656 }, { "epoch": 0.02938973752199134, "grad_norm": 2.921875, "learning_rate": 4.991369952264847e-05, "loss": 0.915, "step": 1658 }, { "epoch": 0.02942518955760291, "grad_norm": 2.734375, "learning_rate": 4.991346751022441e-05, "loss": 0.8629, "step": 1660 }, { "epoch": 0.02946064159321448, "grad_norm": 2.921875, "learning_rate": 4.9913235186885464e-05, "loss": 0.8786, "step": 1662 }, { "epoch": 0.02949609362882605, "grad_norm": 3.78125, "learning_rate": 4.991300255263454e-05, "loss": 0.8372, "step": 1664 }, { "epoch": 0.02953154566443762, "grad_norm": 2.890625, "learning_rate": 4.991276960747452e-05, "loss": 0.8547, "step": 1666 }, { "epoch": 0.02956699770004919, "grad_norm": 3.078125, "learning_rate": 4.9912536351408334e-05, "loss": 0.9204, "step": 1668 }, { "epoch": 0.02960244973566076, "grad_norm": 3.28125, "learning_rate": 4.991230278443888e-05, "loss": 0.8767, "step": 1670 }, { "epoch": 0.02963790177127233, "grad_norm": 2.59375, "learning_rate": 4.9912068906569076e-05, "loss": 0.8974, "step": 1672 }, { "epoch": 0.0296733538068839, "grad_norm": 3.109375, "learning_rate": 4.991183471780184e-05, "loss": 0.8934, "step": 1674 }, { "epoch": 0.029708805842495467, "grad_norm": 2.953125, "learning_rate": 4.9911600218140107e-05, "loss": 0.9092, "step": 1676 }, { "epoch": 0.029744257878107038, "grad_norm": 3.1875, "learning_rate": 4.9911365407586774e-05, "loss": 0.8898, "step": 1678 }, { "epoch": 0.02977970991371861, "grad_norm": 2.921875, "learning_rate": 4.9911130286144805e-05, "loss": 0.8761, "step": 1680 }, { "epoch": 0.029815161949330177, "grad_norm": 2.90625, "learning_rate": 4.9910894853817106e-05, "loss": 0.8469, "step": 1682 }, { "epoch": 0.029850613984941748, "grad_norm": 3.328125, "learning_rate": 4.991065911060663e-05, "loss": 0.8584, "step": 1684 }, { "epoch": 0.02988606602055332, "grad_norm": 3.078125, "learning_rate": 4.991042305651632e-05, "loss": 0.8896, "step": 1686 }, { "epoch": 0.029921518056164886, "grad_norm": 2.875, "learning_rate": 4.9910186691549123e-05, "loss": 0.8259, "step": 1688 }, { "epoch": 0.029956970091776457, "grad_norm": 2.90625, "learning_rate": 4.990995001570798e-05, "loss": 0.9028, "step": 1690 }, { "epoch": 0.02999242212738803, "grad_norm": 2.9375, "learning_rate": 4.9909713028995845e-05, "loss": 0.8824, "step": 1692 }, { "epoch": 0.030027874162999596, "grad_norm": 2.96875, "learning_rate": 4.9909475731415686e-05, "loss": 0.9096, "step": 1694 }, { "epoch": 0.030063326198611167, "grad_norm": 2.953125, "learning_rate": 4.990923812297046e-05, "loss": 0.8724, "step": 1696 }, { "epoch": 0.030098778234222735, "grad_norm": 2.9375, "learning_rate": 4.990900020366313e-05, "loss": 0.8796, "step": 1698 }, { "epoch": 0.030134230269834306, "grad_norm": 3.140625, "learning_rate": 4.990876197349665e-05, "loss": 0.8311, "step": 1700 }, { "epoch": 0.030169682305445877, "grad_norm": 3.53125, "learning_rate": 4.9908523432474024e-05, "loss": 0.9301, "step": 1702 }, { "epoch": 0.030205134341057444, "grad_norm": 2.5625, "learning_rate": 4.9908284580598206e-05, "loss": 0.8491, "step": 1704 }, { "epoch": 0.030240586376669015, "grad_norm": 2.609375, "learning_rate": 4.99080454178722e-05, "loss": 0.9107, "step": 1706 }, { "epoch": 0.030276038412280586, "grad_norm": 2.875, "learning_rate": 4.990780594429896e-05, "loss": 0.9217, "step": 1708 }, { "epoch": 0.030311490447892154, "grad_norm": 3.203125, "learning_rate": 4.990756615988149e-05, "loss": 0.9592, "step": 1710 }, { "epoch": 0.030346942483503725, "grad_norm": 2.625, "learning_rate": 4.9907326064622786e-05, "loss": 0.8798, "step": 1712 }, { "epoch": 0.030382394519115296, "grad_norm": 2.890625, "learning_rate": 4.990708565852584e-05, "loss": 0.9163, "step": 1714 }, { "epoch": 0.030417846554726864, "grad_norm": 2.859375, "learning_rate": 4.9906844941593654e-05, "loss": 0.9099, "step": 1716 }, { "epoch": 0.030453298590338435, "grad_norm": 2.9375, "learning_rate": 4.990660391382923e-05, "loss": 0.9131, "step": 1718 }, { "epoch": 0.030488750625950002, "grad_norm": 3.09375, "learning_rate": 4.9906362575235575e-05, "loss": 0.8907, "step": 1720 }, { "epoch": 0.030524202661561573, "grad_norm": 3.0, "learning_rate": 4.9906120925815706e-05, "loss": 0.8797, "step": 1722 }, { "epoch": 0.030559654697173144, "grad_norm": 3.265625, "learning_rate": 4.990587896557263e-05, "loss": 0.934, "step": 1724 }, { "epoch": 0.030595106732784712, "grad_norm": 2.890625, "learning_rate": 4.990563669450938e-05, "loss": 0.8834, "step": 1726 }, { "epoch": 0.030630558768396283, "grad_norm": 2.859375, "learning_rate": 4.990539411262897e-05, "loss": 0.9206, "step": 1728 }, { "epoch": 0.030666010804007854, "grad_norm": 2.8125, "learning_rate": 4.990515121993442e-05, "loss": 0.8585, "step": 1730 }, { "epoch": 0.03070146283961942, "grad_norm": 2.890625, "learning_rate": 4.990490801642878e-05, "loss": 0.8668, "step": 1732 }, { "epoch": 0.030736914875230992, "grad_norm": 2.84375, "learning_rate": 4.990466450211507e-05, "loss": 0.911, "step": 1734 }, { "epoch": 0.030772366910842563, "grad_norm": 3.078125, "learning_rate": 4.990442067699634e-05, "loss": 0.8918, "step": 1736 }, { "epoch": 0.03080781894645413, "grad_norm": 2.71875, "learning_rate": 4.990417654107562e-05, "loss": 0.881, "step": 1738 }, { "epoch": 0.030843270982065702, "grad_norm": 2.6875, "learning_rate": 4.990393209435596e-05, "loss": 0.9107, "step": 1740 }, { "epoch": 0.03087872301767727, "grad_norm": 3.359375, "learning_rate": 4.990368733684043e-05, "loss": 0.9067, "step": 1742 }, { "epoch": 0.03091417505328884, "grad_norm": 3.15625, "learning_rate": 4.9903442268532066e-05, "loss": 0.853, "step": 1744 }, { "epoch": 0.03094962708890041, "grad_norm": 3.21875, "learning_rate": 4.990319688943392e-05, "loss": 0.8587, "step": 1746 }, { "epoch": 0.03098507912451198, "grad_norm": 2.796875, "learning_rate": 4.990295119954906e-05, "loss": 0.8913, "step": 1748 }, { "epoch": 0.03102053116012355, "grad_norm": 2.90625, "learning_rate": 4.990270519888057e-05, "loss": 0.9057, "step": 1750 }, { "epoch": 0.03105598319573512, "grad_norm": 2.75, "learning_rate": 4.99024588874315e-05, "loss": 0.8899, "step": 1752 }, { "epoch": 0.03109143523134669, "grad_norm": 2.890625, "learning_rate": 4.990221226520493e-05, "loss": 0.8811, "step": 1754 }, { "epoch": 0.03112688726695826, "grad_norm": 3.03125, "learning_rate": 4.990196533220394e-05, "loss": 0.8812, "step": 1756 }, { "epoch": 0.03116233930256983, "grad_norm": 2.953125, "learning_rate": 4.99017180884316e-05, "loss": 0.8764, "step": 1758 }, { "epoch": 0.0311977913381814, "grad_norm": 2.71875, "learning_rate": 4.9901470533891014e-05, "loss": 0.9297, "step": 1760 }, { "epoch": 0.03123324337379297, "grad_norm": 3.09375, "learning_rate": 4.9901222668585266e-05, "loss": 0.8929, "step": 1762 }, { "epoch": 0.03126869540940454, "grad_norm": 2.828125, "learning_rate": 4.9900974492517435e-05, "loss": 0.8731, "step": 1764 }, { "epoch": 0.03130414744501611, "grad_norm": 2.734375, "learning_rate": 4.990072600569064e-05, "loss": 0.8794, "step": 1766 }, { "epoch": 0.03133959948062768, "grad_norm": 2.6875, "learning_rate": 4.9900477208107957e-05, "loss": 0.8343, "step": 1768 }, { "epoch": 0.03137505151623925, "grad_norm": 3.015625, "learning_rate": 4.9900228099772516e-05, "loss": 0.9005, "step": 1770 }, { "epoch": 0.031410503551850814, "grad_norm": 2.625, "learning_rate": 4.9899978680687406e-05, "loss": 0.879, "step": 1772 }, { "epoch": 0.03144595558746239, "grad_norm": 2.84375, "learning_rate": 4.9899728950855764e-05, "loss": 0.876, "step": 1774 }, { "epoch": 0.031481407623073956, "grad_norm": 3.171875, "learning_rate": 4.989947891028067e-05, "loss": 0.9064, "step": 1776 }, { "epoch": 0.031516859658685524, "grad_norm": 3.09375, "learning_rate": 4.989922855896528e-05, "loss": 0.8831, "step": 1778 }, { "epoch": 0.0315523116942971, "grad_norm": 3.03125, "learning_rate": 4.98989778969127e-05, "loss": 0.8988, "step": 1780 }, { "epoch": 0.031587763729908666, "grad_norm": 3.203125, "learning_rate": 4.989872692412606e-05, "loss": 0.8684, "step": 1782 }, { "epoch": 0.031623215765520234, "grad_norm": 3.578125, "learning_rate": 4.98984756406085e-05, "loss": 0.8947, "step": 1784 }, { "epoch": 0.03165866780113181, "grad_norm": 3.3125, "learning_rate": 4.989822404636314e-05, "loss": 0.8621, "step": 1786 }, { "epoch": 0.031694119836743376, "grad_norm": 2.796875, "learning_rate": 4.9897972141393135e-05, "loss": 0.872, "step": 1788 }, { "epoch": 0.03172957187235494, "grad_norm": 2.90625, "learning_rate": 4.989771992570163e-05, "loss": 0.893, "step": 1790 }, { "epoch": 0.03176502390796652, "grad_norm": 2.96875, "learning_rate": 4.9897467399291756e-05, "loss": 0.916, "step": 1792 }, { "epoch": 0.031800475943578085, "grad_norm": 2.375, "learning_rate": 4.989721456216668e-05, "loss": 0.8348, "step": 1794 }, { "epoch": 0.03183592797918965, "grad_norm": 2.796875, "learning_rate": 4.989696141432955e-05, "loss": 0.8458, "step": 1796 }, { "epoch": 0.03187138001480123, "grad_norm": 2.96875, "learning_rate": 4.9896707955783526e-05, "loss": 0.9631, "step": 1798 }, { "epoch": 0.031906832050412795, "grad_norm": 3.0, "learning_rate": 4.989645418653177e-05, "loss": 0.9095, "step": 1800 }, { "epoch": 0.03194228408602436, "grad_norm": 3.046875, "learning_rate": 4.9896200106577465e-05, "loss": 0.8879, "step": 1802 }, { "epoch": 0.03197773612163594, "grad_norm": 3.140625, "learning_rate": 4.9895945715923754e-05, "loss": 0.9092, "step": 1804 }, { "epoch": 0.032013188157247505, "grad_norm": 2.578125, "learning_rate": 4.989569101457383e-05, "loss": 0.843, "step": 1806 }, { "epoch": 0.03204864019285907, "grad_norm": 3.046875, "learning_rate": 4.989543600253087e-05, "loss": 0.9134, "step": 1808 }, { "epoch": 0.03208409222847065, "grad_norm": 2.984375, "learning_rate": 4.989518067979805e-05, "loss": 0.8729, "step": 1810 }, { "epoch": 0.032119544264082214, "grad_norm": 2.828125, "learning_rate": 4.989492504637856e-05, "loss": 0.8983, "step": 1812 }, { "epoch": 0.03215499629969378, "grad_norm": 3.0625, "learning_rate": 4.989466910227559e-05, "loss": 0.8304, "step": 1814 }, { "epoch": 0.03219044833530535, "grad_norm": 3.328125, "learning_rate": 4.9894412847492345e-05, "loss": 0.8675, "step": 1816 }, { "epoch": 0.032225900370916924, "grad_norm": 3.34375, "learning_rate": 4.9894156282032e-05, "loss": 0.9471, "step": 1818 }, { "epoch": 0.03226135240652849, "grad_norm": 3.234375, "learning_rate": 4.989389940589778e-05, "loss": 0.9131, "step": 1820 }, { "epoch": 0.03229680444214006, "grad_norm": 2.90625, "learning_rate": 4.989364221909287e-05, "loss": 0.909, "step": 1822 }, { "epoch": 0.032332256477751634, "grad_norm": 3.046875, "learning_rate": 4.98933847216205e-05, "loss": 0.8465, "step": 1824 }, { "epoch": 0.0323677085133632, "grad_norm": 2.96875, "learning_rate": 4.989312691348387e-05, "loss": 0.8581, "step": 1826 }, { "epoch": 0.03240316054897477, "grad_norm": 2.6875, "learning_rate": 4.98928687946862e-05, "loss": 0.9257, "step": 1828 }, { "epoch": 0.03243861258458634, "grad_norm": 2.65625, "learning_rate": 4.989261036523071e-05, "loss": 0.8431, "step": 1830 }, { "epoch": 0.03247406462019791, "grad_norm": 2.96875, "learning_rate": 4.989235162512064e-05, "loss": 0.9564, "step": 1832 }, { "epoch": 0.03250951665580948, "grad_norm": 2.984375, "learning_rate": 4.989209257435919e-05, "loss": 0.8758, "step": 1834 }, { "epoch": 0.03254496869142105, "grad_norm": 2.828125, "learning_rate": 4.989183321294961e-05, "loss": 0.8783, "step": 1836 }, { "epoch": 0.03258042072703262, "grad_norm": 3.28125, "learning_rate": 4.989157354089515e-05, "loss": 0.927, "step": 1838 }, { "epoch": 0.03261587276264419, "grad_norm": 3.390625, "learning_rate": 4.9891313558199025e-05, "loss": 0.9246, "step": 1840 }, { "epoch": 0.03265132479825576, "grad_norm": 2.96875, "learning_rate": 4.98910532648645e-05, "loss": 0.8639, "step": 1842 }, { "epoch": 0.03268677683386733, "grad_norm": 3.03125, "learning_rate": 4.9890792660894806e-05, "loss": 0.856, "step": 1844 }, { "epoch": 0.0327222288694789, "grad_norm": 2.984375, "learning_rate": 4.989053174629321e-05, "loss": 0.8842, "step": 1846 }, { "epoch": 0.03275768090509047, "grad_norm": 2.921875, "learning_rate": 4.989027052106295e-05, "loss": 0.8997, "step": 1848 }, { "epoch": 0.03279313294070204, "grad_norm": 3.03125, "learning_rate": 4.989000898520732e-05, "loss": 0.8674, "step": 1850 }, { "epoch": 0.03282858497631361, "grad_norm": 2.921875, "learning_rate": 4.988974713872955e-05, "loss": 0.8712, "step": 1852 }, { "epoch": 0.03286403701192518, "grad_norm": 3.234375, "learning_rate": 4.9889484981632913e-05, "loss": 0.9267, "step": 1854 }, { "epoch": 0.03289948904753675, "grad_norm": 3.0625, "learning_rate": 4.98892225139207e-05, "loss": 0.8959, "step": 1856 }, { "epoch": 0.03293494108314832, "grad_norm": 2.96875, "learning_rate": 4.9888959735596165e-05, "loss": 0.887, "step": 1858 }, { "epoch": 0.032970393118759884, "grad_norm": 2.984375, "learning_rate": 4.9888696646662606e-05, "loss": 0.8938, "step": 1860 }, { "epoch": 0.03300584515437146, "grad_norm": 3.71875, "learning_rate": 4.9888433247123284e-05, "loss": 0.8704, "step": 1862 }, { "epoch": 0.03304129718998303, "grad_norm": 2.546875, "learning_rate": 4.988816953698151e-05, "loss": 0.926, "step": 1864 }, { "epoch": 0.033076749225594594, "grad_norm": 2.875, "learning_rate": 4.9887905516240555e-05, "loss": 0.8907, "step": 1866 }, { "epoch": 0.03311220126120617, "grad_norm": 3.078125, "learning_rate": 4.988764118490373e-05, "loss": 0.9096, "step": 1868 }, { "epoch": 0.033147653296817736, "grad_norm": 3.078125, "learning_rate": 4.988737654297432e-05, "loss": 0.8758, "step": 1870 }, { "epoch": 0.033183105332429304, "grad_norm": 2.90625, "learning_rate": 4.988711159045564e-05, "loss": 0.9465, "step": 1872 }, { "epoch": 0.03321855736804088, "grad_norm": 2.65625, "learning_rate": 4.988684632735099e-05, "loss": 0.8731, "step": 1874 }, { "epoch": 0.033254009403652446, "grad_norm": 3.0625, "learning_rate": 4.988658075366368e-05, "loss": 0.9215, "step": 1876 }, { "epoch": 0.03328946143926401, "grad_norm": 2.515625, "learning_rate": 4.9886314869397025e-05, "loss": 0.9032, "step": 1878 }, { "epoch": 0.03332491347487559, "grad_norm": 2.875, "learning_rate": 4.9886048674554347e-05, "loss": 0.8975, "step": 1880 }, { "epoch": 0.033360365510487155, "grad_norm": 3.078125, "learning_rate": 4.988578216913896e-05, "loss": 0.9586, "step": 1882 }, { "epoch": 0.03339581754609872, "grad_norm": 2.9375, "learning_rate": 4.98855153531542e-05, "loss": 0.8882, "step": 1884 }, { "epoch": 0.0334312695817103, "grad_norm": 2.90625, "learning_rate": 4.9885248226603397e-05, "loss": 0.8855, "step": 1886 }, { "epoch": 0.033466721617321865, "grad_norm": 2.90625, "learning_rate": 4.9884980789489865e-05, "loss": 0.8807, "step": 1888 }, { "epoch": 0.03350217365293343, "grad_norm": 3.140625, "learning_rate": 4.988471304181697e-05, "loss": 0.8932, "step": 1890 }, { "epoch": 0.03353762568854501, "grad_norm": 2.765625, "learning_rate": 4.988444498358803e-05, "loss": 0.8452, "step": 1892 }, { "epoch": 0.033573077724156575, "grad_norm": 2.859375, "learning_rate": 4.98841766148064e-05, "loss": 0.8801, "step": 1894 }, { "epoch": 0.03360852975976814, "grad_norm": 3.234375, "learning_rate": 4.9883907935475436e-05, "loss": 0.8889, "step": 1896 }, { "epoch": 0.03364398179537972, "grad_norm": 2.71875, "learning_rate": 4.988363894559847e-05, "loss": 0.8927, "step": 1898 }, { "epoch": 0.033679433830991284, "grad_norm": 2.796875, "learning_rate": 4.988336964517889e-05, "loss": 0.8917, "step": 1900 }, { "epoch": 0.03371488586660285, "grad_norm": 2.6875, "learning_rate": 4.988310003422003e-05, "loss": 0.8618, "step": 1902 }, { "epoch": 0.03375033790221442, "grad_norm": 2.6875, "learning_rate": 4.9882830112725264e-05, "loss": 0.9061, "step": 1904 }, { "epoch": 0.033785789937825994, "grad_norm": 2.984375, "learning_rate": 4.9882559880697964e-05, "loss": 0.8871, "step": 1906 }, { "epoch": 0.03382124197343756, "grad_norm": 2.75, "learning_rate": 4.9882289338141494e-05, "loss": 0.9005, "step": 1908 }, { "epoch": 0.03385669400904913, "grad_norm": 2.734375, "learning_rate": 4.988201848505925e-05, "loss": 0.8677, "step": 1910 }, { "epoch": 0.033892146044660704, "grad_norm": 2.71875, "learning_rate": 4.988174732145458e-05, "loss": 0.8617, "step": 1912 }, { "epoch": 0.03392759808027227, "grad_norm": 3.015625, "learning_rate": 4.988147584733089e-05, "loss": 0.8704, "step": 1914 }, { "epoch": 0.03396305011588384, "grad_norm": 3.03125, "learning_rate": 4.9881204062691575e-05, "loss": 0.8464, "step": 1916 }, { "epoch": 0.03399850215149541, "grad_norm": 3.0625, "learning_rate": 4.988093196754001e-05, "loss": 0.9313, "step": 1918 }, { "epoch": 0.03403395418710698, "grad_norm": 3.078125, "learning_rate": 4.9880659561879596e-05, "loss": 0.8973, "step": 1920 }, { "epoch": 0.03406940622271855, "grad_norm": 2.75, "learning_rate": 4.988038684571373e-05, "loss": 0.9628, "step": 1922 }, { "epoch": 0.03410485825833012, "grad_norm": 2.828125, "learning_rate": 4.988011381904581e-05, "loss": 0.87, "step": 1924 }, { "epoch": 0.03414031029394169, "grad_norm": 3.0, "learning_rate": 4.987984048187927e-05, "loss": 0.9069, "step": 1926 }, { "epoch": 0.03417576232955326, "grad_norm": 3.078125, "learning_rate": 4.98795668342175e-05, "loss": 0.9091, "step": 1928 }, { "epoch": 0.03421121436516483, "grad_norm": 2.828125, "learning_rate": 4.987929287606391e-05, "loss": 0.8688, "step": 1930 }, { "epoch": 0.0342466664007764, "grad_norm": 2.9375, "learning_rate": 4.9879018607421927e-05, "loss": 0.8515, "step": 1932 }, { "epoch": 0.03428211843638797, "grad_norm": 2.765625, "learning_rate": 4.9878744028294974e-05, "loss": 0.899, "step": 1934 }, { "epoch": 0.03431757047199954, "grad_norm": 3.0625, "learning_rate": 4.987846913868648e-05, "loss": 0.8567, "step": 1936 }, { "epoch": 0.03435302250761111, "grad_norm": 3.125, "learning_rate": 4.987819393859987e-05, "loss": 0.8997, "step": 1938 }, { "epoch": 0.03438847454322268, "grad_norm": 2.765625, "learning_rate": 4.987791842803858e-05, "loss": 0.8767, "step": 1940 }, { "epoch": 0.03442392657883425, "grad_norm": 2.828125, "learning_rate": 4.9877642607006056e-05, "loss": 0.8827, "step": 1942 }, { "epoch": 0.03445937861444582, "grad_norm": 3.0, "learning_rate": 4.9877366475505735e-05, "loss": 0.8539, "step": 1944 }, { "epoch": 0.03449483065005739, "grad_norm": 2.859375, "learning_rate": 4.9877090033541065e-05, "loss": 0.9004, "step": 1946 }, { "epoch": 0.034530282685668955, "grad_norm": 2.96875, "learning_rate": 4.987681328111548e-05, "loss": 0.8699, "step": 1948 }, { "epoch": 0.03456573472128053, "grad_norm": 2.859375, "learning_rate": 4.987653621823245e-05, "loss": 0.8887, "step": 1950 }, { "epoch": 0.0346011867568921, "grad_norm": 2.96875, "learning_rate": 4.987625884489544e-05, "loss": 0.9072, "step": 1952 }, { "epoch": 0.034636638792503664, "grad_norm": 2.921875, "learning_rate": 4.9875981161107885e-05, "loss": 0.9238, "step": 1954 }, { "epoch": 0.03467209082811524, "grad_norm": 3.046875, "learning_rate": 4.987570316687328e-05, "loss": 0.9007, "step": 1956 }, { "epoch": 0.034707542863726806, "grad_norm": 3.09375, "learning_rate": 4.987542486219507e-05, "loss": 0.8812, "step": 1958 }, { "epoch": 0.034742994899338374, "grad_norm": 3.421875, "learning_rate": 4.987514624707675e-05, "loss": 0.9161, "step": 1960 }, { "epoch": 0.03477844693494995, "grad_norm": 3.28125, "learning_rate": 4.9874867321521776e-05, "loss": 0.893, "step": 1962 }, { "epoch": 0.034813898970561516, "grad_norm": 2.828125, "learning_rate": 4.9874588085533644e-05, "loss": 0.8903, "step": 1964 }, { "epoch": 0.034849351006173084, "grad_norm": 3.359375, "learning_rate": 4.987430853911583e-05, "loss": 0.9052, "step": 1966 }, { "epoch": 0.03488480304178466, "grad_norm": 3.109375, "learning_rate": 4.987402868227183e-05, "loss": 0.8775, "step": 1968 }, { "epoch": 0.034920255077396226, "grad_norm": 2.515625, "learning_rate": 4.9873748515005134e-05, "loss": 0.861, "step": 1970 }, { "epoch": 0.03495570711300779, "grad_norm": 2.75, "learning_rate": 4.987346803731924e-05, "loss": 0.8776, "step": 1972 }, { "epoch": 0.03499115914861937, "grad_norm": 2.703125, "learning_rate": 4.987318724921764e-05, "loss": 0.8981, "step": 1974 }, { "epoch": 0.035026611184230935, "grad_norm": 3.0625, "learning_rate": 4.987290615070385e-05, "loss": 0.8984, "step": 1976 }, { "epoch": 0.0350620632198425, "grad_norm": 2.96875, "learning_rate": 4.987262474178136e-05, "loss": 0.9129, "step": 1978 }, { "epoch": 0.03509751525545408, "grad_norm": 2.78125, "learning_rate": 4.98723430224537e-05, "loss": 0.9045, "step": 1980 }, { "epoch": 0.035132967291065645, "grad_norm": 2.78125, "learning_rate": 4.9872060992724387e-05, "loss": 0.8857, "step": 1982 }, { "epoch": 0.03516841932667721, "grad_norm": 2.953125, "learning_rate": 4.9871778652596926e-05, "loss": 0.8699, "step": 1984 }, { "epoch": 0.03520387136228878, "grad_norm": 2.90625, "learning_rate": 4.9871496002074845e-05, "loss": 0.8877, "step": 1986 }, { "epoch": 0.035239323397900355, "grad_norm": 2.765625, "learning_rate": 4.987121304116168e-05, "loss": 0.8973, "step": 1988 }, { "epoch": 0.03527477543351192, "grad_norm": 2.84375, "learning_rate": 4.987092976986095e-05, "loss": 0.8566, "step": 1990 }, { "epoch": 0.03531022746912349, "grad_norm": 2.78125, "learning_rate": 4.9870646188176205e-05, "loss": 0.8753, "step": 1992 }, { "epoch": 0.035345679504735064, "grad_norm": 2.6875, "learning_rate": 4.9870362296110974e-05, "loss": 0.9174, "step": 1994 }, { "epoch": 0.03538113154034663, "grad_norm": 2.9375, "learning_rate": 4.9870078093668795e-05, "loss": 0.8929, "step": 1996 }, { "epoch": 0.0354165835759582, "grad_norm": 2.8125, "learning_rate": 4.986979358085323e-05, "loss": 0.8878, "step": 1998 }, { "epoch": 0.035452035611569774, "grad_norm": 2.828125, "learning_rate": 4.9869508757667816e-05, "loss": 0.912, "step": 2000 }, { "epoch": 0.03548748764718134, "grad_norm": 2.6875, "learning_rate": 4.986922362411611e-05, "loss": 0.8657, "step": 2002 }, { "epoch": 0.03552293968279291, "grad_norm": 2.78125, "learning_rate": 4.986893818020168e-05, "loss": 0.8613, "step": 2004 }, { "epoch": 0.03555839171840448, "grad_norm": 3.0, "learning_rate": 4.986865242592807e-05, "loss": 0.8583, "step": 2006 }, { "epoch": 0.03559384375401605, "grad_norm": 3.15625, "learning_rate": 4.9868366361298873e-05, "loss": 0.8839, "step": 2008 }, { "epoch": 0.03562929578962762, "grad_norm": 3.109375, "learning_rate": 4.986807998631764e-05, "loss": 0.8905, "step": 2010 }, { "epoch": 0.03566474782523919, "grad_norm": 2.90625, "learning_rate": 4.9867793300987945e-05, "loss": 0.8616, "step": 2012 }, { "epoch": 0.03570019986085076, "grad_norm": 3.015625, "learning_rate": 4.986750630531336e-05, "loss": 0.8757, "step": 2014 }, { "epoch": 0.03573565189646233, "grad_norm": 2.640625, "learning_rate": 4.9867218999297486e-05, "loss": 0.8668, "step": 2016 }, { "epoch": 0.0357711039320739, "grad_norm": 3.046875, "learning_rate": 4.98669313829439e-05, "loss": 0.8505, "step": 2018 }, { "epoch": 0.03580655596768547, "grad_norm": 2.875, "learning_rate": 4.986664345625619e-05, "loss": 0.89, "step": 2020 }, { "epoch": 0.03584200800329704, "grad_norm": 2.6875, "learning_rate": 4.986635521923794e-05, "loss": 0.8667, "step": 2022 }, { "epoch": 0.03587746003890861, "grad_norm": 3.109375, "learning_rate": 4.9866066671892765e-05, "loss": 0.954, "step": 2024 }, { "epoch": 0.03591291207452018, "grad_norm": 2.703125, "learning_rate": 4.9865777814224254e-05, "loss": 0.9229, "step": 2026 }, { "epoch": 0.03594836411013175, "grad_norm": 3.140625, "learning_rate": 4.9865488646236014e-05, "loss": 0.8738, "step": 2028 }, { "epoch": 0.035983816145743315, "grad_norm": 3.0, "learning_rate": 4.9865199167931657e-05, "loss": 0.9409, "step": 2030 }, { "epoch": 0.03601926818135489, "grad_norm": 3.171875, "learning_rate": 4.986490937931479e-05, "loss": 0.8501, "step": 2032 }, { "epoch": 0.03605472021696646, "grad_norm": 2.625, "learning_rate": 4.9864619280389036e-05, "loss": 0.8757, "step": 2034 }, { "epoch": 0.036090172252578025, "grad_norm": 2.875, "learning_rate": 4.986432887115801e-05, "loss": 0.8583, "step": 2036 }, { "epoch": 0.0361256242881896, "grad_norm": 2.703125, "learning_rate": 4.986403815162534e-05, "loss": 0.8564, "step": 2038 }, { "epoch": 0.03616107632380117, "grad_norm": 3.046875, "learning_rate": 4.9863747121794654e-05, "loss": 0.9092, "step": 2040 }, { "epoch": 0.036196528359412734, "grad_norm": 3.09375, "learning_rate": 4.986345578166958e-05, "loss": 0.9119, "step": 2042 }, { "epoch": 0.03623198039502431, "grad_norm": 3.25, "learning_rate": 4.986316413125376e-05, "loss": 0.8834, "step": 2044 }, { "epoch": 0.036267432430635876, "grad_norm": 2.921875, "learning_rate": 4.9862872170550826e-05, "loss": 0.8866, "step": 2046 }, { "epoch": 0.036302884466247444, "grad_norm": 2.9375, "learning_rate": 4.9862579899564435e-05, "loss": 0.9096, "step": 2048 }, { "epoch": 0.03633833650185902, "grad_norm": 2.9375, "learning_rate": 4.9862287318298213e-05, "loss": 0.8794, "step": 2050 }, { "epoch": 0.036373788537470586, "grad_norm": 2.875, "learning_rate": 4.9861994426755834e-05, "loss": 0.909, "step": 2052 }, { "epoch": 0.036409240573082154, "grad_norm": 3.234375, "learning_rate": 4.986170122494093e-05, "loss": 0.8997, "step": 2054 }, { "epoch": 0.03644469260869373, "grad_norm": 2.453125, "learning_rate": 4.9861407712857185e-05, "loss": 0.8905, "step": 2056 }, { "epoch": 0.036480144644305296, "grad_norm": 2.96875, "learning_rate": 4.9861113890508246e-05, "loss": 0.9056, "step": 2058 }, { "epoch": 0.03651559667991686, "grad_norm": 3.015625, "learning_rate": 4.986081975789778e-05, "loss": 0.9046, "step": 2060 }, { "epoch": 0.03655104871552844, "grad_norm": 2.875, "learning_rate": 4.986052531502947e-05, "loss": 0.9152, "step": 2062 }, { "epoch": 0.036586500751140005, "grad_norm": 2.828125, "learning_rate": 4.9860230561906976e-05, "loss": 0.8837, "step": 2064 }, { "epoch": 0.03662195278675157, "grad_norm": 3.53125, "learning_rate": 4.985993549853398e-05, "loss": 0.9131, "step": 2066 }, { "epoch": 0.03665740482236315, "grad_norm": 3.078125, "learning_rate": 4.9859640124914174e-05, "loss": 0.8591, "step": 2068 }, { "epoch": 0.036692856857974715, "grad_norm": 3.203125, "learning_rate": 4.9859344441051234e-05, "loss": 0.9129, "step": 2070 }, { "epoch": 0.03672830889358628, "grad_norm": 2.890625, "learning_rate": 4.985904844694885e-05, "loss": 0.8509, "step": 2072 }, { "epoch": 0.03676376092919785, "grad_norm": 3.078125, "learning_rate": 4.985875214261073e-05, "loss": 0.8665, "step": 2074 }, { "epoch": 0.036799212964809425, "grad_norm": 2.75, "learning_rate": 4.985845552804055e-05, "loss": 0.8363, "step": 2076 }, { "epoch": 0.03683466500042099, "grad_norm": 2.65625, "learning_rate": 4.985815860324203e-05, "loss": 0.8305, "step": 2078 }, { "epoch": 0.03687011703603256, "grad_norm": 2.890625, "learning_rate": 4.985786136821886e-05, "loss": 0.8186, "step": 2080 }, { "epoch": 0.036905569071644134, "grad_norm": 2.875, "learning_rate": 4.985756382297476e-05, "loss": 0.8541, "step": 2082 }, { "epoch": 0.0369410211072557, "grad_norm": 2.65625, "learning_rate": 4.985726596751344e-05, "loss": 0.877, "step": 2084 }, { "epoch": 0.03697647314286727, "grad_norm": 2.90625, "learning_rate": 4.985696780183863e-05, "loss": 0.9137, "step": 2086 }, { "epoch": 0.037011925178478844, "grad_norm": 2.984375, "learning_rate": 4.985666932595403e-05, "loss": 0.89, "step": 2088 }, { "epoch": 0.03704737721409041, "grad_norm": 2.921875, "learning_rate": 4.985637053986337e-05, "loss": 0.8867, "step": 2090 }, { "epoch": 0.03708282924970198, "grad_norm": 3.390625, "learning_rate": 4.985607144357039e-05, "loss": 0.8978, "step": 2092 }, { "epoch": 0.037118281285313554, "grad_norm": 3.09375, "learning_rate": 4.9855772037078814e-05, "loss": 0.8873, "step": 2094 }, { "epoch": 0.03715373332092512, "grad_norm": 2.859375, "learning_rate": 4.9855472320392375e-05, "loss": 0.9264, "step": 2096 }, { "epoch": 0.03718918535653669, "grad_norm": 3.09375, "learning_rate": 4.9855172293514826e-05, "loss": 0.8793, "step": 2098 }, { "epoch": 0.03722463739214826, "grad_norm": 2.765625, "learning_rate": 4.98548719564499e-05, "loss": 0.8243, "step": 2100 }, { "epoch": 0.03726008942775983, "grad_norm": 2.6875, "learning_rate": 4.985457130920135e-05, "loss": 0.8314, "step": 2102 }, { "epoch": 0.0372955414633714, "grad_norm": 2.578125, "learning_rate": 4.9854270351772925e-05, "loss": 0.8263, "step": 2104 }, { "epoch": 0.03733099349898297, "grad_norm": 2.96875, "learning_rate": 4.9853969084168386e-05, "loss": 0.8607, "step": 2106 }, { "epoch": 0.03736644553459454, "grad_norm": 2.921875, "learning_rate": 4.985366750639148e-05, "loss": 0.8569, "step": 2108 }, { "epoch": 0.03740189757020611, "grad_norm": 2.953125, "learning_rate": 4.985336561844599e-05, "loss": 0.8951, "step": 2110 }, { "epoch": 0.03743734960581768, "grad_norm": 2.96875, "learning_rate": 4.985306342033567e-05, "loss": 0.9011, "step": 2112 }, { "epoch": 0.03747280164142925, "grad_norm": 2.96875, "learning_rate": 4.98527609120643e-05, "loss": 0.884, "step": 2114 }, { "epoch": 0.03750825367704082, "grad_norm": 2.6875, "learning_rate": 4.9852458093635646e-05, "loss": 0.8846, "step": 2116 }, { "epoch": 0.037543705712652385, "grad_norm": 3.109375, "learning_rate": 4.985215496505349e-05, "loss": 0.8899, "step": 2118 }, { "epoch": 0.03757915774826396, "grad_norm": 2.9375, "learning_rate": 4.985185152632162e-05, "loss": 0.8511, "step": 2120 }, { "epoch": 0.03761460978387553, "grad_norm": 3.140625, "learning_rate": 4.985154777744382e-05, "loss": 0.8845, "step": 2122 }, { "epoch": 0.037650061819487095, "grad_norm": 2.921875, "learning_rate": 4.9851243718423875e-05, "loss": 0.8729, "step": 2124 }, { "epoch": 0.03768551385509867, "grad_norm": 3.4375, "learning_rate": 4.9850939349265587e-05, "loss": 0.8871, "step": 2126 }, { "epoch": 0.03772096589071024, "grad_norm": 3.015625, "learning_rate": 4.985063466997275e-05, "loss": 0.9002, "step": 2128 }, { "epoch": 0.037756417926321804, "grad_norm": 2.65625, "learning_rate": 4.985032968054917e-05, "loss": 0.8798, "step": 2130 }, { "epoch": 0.03779186996193338, "grad_norm": 3.359375, "learning_rate": 4.985002438099865e-05, "loss": 0.9102, "step": 2132 }, { "epoch": 0.037827321997544947, "grad_norm": 2.828125, "learning_rate": 4.984971877132501e-05, "loss": 0.8697, "step": 2134 }, { "epoch": 0.037862774033156514, "grad_norm": 2.984375, "learning_rate": 4.984941285153204e-05, "loss": 0.9321, "step": 2136 }, { "epoch": 0.03789822606876809, "grad_norm": 2.734375, "learning_rate": 4.984910662162359e-05, "loss": 0.8666, "step": 2138 }, { "epoch": 0.037933678104379656, "grad_norm": 2.953125, "learning_rate": 4.9848800081603455e-05, "loss": 0.8904, "step": 2140 }, { "epoch": 0.037969130139991224, "grad_norm": 2.84375, "learning_rate": 4.9848493231475466e-05, "loss": 0.9229, "step": 2142 }, { "epoch": 0.0380045821756028, "grad_norm": 2.703125, "learning_rate": 4.9848186071243463e-05, "loss": 0.8472, "step": 2144 }, { "epoch": 0.038040034211214366, "grad_norm": 3.0625, "learning_rate": 4.984787860091128e-05, "loss": 0.8923, "step": 2146 }, { "epoch": 0.03807548624682593, "grad_norm": 3.1875, "learning_rate": 4.984757082048274e-05, "loss": 0.8802, "step": 2148 }, { "epoch": 0.03811093828243751, "grad_norm": 2.671875, "learning_rate": 4.9847262729961694e-05, "loss": 0.8569, "step": 2150 }, { "epoch": 0.038146390318049075, "grad_norm": 2.8125, "learning_rate": 4.984695432935198e-05, "loss": 0.9036, "step": 2152 }, { "epoch": 0.03818184235366064, "grad_norm": 2.953125, "learning_rate": 4.9846645618657463e-05, "loss": 0.8576, "step": 2154 }, { "epoch": 0.03821729438927221, "grad_norm": 2.609375, "learning_rate": 4.984633659788197e-05, "loss": 0.8861, "step": 2156 }, { "epoch": 0.038252746424883785, "grad_norm": 2.65625, "learning_rate": 4.984602726702938e-05, "loss": 0.9025, "step": 2158 }, { "epoch": 0.03828819846049535, "grad_norm": 2.796875, "learning_rate": 4.984571762610354e-05, "loss": 0.8429, "step": 2160 }, { "epoch": 0.03832365049610692, "grad_norm": 2.9375, "learning_rate": 4.984540767510833e-05, "loss": 0.8757, "step": 2162 }, { "epoch": 0.038359102531718495, "grad_norm": 2.953125, "learning_rate": 4.984509741404759e-05, "loss": 0.8641, "step": 2164 }, { "epoch": 0.03839455456733006, "grad_norm": 2.5625, "learning_rate": 4.984478684292522e-05, "loss": 0.8471, "step": 2166 }, { "epoch": 0.03843000660294163, "grad_norm": 2.90625, "learning_rate": 4.984447596174507e-05, "loss": 0.8905, "step": 2168 }, { "epoch": 0.038465458638553204, "grad_norm": 3.015625, "learning_rate": 4.984416477051106e-05, "loss": 0.8571, "step": 2170 }, { "epoch": 0.03850091067416477, "grad_norm": 2.984375, "learning_rate": 4.984385326922703e-05, "loss": 0.8629, "step": 2172 }, { "epoch": 0.03853636270977634, "grad_norm": 2.921875, "learning_rate": 4.9843541457896894e-05, "loss": 0.8954, "step": 2174 }, { "epoch": 0.038571814745387914, "grad_norm": 2.75, "learning_rate": 4.9843229336524526e-05, "loss": 0.8768, "step": 2176 }, { "epoch": 0.03860726678099948, "grad_norm": 2.71875, "learning_rate": 4.984291690511384e-05, "loss": 0.8464, "step": 2178 }, { "epoch": 0.03864271881661105, "grad_norm": 2.828125, "learning_rate": 4.984260416366872e-05, "loss": 0.9009, "step": 2180 }, { "epoch": 0.038678170852222624, "grad_norm": 2.84375, "learning_rate": 4.9842291112193076e-05, "loss": 0.8651, "step": 2182 }, { "epoch": 0.03871362288783419, "grad_norm": 2.90625, "learning_rate": 4.9841977750690815e-05, "loss": 0.8651, "step": 2184 }, { "epoch": 0.03874907492344576, "grad_norm": 2.796875, "learning_rate": 4.984166407916584e-05, "loss": 0.8658, "step": 2186 }, { "epoch": 0.03878452695905733, "grad_norm": 2.859375, "learning_rate": 4.984135009762208e-05, "loss": 0.9066, "step": 2188 }, { "epoch": 0.0388199789946689, "grad_norm": 3.09375, "learning_rate": 4.984103580606344e-05, "loss": 0.9346, "step": 2190 }, { "epoch": 0.03885543103028047, "grad_norm": 2.984375, "learning_rate": 4.984072120449385e-05, "loss": 0.8453, "step": 2192 }, { "epoch": 0.03889088306589204, "grad_norm": 3.0625, "learning_rate": 4.984040629291723e-05, "loss": 0.8937, "step": 2194 }, { "epoch": 0.03892633510150361, "grad_norm": 2.875, "learning_rate": 4.9840091071337514e-05, "loss": 0.844, "step": 2196 }, { "epoch": 0.03896178713711518, "grad_norm": 3.015625, "learning_rate": 4.9839775539758635e-05, "loss": 0.843, "step": 2198 }, { "epoch": 0.038997239172726746, "grad_norm": 3.015625, "learning_rate": 4.983945969818453e-05, "loss": 0.8641, "step": 2200 }, { "epoch": 0.03903269120833832, "grad_norm": 2.9375, "learning_rate": 4.9839143546619146e-05, "loss": 0.9192, "step": 2202 }, { "epoch": 0.03906814324394989, "grad_norm": 3.3125, "learning_rate": 4.983882708506642e-05, "loss": 0.8461, "step": 2204 }, { "epoch": 0.039103595279561455, "grad_norm": 2.625, "learning_rate": 4.9838510313530304e-05, "loss": 0.8884, "step": 2206 }, { "epoch": 0.03913904731517303, "grad_norm": 2.875, "learning_rate": 4.983819323201476e-05, "loss": 0.8661, "step": 2208 }, { "epoch": 0.0391744993507846, "grad_norm": 2.828125, "learning_rate": 4.9837875840523734e-05, "loss": 0.9119, "step": 2210 }, { "epoch": 0.039209951386396165, "grad_norm": 2.96875, "learning_rate": 4.983755813906119e-05, "loss": 0.929, "step": 2212 }, { "epoch": 0.03924540342200774, "grad_norm": 2.78125, "learning_rate": 4.9837240127631094e-05, "loss": 0.8929, "step": 2214 }, { "epoch": 0.03928085545761931, "grad_norm": 2.921875, "learning_rate": 4.9836921806237416e-05, "loss": 0.8669, "step": 2216 }, { "epoch": 0.039316307493230875, "grad_norm": 2.765625, "learning_rate": 4.983660317488412e-05, "loss": 0.8984, "step": 2218 }, { "epoch": 0.03935175952884245, "grad_norm": 3.0625, "learning_rate": 4.98362842335752e-05, "loss": 0.9475, "step": 2220 }, { "epoch": 0.03938721156445402, "grad_norm": 2.875, "learning_rate": 4.983596498231462e-05, "loss": 0.8664, "step": 2222 }, { "epoch": 0.039422663600065584, "grad_norm": 2.625, "learning_rate": 4.983564542110637e-05, "loss": 0.89, "step": 2224 }, { "epoch": 0.03945811563567716, "grad_norm": 2.921875, "learning_rate": 4.9835325549954446e-05, "loss": 0.864, "step": 2226 }, { "epoch": 0.039493567671288726, "grad_norm": 2.953125, "learning_rate": 4.983500536886282e-05, "loss": 0.8795, "step": 2228 }, { "epoch": 0.039529019706900294, "grad_norm": 3.125, "learning_rate": 4.9834684877835506e-05, "loss": 0.894, "step": 2230 }, { "epoch": 0.03956447174251187, "grad_norm": 3.265625, "learning_rate": 4.9834364076876493e-05, "loss": 0.8737, "step": 2232 }, { "epoch": 0.039599923778123436, "grad_norm": 2.734375, "learning_rate": 4.983404296598979e-05, "loss": 0.866, "step": 2234 }, { "epoch": 0.039635375813735, "grad_norm": 3.46875, "learning_rate": 4.9833721545179415e-05, "loss": 0.8949, "step": 2236 }, { "epoch": 0.03967082784934658, "grad_norm": 2.78125, "learning_rate": 4.983339981444936e-05, "loss": 0.8475, "step": 2238 }, { "epoch": 0.039706279884958146, "grad_norm": 3.0, "learning_rate": 4.9833077773803635e-05, "loss": 0.868, "step": 2240 }, { "epoch": 0.03974173192056971, "grad_norm": 2.984375, "learning_rate": 4.983275542324629e-05, "loss": 0.8695, "step": 2242 }, { "epoch": 0.03977718395618128, "grad_norm": 2.671875, "learning_rate": 4.983243276278132e-05, "loss": 0.9194, "step": 2244 }, { "epoch": 0.039812635991792855, "grad_norm": 2.40625, "learning_rate": 4.9832109792412764e-05, "loss": 0.8362, "step": 2246 }, { "epoch": 0.03984808802740442, "grad_norm": 3.171875, "learning_rate": 4.983178651214465e-05, "loss": 0.9008, "step": 2248 }, { "epoch": 0.03988354006301599, "grad_norm": 3.25, "learning_rate": 4.983146292198101e-05, "loss": 0.8607, "step": 2250 }, { "epoch": 0.039918992098627565, "grad_norm": 3.5625, "learning_rate": 4.9831139021925886e-05, "loss": 0.9423, "step": 2252 }, { "epoch": 0.03995444413423913, "grad_norm": 2.984375, "learning_rate": 4.9830814811983316e-05, "loss": 0.9248, "step": 2254 }, { "epoch": 0.0399898961698507, "grad_norm": 3.0, "learning_rate": 4.983049029215735e-05, "loss": 0.9025, "step": 2256 }, { "epoch": 0.040025348205462274, "grad_norm": 2.84375, "learning_rate": 4.983016546245205e-05, "loss": 0.8686, "step": 2258 }, { "epoch": 0.04006080024107384, "grad_norm": 3.015625, "learning_rate": 4.982984032287145e-05, "loss": 0.8586, "step": 2260 }, { "epoch": 0.04009625227668541, "grad_norm": 2.875, "learning_rate": 4.98295148734196e-05, "loss": 0.8336, "step": 2262 }, { "epoch": 0.040131704312296984, "grad_norm": 2.984375, "learning_rate": 4.982918911410059e-05, "loss": 0.8966, "step": 2264 }, { "epoch": 0.04016715634790855, "grad_norm": 2.90625, "learning_rate": 4.982886304491847e-05, "loss": 0.8924, "step": 2266 }, { "epoch": 0.04020260838352012, "grad_norm": 2.953125, "learning_rate": 4.9828536665877304e-05, "loss": 0.8736, "step": 2268 }, { "epoch": 0.040238060419131694, "grad_norm": 3.015625, "learning_rate": 4.9828209976981174e-05, "loss": 0.864, "step": 2270 }, { "epoch": 0.04027351245474326, "grad_norm": 3.09375, "learning_rate": 4.9827882978234165e-05, "loss": 0.8624, "step": 2272 }, { "epoch": 0.04030896449035483, "grad_norm": 2.984375, "learning_rate": 4.9827555669640335e-05, "loss": 0.8635, "step": 2274 }, { "epoch": 0.0403444165259664, "grad_norm": 2.890625, "learning_rate": 4.982722805120379e-05, "loss": 0.8958, "step": 2276 }, { "epoch": 0.04037986856157797, "grad_norm": 2.765625, "learning_rate": 4.982690012292861e-05, "loss": 0.9135, "step": 2278 }, { "epoch": 0.04041532059718954, "grad_norm": 2.875, "learning_rate": 4.9826571884818886e-05, "loss": 0.8516, "step": 2280 }, { "epoch": 0.04045077263280111, "grad_norm": 2.96875, "learning_rate": 4.982624333687871e-05, "loss": 0.8412, "step": 2282 }, { "epoch": 0.04048622466841268, "grad_norm": 3.203125, "learning_rate": 4.982591447911219e-05, "loss": 0.9317, "step": 2284 }, { "epoch": 0.04052167670402425, "grad_norm": 2.8125, "learning_rate": 4.982558531152343e-05, "loss": 0.8704, "step": 2286 }, { "epoch": 0.040557128739635816, "grad_norm": 2.71875, "learning_rate": 4.982525583411654e-05, "loss": 0.8408, "step": 2288 }, { "epoch": 0.04059258077524739, "grad_norm": 3.09375, "learning_rate": 4.982492604689562e-05, "loss": 0.8701, "step": 2290 }, { "epoch": 0.04062803281085896, "grad_norm": 2.796875, "learning_rate": 4.98245959498648e-05, "loss": 0.8691, "step": 2292 }, { "epoch": 0.040663484846470525, "grad_norm": 2.796875, "learning_rate": 4.9824265543028195e-05, "loss": 0.8744, "step": 2294 }, { "epoch": 0.0406989368820821, "grad_norm": 3.015625, "learning_rate": 4.9823934826389916e-05, "loss": 0.863, "step": 2296 }, { "epoch": 0.04073438891769367, "grad_norm": 3.046875, "learning_rate": 4.982360379995411e-05, "loss": 0.8763, "step": 2298 }, { "epoch": 0.040769840953305235, "grad_norm": 2.734375, "learning_rate": 4.98232724637249e-05, "loss": 0.8696, "step": 2300 }, { "epoch": 0.04080529298891681, "grad_norm": 2.875, "learning_rate": 4.982294081770641e-05, "loss": 0.9117, "step": 2302 }, { "epoch": 0.04084074502452838, "grad_norm": 2.75, "learning_rate": 4.9822608861902795e-05, "loss": 0.9322, "step": 2304 }, { "epoch": 0.040876197060139945, "grad_norm": 2.6875, "learning_rate": 4.9822276596318195e-05, "loss": 0.8368, "step": 2306 }, { "epoch": 0.04091164909575152, "grad_norm": 3.0, "learning_rate": 4.982194402095675e-05, "loss": 0.8842, "step": 2308 }, { "epoch": 0.04094710113136309, "grad_norm": 2.90625, "learning_rate": 4.9821611135822615e-05, "loss": 0.8414, "step": 2310 }, { "epoch": 0.040982553166974654, "grad_norm": 2.59375, "learning_rate": 4.982127794091994e-05, "loss": 0.8953, "step": 2312 }, { "epoch": 0.04101800520258623, "grad_norm": 2.921875, "learning_rate": 4.982094443625289e-05, "loss": 0.9333, "step": 2314 }, { "epoch": 0.041053457238197796, "grad_norm": 2.8125, "learning_rate": 4.982061062182562e-05, "loss": 0.876, "step": 2316 }, { "epoch": 0.041088909273809364, "grad_norm": 2.890625, "learning_rate": 4.98202764976423e-05, "loss": 0.8729, "step": 2318 }, { "epoch": 0.04112436130942094, "grad_norm": 2.796875, "learning_rate": 4.9819942063707105e-05, "loss": 0.9321, "step": 2320 }, { "epoch": 0.041159813345032506, "grad_norm": 2.609375, "learning_rate": 4.981960732002419e-05, "loss": 0.867, "step": 2322 }, { "epoch": 0.041195265380644074, "grad_norm": 3.21875, "learning_rate": 4.9819272266597755e-05, "loss": 0.8381, "step": 2324 }, { "epoch": 0.04123071741625565, "grad_norm": 2.984375, "learning_rate": 4.981893690343197e-05, "loss": 0.847, "step": 2326 }, { "epoch": 0.041266169451867216, "grad_norm": 2.84375, "learning_rate": 4.981860123053102e-05, "loss": 0.8989, "step": 2328 }, { "epoch": 0.04130162148747878, "grad_norm": 2.59375, "learning_rate": 4.98182652478991e-05, "loss": 0.8794, "step": 2330 }, { "epoch": 0.04133707352309035, "grad_norm": 2.625, "learning_rate": 4.9817928955540395e-05, "loss": 0.8438, "step": 2332 }, { "epoch": 0.041372525558701925, "grad_norm": 2.90625, "learning_rate": 4.9817592353459104e-05, "loss": 0.8878, "step": 2334 }, { "epoch": 0.04140797759431349, "grad_norm": 2.703125, "learning_rate": 4.981725544165943e-05, "loss": 0.8578, "step": 2336 }, { "epoch": 0.04144342962992506, "grad_norm": 3.046875, "learning_rate": 4.981691822014558e-05, "loss": 0.8464, "step": 2338 }, { "epoch": 0.041478881665536635, "grad_norm": 2.734375, "learning_rate": 4.981658068892176e-05, "loss": 0.8744, "step": 2340 }, { "epoch": 0.0415143337011482, "grad_norm": 2.921875, "learning_rate": 4.9816242847992176e-05, "loss": 0.8701, "step": 2342 }, { "epoch": 0.04154978573675977, "grad_norm": 2.75, "learning_rate": 4.981590469736106e-05, "loss": 0.9046, "step": 2344 }, { "epoch": 0.041585237772371345, "grad_norm": 3.1875, "learning_rate": 4.981556623703262e-05, "loss": 0.841, "step": 2346 }, { "epoch": 0.04162068980798291, "grad_norm": 3.046875, "learning_rate": 4.981522746701107e-05, "loss": 0.9096, "step": 2348 }, { "epoch": 0.04165614184359448, "grad_norm": 2.703125, "learning_rate": 4.981488838730066e-05, "loss": 0.8283, "step": 2350 }, { "epoch": 0.041691593879206054, "grad_norm": 2.765625, "learning_rate": 4.9814548997905616e-05, "loss": 0.8977, "step": 2352 }, { "epoch": 0.04172704591481762, "grad_norm": 3.28125, "learning_rate": 4.981420929883016e-05, "loss": 0.8914, "step": 2354 }, { "epoch": 0.04176249795042919, "grad_norm": 2.96875, "learning_rate": 4.981386929007854e-05, "loss": 0.8514, "step": 2356 }, { "epoch": 0.041797949986040764, "grad_norm": 2.828125, "learning_rate": 4.981352897165501e-05, "loss": 0.8564, "step": 2358 }, { "epoch": 0.04183340202165233, "grad_norm": 3.015625, "learning_rate": 4.98131883435638e-05, "loss": 0.9608, "step": 2360 }, { "epoch": 0.0418688540572639, "grad_norm": 2.984375, "learning_rate": 4.981284740580916e-05, "loss": 0.9024, "step": 2362 }, { "epoch": 0.041904306092875473, "grad_norm": 2.921875, "learning_rate": 4.9812506158395365e-05, "loss": 0.906, "step": 2364 }, { "epoch": 0.04193975812848704, "grad_norm": 2.8125, "learning_rate": 4.981216460132666e-05, "loss": 0.8399, "step": 2366 }, { "epoch": 0.04197521016409861, "grad_norm": 2.9375, "learning_rate": 4.9811822734607304e-05, "loss": 0.8214, "step": 2368 }, { "epoch": 0.042010662199710176, "grad_norm": 3.0625, "learning_rate": 4.981148055824157e-05, "loss": 0.8488, "step": 2370 }, { "epoch": 0.04204611423532175, "grad_norm": 3.09375, "learning_rate": 4.981113807223372e-05, "loss": 0.9123, "step": 2372 }, { "epoch": 0.04208156627093332, "grad_norm": 2.96875, "learning_rate": 4.981079527658804e-05, "loss": 0.8889, "step": 2374 }, { "epoch": 0.042117018306544886, "grad_norm": 2.5, "learning_rate": 4.981045217130881e-05, "loss": 0.871, "step": 2376 }, { "epoch": 0.04215247034215646, "grad_norm": 2.9375, "learning_rate": 4.9810108756400294e-05, "loss": 0.8747, "step": 2378 }, { "epoch": 0.04218792237776803, "grad_norm": 3.015625, "learning_rate": 4.980976503186679e-05, "loss": 0.831, "step": 2380 }, { "epoch": 0.042223374413379595, "grad_norm": 2.53125, "learning_rate": 4.980942099771259e-05, "loss": 0.8865, "step": 2382 }, { "epoch": 0.04225882644899117, "grad_norm": 3.125, "learning_rate": 4.980907665394198e-05, "loss": 0.8713, "step": 2384 }, { "epoch": 0.04229427848460274, "grad_norm": 3.1875, "learning_rate": 4.980873200055927e-05, "loss": 0.8802, "step": 2386 }, { "epoch": 0.042329730520214305, "grad_norm": 3.265625, "learning_rate": 4.980838703756874e-05, "loss": 0.8807, "step": 2388 }, { "epoch": 0.04236518255582588, "grad_norm": 3.015625, "learning_rate": 4.980804176497471e-05, "loss": 0.8972, "step": 2390 }, { "epoch": 0.04240063459143745, "grad_norm": 2.84375, "learning_rate": 4.980769618278149e-05, "loss": 0.8419, "step": 2392 }, { "epoch": 0.042436086627049015, "grad_norm": 2.75, "learning_rate": 4.980735029099338e-05, "loss": 0.8692, "step": 2394 }, { "epoch": 0.04247153866266059, "grad_norm": 2.609375, "learning_rate": 4.9807004089614714e-05, "loss": 0.8991, "step": 2396 }, { "epoch": 0.04250699069827216, "grad_norm": 2.890625, "learning_rate": 4.98066575786498e-05, "loss": 0.9319, "step": 2398 }, { "epoch": 0.042542442733883724, "grad_norm": 2.734375, "learning_rate": 4.980631075810297e-05, "loss": 0.8623, "step": 2400 }, { "epoch": 0.0425778947694953, "grad_norm": 2.8125, "learning_rate": 4.980596362797855e-05, "loss": 0.8915, "step": 2402 }, { "epoch": 0.042613346805106866, "grad_norm": 2.703125, "learning_rate": 4.9805616188280855e-05, "loss": 0.843, "step": 2404 }, { "epoch": 0.042648798840718434, "grad_norm": 3.1875, "learning_rate": 4.980526843901425e-05, "loss": 0.9275, "step": 2406 }, { "epoch": 0.04268425087633001, "grad_norm": 3.28125, "learning_rate": 4.9804920380183064e-05, "loss": 0.8761, "step": 2408 }, { "epoch": 0.042719702911941576, "grad_norm": 2.859375, "learning_rate": 4.980457201179163e-05, "loss": 0.8647, "step": 2410 }, { "epoch": 0.042755154947553144, "grad_norm": 3.171875, "learning_rate": 4.980422333384431e-05, "loss": 0.8317, "step": 2412 }, { "epoch": 0.04279060698316471, "grad_norm": 2.625, "learning_rate": 4.9803874346345445e-05, "loss": 0.8614, "step": 2414 }, { "epoch": 0.042826059018776286, "grad_norm": 3.0625, "learning_rate": 4.980352504929939e-05, "loss": 0.8696, "step": 2416 }, { "epoch": 0.04286151105438785, "grad_norm": 3.203125, "learning_rate": 4.980317544271052e-05, "loss": 0.8983, "step": 2418 }, { "epoch": 0.04289696308999942, "grad_norm": 2.96875, "learning_rate": 4.980282552658318e-05, "loss": 0.8849, "step": 2420 }, { "epoch": 0.042932415125610995, "grad_norm": 2.6875, "learning_rate": 4.9802475300921744e-05, "loss": 0.8875, "step": 2422 }, { "epoch": 0.04296786716122256, "grad_norm": 2.84375, "learning_rate": 4.9802124765730586e-05, "loss": 0.8473, "step": 2424 }, { "epoch": 0.04300331919683413, "grad_norm": 2.953125, "learning_rate": 4.980177392101407e-05, "loss": 0.8613, "step": 2426 }, { "epoch": 0.043038771232445705, "grad_norm": 3.03125, "learning_rate": 4.980142276677658e-05, "loss": 0.8858, "step": 2428 }, { "epoch": 0.04307422326805727, "grad_norm": 2.796875, "learning_rate": 4.9801071303022514e-05, "loss": 0.8649, "step": 2430 }, { "epoch": 0.04310967530366884, "grad_norm": 2.84375, "learning_rate": 4.9800719529756236e-05, "loss": 0.8845, "step": 2432 }, { "epoch": 0.043145127339280415, "grad_norm": 2.734375, "learning_rate": 4.980036744698214e-05, "loss": 0.8627, "step": 2434 }, { "epoch": 0.04318057937489198, "grad_norm": 3.171875, "learning_rate": 4.9800015054704625e-05, "loss": 0.878, "step": 2436 }, { "epoch": 0.04321603141050355, "grad_norm": 3.3125, "learning_rate": 4.979966235292809e-05, "loss": 0.8916, "step": 2438 }, { "epoch": 0.043251483446115124, "grad_norm": 2.84375, "learning_rate": 4.979930934165693e-05, "loss": 0.8307, "step": 2440 }, { "epoch": 0.04328693548172669, "grad_norm": 2.75, "learning_rate": 4.979895602089556e-05, "loss": 0.8751, "step": 2442 }, { "epoch": 0.04332238751733826, "grad_norm": 2.9375, "learning_rate": 4.979860239064838e-05, "loss": 0.8464, "step": 2444 }, { "epoch": 0.043357839552949834, "grad_norm": 3.375, "learning_rate": 4.9798248450919804e-05, "loss": 0.8977, "step": 2446 }, { "epoch": 0.0433932915885614, "grad_norm": 2.828125, "learning_rate": 4.9797894201714266e-05, "loss": 0.8583, "step": 2448 }, { "epoch": 0.04342874362417297, "grad_norm": 2.84375, "learning_rate": 4.979753964303616e-05, "loss": 0.9041, "step": 2450 }, { "epoch": 0.043464195659784544, "grad_norm": 3.390625, "learning_rate": 4.9797184774889926e-05, "loss": 0.8823, "step": 2452 }, { "epoch": 0.04349964769539611, "grad_norm": 2.765625, "learning_rate": 4.979682959728e-05, "loss": 0.8753, "step": 2454 }, { "epoch": 0.04353509973100768, "grad_norm": 2.84375, "learning_rate": 4.97964741102108e-05, "loss": 0.9116, "step": 2456 }, { "epoch": 0.043570551766619246, "grad_norm": 2.765625, "learning_rate": 4.9796118313686765e-05, "loss": 0.8646, "step": 2458 }, { "epoch": 0.04360600380223082, "grad_norm": 2.859375, "learning_rate": 4.979576220771234e-05, "loss": 0.852, "step": 2460 }, { "epoch": 0.04364145583784239, "grad_norm": 3.015625, "learning_rate": 4.979540579229197e-05, "loss": 0.8815, "step": 2462 }, { "epoch": 0.043676907873453956, "grad_norm": 3.078125, "learning_rate": 4.979504906743009e-05, "loss": 0.8904, "step": 2464 }, { "epoch": 0.04371235990906553, "grad_norm": 2.578125, "learning_rate": 4.9794692033131176e-05, "loss": 0.8501, "step": 2466 }, { "epoch": 0.0437478119446771, "grad_norm": 2.890625, "learning_rate": 4.979433468939966e-05, "loss": 0.8315, "step": 2468 }, { "epoch": 0.043783263980288666, "grad_norm": 2.875, "learning_rate": 4.9793977036240015e-05, "loss": 0.8595, "step": 2470 }, { "epoch": 0.04381871601590024, "grad_norm": 2.84375, "learning_rate": 4.979361907365671e-05, "loss": 0.8838, "step": 2472 }, { "epoch": 0.04385416805151181, "grad_norm": 2.765625, "learning_rate": 4.979326080165419e-05, "loss": 0.8831, "step": 2474 }, { "epoch": 0.043889620087123375, "grad_norm": 2.5, "learning_rate": 4.9792902220236936e-05, "loss": 0.8185, "step": 2476 }, { "epoch": 0.04392507212273495, "grad_norm": 2.921875, "learning_rate": 4.979254332940944e-05, "loss": 0.8773, "step": 2478 }, { "epoch": 0.04396052415834652, "grad_norm": 3.0625, "learning_rate": 4.9792184129176153e-05, "loss": 0.8821, "step": 2480 }, { "epoch": 0.043995976193958085, "grad_norm": 2.859375, "learning_rate": 4.979182461954158e-05, "loss": 0.8751, "step": 2482 }, { "epoch": 0.04403142822956966, "grad_norm": 3.25, "learning_rate": 4.97914648005102e-05, "loss": 0.9332, "step": 2484 }, { "epoch": 0.04406688026518123, "grad_norm": 2.890625, "learning_rate": 4.979110467208651e-05, "loss": 0.8437, "step": 2486 }, { "epoch": 0.044102332300792794, "grad_norm": 3.1875, "learning_rate": 4.9790744234274986e-05, "loss": 0.8874, "step": 2488 }, { "epoch": 0.04413778433640437, "grad_norm": 3.015625, "learning_rate": 4.979038348708014e-05, "loss": 0.8942, "step": 2490 }, { "epoch": 0.04417323637201594, "grad_norm": 3.25, "learning_rate": 4.9790022430506463e-05, "loss": 0.8691, "step": 2492 }, { "epoch": 0.044208688407627504, "grad_norm": 2.84375, "learning_rate": 4.978966106455848e-05, "loss": 0.9051, "step": 2494 }, { "epoch": 0.04424414044323908, "grad_norm": 2.984375, "learning_rate": 4.9789299389240694e-05, "loss": 0.8664, "step": 2496 }, { "epoch": 0.044279592478850646, "grad_norm": 2.984375, "learning_rate": 4.9788937404557615e-05, "loss": 0.8914, "step": 2498 }, { "epoch": 0.044315044514462214, "grad_norm": 2.921875, "learning_rate": 4.9788575110513755e-05, "loss": 0.8917, "step": 2500 }, { "epoch": 0.04435049655007378, "grad_norm": 2.890625, "learning_rate": 4.978821250711364e-05, "loss": 0.8945, "step": 2502 }, { "epoch": 0.044385948585685356, "grad_norm": 2.671875, "learning_rate": 4.978784959436179e-05, "loss": 0.8685, "step": 2504 }, { "epoch": 0.04442140062129692, "grad_norm": 2.859375, "learning_rate": 4.9787486372262746e-05, "loss": 0.8572, "step": 2506 }, { "epoch": 0.04445685265690849, "grad_norm": 2.703125, "learning_rate": 4.978712284082104e-05, "loss": 0.8378, "step": 2508 }, { "epoch": 0.044492304692520065, "grad_norm": 3.015625, "learning_rate": 4.9786759000041197e-05, "loss": 0.8705, "step": 2510 }, { "epoch": 0.04452775672813163, "grad_norm": 3.234375, "learning_rate": 4.978639484992777e-05, "loss": 0.874, "step": 2512 }, { "epoch": 0.0445632087637432, "grad_norm": 3.078125, "learning_rate": 4.9786030390485295e-05, "loss": 0.9042, "step": 2514 }, { "epoch": 0.044598660799354775, "grad_norm": 3.171875, "learning_rate": 4.9785665621718325e-05, "loss": 0.8862, "step": 2516 }, { "epoch": 0.04463411283496634, "grad_norm": 2.578125, "learning_rate": 4.978530054363141e-05, "loss": 0.8207, "step": 2518 }, { "epoch": 0.04466956487057791, "grad_norm": 2.828125, "learning_rate": 4.97849351562291e-05, "loss": 0.8353, "step": 2520 }, { "epoch": 0.044705016906189485, "grad_norm": 2.875, "learning_rate": 4.978456945951597e-05, "loss": 0.8698, "step": 2522 }, { "epoch": 0.04474046894180105, "grad_norm": 2.6875, "learning_rate": 4.978420345349657e-05, "loss": 0.9051, "step": 2524 }, { "epoch": 0.04477592097741262, "grad_norm": 3.140625, "learning_rate": 4.978383713817548e-05, "loss": 0.8803, "step": 2526 }, { "epoch": 0.044811373013024194, "grad_norm": 2.71875, "learning_rate": 4.9783470513557255e-05, "loss": 0.8891, "step": 2528 }, { "epoch": 0.04484682504863576, "grad_norm": 3.03125, "learning_rate": 4.978310357964648e-05, "loss": 0.8661, "step": 2530 }, { "epoch": 0.04488227708424733, "grad_norm": 3.25, "learning_rate": 4.978273633644775e-05, "loss": 0.8592, "step": 2532 }, { "epoch": 0.044917729119858904, "grad_norm": 3.125, "learning_rate": 4.978236878396562e-05, "loss": 0.8831, "step": 2534 }, { "epoch": 0.04495318115547047, "grad_norm": 2.8125, "learning_rate": 4.978200092220469e-05, "loss": 0.9059, "step": 2536 }, { "epoch": 0.04498863319108204, "grad_norm": 3.1875, "learning_rate": 4.978163275116955e-05, "loss": 0.9341, "step": 2538 }, { "epoch": 0.045024085226693614, "grad_norm": 2.625, "learning_rate": 4.978126427086479e-05, "loss": 0.8293, "step": 2540 }, { "epoch": 0.04505953726230518, "grad_norm": 3.09375, "learning_rate": 4.978089548129502e-05, "loss": 0.8728, "step": 2542 }, { "epoch": 0.04509498929791675, "grad_norm": 2.515625, "learning_rate": 4.9780526382464846e-05, "loss": 0.8605, "step": 2544 }, { "epoch": 0.045130441333528316, "grad_norm": 2.859375, "learning_rate": 4.978015697437884e-05, "loss": 0.9002, "step": 2546 }, { "epoch": 0.04516589336913989, "grad_norm": 2.8125, "learning_rate": 4.977978725704165e-05, "loss": 0.9107, "step": 2548 }, { "epoch": 0.04520134540475146, "grad_norm": 2.875, "learning_rate": 4.977941723045788e-05, "loss": 0.8659, "step": 2550 }, { "epoch": 0.045236797440363026, "grad_norm": 2.546875, "learning_rate": 4.9779046894632145e-05, "loss": 0.8158, "step": 2552 }, { "epoch": 0.0452722494759746, "grad_norm": 2.953125, "learning_rate": 4.9778676249569055e-05, "loss": 0.8675, "step": 2554 }, { "epoch": 0.04530770151158617, "grad_norm": 3.15625, "learning_rate": 4.977830529527325e-05, "loss": 0.8723, "step": 2556 }, { "epoch": 0.045343153547197736, "grad_norm": 3.0, "learning_rate": 4.977793403174936e-05, "loss": 0.8759, "step": 2558 }, { "epoch": 0.04537860558280931, "grad_norm": 2.921875, "learning_rate": 4.9777562459002016e-05, "loss": 0.8769, "step": 2560 }, { "epoch": 0.04541405761842088, "grad_norm": 2.65625, "learning_rate": 4.977719057703585e-05, "loss": 0.8584, "step": 2562 }, { "epoch": 0.045449509654032445, "grad_norm": 2.84375, "learning_rate": 4.977681838585551e-05, "loss": 0.8723, "step": 2564 }, { "epoch": 0.04548496168964402, "grad_norm": 2.84375, "learning_rate": 4.977644588546563e-05, "loss": 0.8643, "step": 2566 }, { "epoch": 0.04552041372525559, "grad_norm": 3.125, "learning_rate": 4.977607307587087e-05, "loss": 0.9124, "step": 2568 }, { "epoch": 0.045555865760867155, "grad_norm": 2.921875, "learning_rate": 4.977569995707588e-05, "loss": 0.8738, "step": 2570 }, { "epoch": 0.04559131779647873, "grad_norm": 2.78125, "learning_rate": 4.977532652908531e-05, "loss": 0.8377, "step": 2572 }, { "epoch": 0.0456267698320903, "grad_norm": 3.015625, "learning_rate": 4.977495279190383e-05, "loss": 0.8863, "step": 2574 }, { "epoch": 0.045662221867701865, "grad_norm": 2.8125, "learning_rate": 4.9774578745536095e-05, "loss": 0.8753, "step": 2576 }, { "epoch": 0.04569767390331344, "grad_norm": 2.6875, "learning_rate": 4.9774204389986776e-05, "loss": 0.8668, "step": 2578 }, { "epoch": 0.04573312593892501, "grad_norm": 2.46875, "learning_rate": 4.977382972526056e-05, "loss": 0.8861, "step": 2580 }, { "epoch": 0.045768577974536574, "grad_norm": 2.796875, "learning_rate": 4.9773454751362095e-05, "loss": 0.8983, "step": 2582 }, { "epoch": 0.04580403001014814, "grad_norm": 3.0, "learning_rate": 4.977307946829608e-05, "loss": 0.8544, "step": 2584 }, { "epoch": 0.045839482045759716, "grad_norm": 3.171875, "learning_rate": 4.977270387606719e-05, "loss": 0.8897, "step": 2586 }, { "epoch": 0.045874934081371284, "grad_norm": 3.171875, "learning_rate": 4.977232797468012e-05, "loss": 0.8848, "step": 2588 }, { "epoch": 0.04591038611698285, "grad_norm": 2.953125, "learning_rate": 4.977195176413955e-05, "loss": 0.9034, "step": 2590 }, { "epoch": 0.045945838152594426, "grad_norm": 2.796875, "learning_rate": 4.9771575244450186e-05, "loss": 0.8224, "step": 2592 }, { "epoch": 0.045981290188205994, "grad_norm": 2.984375, "learning_rate": 4.977119841561672e-05, "loss": 0.8883, "step": 2594 }, { "epoch": 0.04601674222381756, "grad_norm": 2.390625, "learning_rate": 4.9770821277643867e-05, "loss": 0.8868, "step": 2596 }, { "epoch": 0.046052194259429136, "grad_norm": 2.96875, "learning_rate": 4.977044383053631e-05, "loss": 0.8422, "step": 2598 }, { "epoch": 0.0460876462950407, "grad_norm": 2.828125, "learning_rate": 4.977006607429878e-05, "loss": 0.8772, "step": 2600 }, { "epoch": 0.04612309833065227, "grad_norm": 3.015625, "learning_rate": 4.976968800893598e-05, "loss": 0.8761, "step": 2602 }, { "epoch": 0.046158550366263845, "grad_norm": 2.78125, "learning_rate": 4.9769309634452644e-05, "loss": 0.8523, "step": 2604 }, { "epoch": 0.04619400240187541, "grad_norm": 2.921875, "learning_rate": 4.976893095085347e-05, "loss": 0.8927, "step": 2606 }, { "epoch": 0.04622945443748698, "grad_norm": 2.859375, "learning_rate": 4.976855195814321e-05, "loss": 0.8732, "step": 2608 }, { "epoch": 0.046264906473098555, "grad_norm": 2.828125, "learning_rate": 4.976817265632657e-05, "loss": 0.8469, "step": 2610 }, { "epoch": 0.04630035850871012, "grad_norm": 2.703125, "learning_rate": 4.9767793045408293e-05, "loss": 0.8568, "step": 2612 }, { "epoch": 0.04633581054432169, "grad_norm": 2.84375, "learning_rate": 4.976741312539313e-05, "loss": 0.8712, "step": 2614 }, { "epoch": 0.046371262579933265, "grad_norm": 2.96875, "learning_rate": 4.976703289628579e-05, "loss": 0.8554, "step": 2616 }, { "epoch": 0.04640671461554483, "grad_norm": 2.796875, "learning_rate": 4.9766652358091054e-05, "loss": 0.8811, "step": 2618 }, { "epoch": 0.0464421666511564, "grad_norm": 2.984375, "learning_rate": 4.976627151081365e-05, "loss": 0.9108, "step": 2620 }, { "epoch": 0.046477618686767974, "grad_norm": 2.671875, "learning_rate": 4.9765890354458335e-05, "loss": 0.8402, "step": 2622 }, { "epoch": 0.04651307072237954, "grad_norm": 2.9375, "learning_rate": 4.976550888902987e-05, "loss": 0.8537, "step": 2624 }, { "epoch": 0.04654852275799111, "grad_norm": 2.671875, "learning_rate": 4.976512711453301e-05, "loss": 0.8806, "step": 2626 }, { "epoch": 0.04658397479360268, "grad_norm": 2.9375, "learning_rate": 4.976474503097252e-05, "loss": 0.8581, "step": 2628 }, { "epoch": 0.04661942682921425, "grad_norm": 2.734375, "learning_rate": 4.976436263835317e-05, "loss": 0.8177, "step": 2630 }, { "epoch": 0.04665487886482582, "grad_norm": 2.828125, "learning_rate": 4.976397993667974e-05, "loss": 0.8971, "step": 2632 }, { "epoch": 0.046690330900437386, "grad_norm": 3.046875, "learning_rate": 4.976359692595699e-05, "loss": 0.8278, "step": 2634 }, { "epoch": 0.04672578293604896, "grad_norm": 3.046875, "learning_rate": 4.976321360618972e-05, "loss": 0.8896, "step": 2636 }, { "epoch": 0.04676123497166053, "grad_norm": 2.859375, "learning_rate": 4.976282997738269e-05, "loss": 0.8968, "step": 2638 }, { "epoch": 0.046796687007272096, "grad_norm": 2.890625, "learning_rate": 4.97624460395407e-05, "loss": 0.8867, "step": 2640 }, { "epoch": 0.04683213904288367, "grad_norm": 3.078125, "learning_rate": 4.9762061792668546e-05, "loss": 0.8456, "step": 2642 }, { "epoch": 0.04686759107849524, "grad_norm": 2.703125, "learning_rate": 4.976167723677102e-05, "loss": 0.8924, "step": 2644 }, { "epoch": 0.046903043114106806, "grad_norm": 3.0, "learning_rate": 4.976129237185291e-05, "loss": 0.8889, "step": 2646 }, { "epoch": 0.04693849514971838, "grad_norm": 3.328125, "learning_rate": 4.976090719791904e-05, "loss": 0.8419, "step": 2648 }, { "epoch": 0.04697394718532995, "grad_norm": 2.765625, "learning_rate": 4.97605217149742e-05, "loss": 0.8749, "step": 2650 }, { "epoch": 0.047009399220941515, "grad_norm": 3.203125, "learning_rate": 4.97601359230232e-05, "loss": 0.8336, "step": 2652 }, { "epoch": 0.04704485125655309, "grad_norm": 3.21875, "learning_rate": 4.9759749822070864e-05, "loss": 0.8508, "step": 2654 }, { "epoch": 0.04708030329216466, "grad_norm": 2.8125, "learning_rate": 4.975936341212202e-05, "loss": 0.8548, "step": 2656 }, { "epoch": 0.047115755327776225, "grad_norm": 2.828125, "learning_rate": 4.9758976693181464e-05, "loss": 0.8688, "step": 2658 }, { "epoch": 0.0471512073633878, "grad_norm": 2.8125, "learning_rate": 4.9758589665254044e-05, "loss": 0.9214, "step": 2660 }, { "epoch": 0.04718665939899937, "grad_norm": 2.734375, "learning_rate": 4.975820232834457e-05, "loss": 0.8447, "step": 2662 }, { "epoch": 0.047222111434610935, "grad_norm": 2.640625, "learning_rate": 4.97578146824579e-05, "loss": 0.8716, "step": 2664 }, { "epoch": 0.04725756347022251, "grad_norm": 2.65625, "learning_rate": 4.975742672759885e-05, "loss": 0.86, "step": 2666 }, { "epoch": 0.04729301550583408, "grad_norm": 3.03125, "learning_rate": 4.975703846377228e-05, "loss": 0.8478, "step": 2668 }, { "epoch": 0.047328467541445644, "grad_norm": 2.59375, "learning_rate": 4.975664989098302e-05, "loss": 0.842, "step": 2670 }, { "epoch": 0.04736391957705721, "grad_norm": 2.546875, "learning_rate": 4.975626100923593e-05, "loss": 0.8574, "step": 2672 }, { "epoch": 0.047399371612668786, "grad_norm": 2.8125, "learning_rate": 4.975587181853586e-05, "loss": 0.8438, "step": 2674 }, { "epoch": 0.047434823648280354, "grad_norm": 2.53125, "learning_rate": 4.9755482318887656e-05, "loss": 0.8721, "step": 2676 }, { "epoch": 0.04747027568389192, "grad_norm": 2.953125, "learning_rate": 4.9755092510296204e-05, "loss": 0.8788, "step": 2678 }, { "epoch": 0.047505727719503496, "grad_norm": 2.84375, "learning_rate": 4.975470239276634e-05, "loss": 0.8623, "step": 2680 }, { "epoch": 0.047541179755115064, "grad_norm": 2.75, "learning_rate": 4.975431196630296e-05, "loss": 0.8361, "step": 2682 }, { "epoch": 0.04757663179072663, "grad_norm": 3.03125, "learning_rate": 4.975392123091091e-05, "loss": 0.8689, "step": 2684 }, { "epoch": 0.047612083826338206, "grad_norm": 2.75, "learning_rate": 4.975353018659508e-05, "loss": 0.8697, "step": 2686 }, { "epoch": 0.04764753586194977, "grad_norm": 2.59375, "learning_rate": 4.975313883336036e-05, "loss": 0.8277, "step": 2688 }, { "epoch": 0.04768298789756134, "grad_norm": 3.0, "learning_rate": 4.9752747171211614e-05, "loss": 0.8991, "step": 2690 }, { "epoch": 0.047718439933172915, "grad_norm": 2.640625, "learning_rate": 4.9752355200153735e-05, "loss": 0.8643, "step": 2692 }, { "epoch": 0.04775389196878448, "grad_norm": 2.84375, "learning_rate": 4.975196292019163e-05, "loss": 0.8467, "step": 2694 }, { "epoch": 0.04778934400439605, "grad_norm": 3.328125, "learning_rate": 4.9751570331330176e-05, "loss": 0.8837, "step": 2696 }, { "epoch": 0.047824796040007625, "grad_norm": 2.671875, "learning_rate": 4.975117743357428e-05, "loss": 0.8534, "step": 2698 }, { "epoch": 0.04786024807561919, "grad_norm": 2.921875, "learning_rate": 4.975078422692884e-05, "loss": 0.8852, "step": 2700 }, { "epoch": 0.04789570011123076, "grad_norm": 2.921875, "learning_rate": 4.975039071139878e-05, "loss": 0.886, "step": 2702 }, { "epoch": 0.047931152146842335, "grad_norm": 2.828125, "learning_rate": 4.9749996886988994e-05, "loss": 0.88, "step": 2704 }, { "epoch": 0.0479666041824539, "grad_norm": 3.015625, "learning_rate": 4.974960275370439e-05, "loss": 0.8631, "step": 2706 }, { "epoch": 0.04800205621806547, "grad_norm": 3.0625, "learning_rate": 4.9749208311549916e-05, "loss": 0.9029, "step": 2708 }, { "epoch": 0.048037508253677044, "grad_norm": 2.703125, "learning_rate": 4.974881356053047e-05, "loss": 0.8651, "step": 2710 }, { "epoch": 0.04807296028928861, "grad_norm": 2.578125, "learning_rate": 4.974841850065098e-05, "loss": 0.8155, "step": 2712 }, { "epoch": 0.04810841232490018, "grad_norm": 2.921875, "learning_rate": 4.97480231319164e-05, "loss": 0.8669, "step": 2714 }, { "epoch": 0.04814386436051175, "grad_norm": 3.125, "learning_rate": 4.974762745433163e-05, "loss": 0.8886, "step": 2716 }, { "epoch": 0.04817931639612332, "grad_norm": 3.0, "learning_rate": 4.9747231467901625e-05, "loss": 0.868, "step": 2718 }, { "epoch": 0.04821476843173489, "grad_norm": 2.984375, "learning_rate": 4.974683517263134e-05, "loss": 0.8811, "step": 2720 }, { "epoch": 0.04825022046734646, "grad_norm": 3.109375, "learning_rate": 4.974643856852569e-05, "loss": 0.8448, "step": 2722 }, { "epoch": 0.04828567250295803, "grad_norm": 3.609375, "learning_rate": 4.9746041655589644e-05, "loss": 0.8653, "step": 2724 }, { "epoch": 0.0483211245385696, "grad_norm": 2.875, "learning_rate": 4.974564443382815e-05, "loss": 0.8171, "step": 2726 }, { "epoch": 0.048356576574181166, "grad_norm": 2.6875, "learning_rate": 4.974524690324618e-05, "loss": 0.8275, "step": 2728 }, { "epoch": 0.04839202860979274, "grad_norm": 3.109375, "learning_rate": 4.974484906384867e-05, "loss": 0.8725, "step": 2730 }, { "epoch": 0.04842748064540431, "grad_norm": 2.703125, "learning_rate": 4.9744450915640605e-05, "loss": 0.8374, "step": 2732 }, { "epoch": 0.048462932681015876, "grad_norm": 2.78125, "learning_rate": 4.9744052458626944e-05, "loss": 0.9077, "step": 2734 }, { "epoch": 0.04849838471662745, "grad_norm": 2.71875, "learning_rate": 4.9743653692812657e-05, "loss": 0.8366, "step": 2736 }, { "epoch": 0.04853383675223902, "grad_norm": 3.0, "learning_rate": 4.974325461820273e-05, "loss": 0.8669, "step": 2738 }, { "epoch": 0.048569288787850586, "grad_norm": 2.984375, "learning_rate": 4.9742855234802146e-05, "loss": 0.904, "step": 2740 }, { "epoch": 0.04860474082346216, "grad_norm": 2.8125, "learning_rate": 4.9742455542615876e-05, "loss": 0.854, "step": 2742 }, { "epoch": 0.04864019285907373, "grad_norm": 2.828125, "learning_rate": 4.9742055541648916e-05, "loss": 0.8762, "step": 2744 }, { "epoch": 0.048675644894685295, "grad_norm": 2.71875, "learning_rate": 4.9741655231906246e-05, "loss": 0.8533, "step": 2746 }, { "epoch": 0.04871109693029687, "grad_norm": 2.65625, "learning_rate": 4.9741254613392887e-05, "loss": 0.8636, "step": 2748 }, { "epoch": 0.04874654896590844, "grad_norm": 2.75, "learning_rate": 4.974085368611381e-05, "loss": 0.8573, "step": 2750 }, { "epoch": 0.048782001001520005, "grad_norm": 2.8125, "learning_rate": 4.9740452450074044e-05, "loss": 0.9177, "step": 2752 }, { "epoch": 0.04881745303713157, "grad_norm": 2.75, "learning_rate": 4.9740050905278577e-05, "loss": 0.8821, "step": 2754 }, { "epoch": 0.04885290507274315, "grad_norm": 2.640625, "learning_rate": 4.973964905173243e-05, "loss": 0.8554, "step": 2756 }, { "epoch": 0.048888357108354714, "grad_norm": 2.6875, "learning_rate": 4.973924688944061e-05, "loss": 0.8452, "step": 2758 }, { "epoch": 0.04892380914396628, "grad_norm": 2.953125, "learning_rate": 4.973884441840816e-05, "loss": 0.8852, "step": 2760 }, { "epoch": 0.048959261179577857, "grad_norm": 2.828125, "learning_rate": 4.9738441638640064e-05, "loss": 0.8495, "step": 2762 }, { "epoch": 0.048994713215189424, "grad_norm": 2.875, "learning_rate": 4.973803855014138e-05, "loss": 0.883, "step": 2764 }, { "epoch": 0.04903016525080099, "grad_norm": 3.109375, "learning_rate": 4.973763515291713e-05, "loss": 0.8985, "step": 2766 }, { "epoch": 0.049065617286412566, "grad_norm": 3.09375, "learning_rate": 4.9737231446972334e-05, "loss": 0.8508, "step": 2768 }, { "epoch": 0.049101069322024134, "grad_norm": 3.140625, "learning_rate": 4.973682743231205e-05, "loss": 0.8939, "step": 2770 }, { "epoch": 0.0491365213576357, "grad_norm": 3.21875, "learning_rate": 4.973642310894131e-05, "loss": 0.8824, "step": 2772 }, { "epoch": 0.049171973393247276, "grad_norm": 3.109375, "learning_rate": 4.9736018476865165e-05, "loss": 0.9063, "step": 2774 }, { "epoch": 0.04920742542885884, "grad_norm": 2.75, "learning_rate": 4.973561353608866e-05, "loss": 0.8409, "step": 2776 }, { "epoch": 0.04924287746447041, "grad_norm": 2.984375, "learning_rate": 4.973520828661684e-05, "loss": 0.8812, "step": 2778 }, { "epoch": 0.049278329500081985, "grad_norm": 2.859375, "learning_rate": 4.973480272845479e-05, "loss": 0.8498, "step": 2780 }, { "epoch": 0.04931378153569355, "grad_norm": 2.6875, "learning_rate": 4.9734396861607543e-05, "loss": 0.8471, "step": 2782 }, { "epoch": 0.04934923357130512, "grad_norm": 2.65625, "learning_rate": 4.973399068608018e-05, "loss": 0.8652, "step": 2784 }, { "epoch": 0.049384685606916695, "grad_norm": 2.828125, "learning_rate": 4.973358420187776e-05, "loss": 0.8479, "step": 2786 }, { "epoch": 0.04942013764252826, "grad_norm": 2.5625, "learning_rate": 4.973317740900536e-05, "loss": 0.8945, "step": 2788 }, { "epoch": 0.04945558967813983, "grad_norm": 2.640625, "learning_rate": 4.973277030746806e-05, "loss": 0.85, "step": 2790 }, { "epoch": 0.049491041713751405, "grad_norm": 3.09375, "learning_rate": 4.973236289727094e-05, "loss": 0.8745, "step": 2792 }, { "epoch": 0.04952649374936297, "grad_norm": 2.515625, "learning_rate": 4.9731955178419075e-05, "loss": 0.842, "step": 2794 }, { "epoch": 0.04956194578497454, "grad_norm": 2.859375, "learning_rate": 4.9731547150917566e-05, "loss": 0.9042, "step": 2796 }, { "epoch": 0.04959739782058611, "grad_norm": 3.046875, "learning_rate": 4.973113881477151e-05, "loss": 0.885, "step": 2798 }, { "epoch": 0.04963284985619768, "grad_norm": 2.765625, "learning_rate": 4.973073016998598e-05, "loss": 0.8983, "step": 2800 }, { "epoch": 0.04966830189180925, "grad_norm": 2.609375, "learning_rate": 4.973032121656609e-05, "loss": 0.8418, "step": 2802 }, { "epoch": 0.04970375392742082, "grad_norm": 2.78125, "learning_rate": 4.9729911954516946e-05, "loss": 0.8868, "step": 2804 }, { "epoch": 0.04973920596303239, "grad_norm": 2.78125, "learning_rate": 4.9729502383843643e-05, "loss": 0.838, "step": 2806 }, { "epoch": 0.04977465799864396, "grad_norm": 3.015625, "learning_rate": 4.972909250455131e-05, "loss": 0.8609, "step": 2808 }, { "epoch": 0.04981011003425553, "grad_norm": 3.03125, "learning_rate": 4.972868231664505e-05, "loss": 0.8558, "step": 2810 }, { "epoch": 0.0498455620698671, "grad_norm": 3.015625, "learning_rate": 4.972827182012999e-05, "loss": 0.858, "step": 2812 }, { "epoch": 0.04988101410547867, "grad_norm": 2.875, "learning_rate": 4.972786101501125e-05, "loss": 0.8594, "step": 2814 }, { "epoch": 0.049916466141090236, "grad_norm": 2.9375, "learning_rate": 4.972744990129394e-05, "loss": 0.9087, "step": 2816 }, { "epoch": 0.04995191817670181, "grad_norm": 2.75, "learning_rate": 4.972703847898321e-05, "loss": 0.8705, "step": 2818 }, { "epoch": 0.04998737021231338, "grad_norm": 2.8125, "learning_rate": 4.97266267480842e-05, "loss": 0.8763, "step": 2820 }, { "epoch": 0.050022822247924946, "grad_norm": 2.828125, "learning_rate": 4.972621470860204e-05, "loss": 0.8277, "step": 2822 }, { "epoch": 0.05005827428353652, "grad_norm": 2.75, "learning_rate": 4.9725802360541854e-05, "loss": 0.8925, "step": 2824 }, { "epoch": 0.05009372631914809, "grad_norm": 2.765625, "learning_rate": 4.972538970390882e-05, "loss": 0.8644, "step": 2826 }, { "epoch": 0.050129178354759656, "grad_norm": 3.375, "learning_rate": 4.972497673870806e-05, "loss": 0.895, "step": 2828 }, { "epoch": 0.05016463039037123, "grad_norm": 2.71875, "learning_rate": 4.9724563464944754e-05, "loss": 0.8475, "step": 2830 }, { "epoch": 0.0502000824259828, "grad_norm": 2.875, "learning_rate": 4.972414988262404e-05, "loss": 0.8468, "step": 2832 }, { "epoch": 0.050235534461594365, "grad_norm": 2.78125, "learning_rate": 4.9723735991751076e-05, "loss": 0.9013, "step": 2834 }, { "epoch": 0.05027098649720594, "grad_norm": 2.953125, "learning_rate": 4.972332179233105e-05, "loss": 0.9036, "step": 2836 }, { "epoch": 0.05030643853281751, "grad_norm": 3.03125, "learning_rate": 4.972290728436911e-05, "loss": 0.8596, "step": 2838 }, { "epoch": 0.050341890568429075, "grad_norm": 2.875, "learning_rate": 4.9722492467870434e-05, "loss": 0.8977, "step": 2840 }, { "epoch": 0.05037734260404064, "grad_norm": 2.78125, "learning_rate": 4.9722077342840204e-05, "loss": 0.8696, "step": 2842 }, { "epoch": 0.05041279463965222, "grad_norm": 2.984375, "learning_rate": 4.97216619092836e-05, "loss": 0.8877, "step": 2844 }, { "epoch": 0.050448246675263785, "grad_norm": 3.1875, "learning_rate": 4.97212461672058e-05, "loss": 0.8741, "step": 2846 }, { "epoch": 0.05048369871087535, "grad_norm": 2.921875, "learning_rate": 4.9720830116612004e-05, "loss": 0.877, "step": 2848 }, { "epoch": 0.05051915074648693, "grad_norm": 2.875, "learning_rate": 4.97204137575074e-05, "loss": 0.8109, "step": 2850 }, { "epoch": 0.050554602782098494, "grad_norm": 2.90625, "learning_rate": 4.9719997089897176e-05, "loss": 0.8455, "step": 2852 }, { "epoch": 0.05059005481771006, "grad_norm": 2.734375, "learning_rate": 4.971958011378653e-05, "loss": 0.8942, "step": 2854 }, { "epoch": 0.050625506853321636, "grad_norm": 3.109375, "learning_rate": 4.9719162829180684e-05, "loss": 0.8563, "step": 2856 }, { "epoch": 0.050660958888933204, "grad_norm": 2.921875, "learning_rate": 4.971874523608483e-05, "loss": 0.8732, "step": 2858 }, { "epoch": 0.05069641092454477, "grad_norm": 2.859375, "learning_rate": 4.9718327334504175e-05, "loss": 0.8292, "step": 2860 }, { "epoch": 0.050731862960156346, "grad_norm": 2.734375, "learning_rate": 4.971790912444396e-05, "loss": 0.9058, "step": 2862 }, { "epoch": 0.05076731499576791, "grad_norm": 2.796875, "learning_rate": 4.971749060590938e-05, "loss": 0.883, "step": 2864 }, { "epoch": 0.05080276703137948, "grad_norm": 2.890625, "learning_rate": 4.9717071778905667e-05, "loss": 0.8377, "step": 2866 }, { "epoch": 0.050838219066991056, "grad_norm": 3.125, "learning_rate": 4.971665264343804e-05, "loss": 0.8734, "step": 2868 }, { "epoch": 0.05087367110260262, "grad_norm": 2.796875, "learning_rate": 4.971623319951174e-05, "loss": 0.8644, "step": 2870 }, { "epoch": 0.05090912313821419, "grad_norm": 2.390625, "learning_rate": 4.9715813447132e-05, "loss": 0.8551, "step": 2872 }, { "epoch": 0.050944575173825765, "grad_norm": 2.78125, "learning_rate": 4.9715393386304056e-05, "loss": 0.8708, "step": 2874 }, { "epoch": 0.05098002720943733, "grad_norm": 3.109375, "learning_rate": 4.971497301703315e-05, "loss": 0.8547, "step": 2876 }, { "epoch": 0.0510154792450489, "grad_norm": 3.328125, "learning_rate": 4.9714552339324525e-05, "loss": 0.8751, "step": 2878 }, { "epoch": 0.051050931280660475, "grad_norm": 2.84375, "learning_rate": 4.9714131353183435e-05, "loss": 0.8485, "step": 2880 }, { "epoch": 0.05108638331627204, "grad_norm": 3.140625, "learning_rate": 4.971371005861514e-05, "loss": 0.8416, "step": 2882 }, { "epoch": 0.05112183535188361, "grad_norm": 2.8125, "learning_rate": 4.971328845562488e-05, "loss": 0.9008, "step": 2884 }, { "epoch": 0.05115728738749518, "grad_norm": 2.46875, "learning_rate": 4.971286654421793e-05, "loss": 0.822, "step": 2886 }, { "epoch": 0.05119273942310675, "grad_norm": 2.953125, "learning_rate": 4.971244432439956e-05, "loss": 0.8887, "step": 2888 }, { "epoch": 0.05122819145871832, "grad_norm": 2.78125, "learning_rate": 4.971202179617502e-05, "loss": 0.8879, "step": 2890 }, { "epoch": 0.05126364349432989, "grad_norm": 2.84375, "learning_rate": 4.971159895954961e-05, "loss": 0.8847, "step": 2892 }, { "epoch": 0.05129909552994146, "grad_norm": 2.828125, "learning_rate": 4.971117581452859e-05, "loss": 0.872, "step": 2894 }, { "epoch": 0.05133454756555303, "grad_norm": 2.921875, "learning_rate": 4.971075236111724e-05, "loss": 0.861, "step": 2896 }, { "epoch": 0.0513699996011646, "grad_norm": 2.671875, "learning_rate": 4.9710328599320846e-05, "loss": 0.8889, "step": 2898 }, { "epoch": 0.05140545163677617, "grad_norm": 2.78125, "learning_rate": 4.970990452914469e-05, "loss": 0.8878, "step": 2900 }, { "epoch": 0.05144090367238774, "grad_norm": 2.609375, "learning_rate": 4.970948015059408e-05, "loss": 0.8651, "step": 2902 }, { "epoch": 0.051476355707999306, "grad_norm": 2.921875, "learning_rate": 4.9709055463674304e-05, "loss": 0.8522, "step": 2904 }, { "epoch": 0.05151180774361088, "grad_norm": 2.78125, "learning_rate": 4.970863046839066e-05, "loss": 0.8409, "step": 2906 }, { "epoch": 0.05154725977922245, "grad_norm": 3.0, "learning_rate": 4.970820516474846e-05, "loss": 0.8737, "step": 2908 }, { "epoch": 0.051582711814834016, "grad_norm": 3.09375, "learning_rate": 4.9707779552753e-05, "loss": 0.87, "step": 2910 }, { "epoch": 0.05161816385044559, "grad_norm": 2.78125, "learning_rate": 4.97073536324096e-05, "loss": 0.8184, "step": 2912 }, { "epoch": 0.05165361588605716, "grad_norm": 2.765625, "learning_rate": 4.9706927403723574e-05, "loss": 0.9204, "step": 2914 }, { "epoch": 0.051689067921668726, "grad_norm": 2.5625, "learning_rate": 4.970650086670024e-05, "loss": 0.8298, "step": 2916 }, { "epoch": 0.0517245199572803, "grad_norm": 3.015625, "learning_rate": 4.9706074021344916e-05, "loss": 0.8427, "step": 2918 }, { "epoch": 0.05175997199289187, "grad_norm": 2.78125, "learning_rate": 4.970564686766294e-05, "loss": 0.8631, "step": 2920 }, { "epoch": 0.051795424028503435, "grad_norm": 2.875, "learning_rate": 4.9705219405659635e-05, "loss": 0.8462, "step": 2922 }, { "epoch": 0.05183087606411501, "grad_norm": 2.890625, "learning_rate": 4.970479163534034e-05, "loss": 0.8948, "step": 2924 }, { "epoch": 0.05186632809972658, "grad_norm": 2.546875, "learning_rate": 4.970436355671039e-05, "loss": 0.8498, "step": 2926 }, { "epoch": 0.051901780135338145, "grad_norm": 2.765625, "learning_rate": 4.970393516977513e-05, "loss": 0.885, "step": 2928 }, { "epoch": 0.05193723217094971, "grad_norm": 2.59375, "learning_rate": 4.9703506474539894e-05, "loss": 0.8458, "step": 2930 }, { "epoch": 0.05197268420656129, "grad_norm": 3.015625, "learning_rate": 4.970307747101005e-05, "loss": 0.8869, "step": 2932 }, { "epoch": 0.052008136242172855, "grad_norm": 2.625, "learning_rate": 4.9702648159190944e-05, "loss": 0.8571, "step": 2934 }, { "epoch": 0.05204358827778442, "grad_norm": 2.9375, "learning_rate": 4.970221853908794e-05, "loss": 0.8827, "step": 2936 }, { "epoch": 0.052079040313396, "grad_norm": 2.765625, "learning_rate": 4.9701788610706384e-05, "loss": 0.8422, "step": 2938 }, { "epoch": 0.052114492349007564, "grad_norm": 2.6875, "learning_rate": 4.970135837405166e-05, "loss": 0.8821, "step": 2940 }, { "epoch": 0.05214994438461913, "grad_norm": 2.890625, "learning_rate": 4.970092782912912e-05, "loss": 0.8914, "step": 2942 }, { "epoch": 0.052185396420230706, "grad_norm": 2.734375, "learning_rate": 4.970049697594415e-05, "loss": 0.8454, "step": 2944 }, { "epoch": 0.052220848455842274, "grad_norm": 2.796875, "learning_rate": 4.9700065814502125e-05, "loss": 0.8119, "step": 2946 }, { "epoch": 0.05225630049145384, "grad_norm": 2.796875, "learning_rate": 4.9699634344808425e-05, "loss": 0.904, "step": 2948 }, { "epoch": 0.052291752527065416, "grad_norm": 2.921875, "learning_rate": 4.969920256686842e-05, "loss": 0.8324, "step": 2950 }, { "epoch": 0.052327204562676984, "grad_norm": 2.734375, "learning_rate": 4.9698770480687515e-05, "loss": 0.8968, "step": 2952 }, { "epoch": 0.05236265659828855, "grad_norm": 3.109375, "learning_rate": 4.9698338086271114e-05, "loss": 0.8728, "step": 2954 }, { "epoch": 0.052398108633900126, "grad_norm": 2.90625, "learning_rate": 4.969790538362458e-05, "loss": 0.8537, "step": 2956 }, { "epoch": 0.05243356066951169, "grad_norm": 2.8125, "learning_rate": 4.969747237275334e-05, "loss": 0.8752, "step": 2958 }, { "epoch": 0.05246901270512326, "grad_norm": 3.015625, "learning_rate": 4.9697039053662785e-05, "loss": 0.8249, "step": 2960 }, { "epoch": 0.052504464740734835, "grad_norm": 2.421875, "learning_rate": 4.969660542635833e-05, "loss": 0.8757, "step": 2962 }, { "epoch": 0.0525399167763464, "grad_norm": 2.96875, "learning_rate": 4.969617149084538e-05, "loss": 0.8594, "step": 2964 }, { "epoch": 0.05257536881195797, "grad_norm": 2.65625, "learning_rate": 4.969573724712936e-05, "loss": 0.8425, "step": 2966 }, { "epoch": 0.05261082084756954, "grad_norm": 2.8125, "learning_rate": 4.969530269521568e-05, "loss": 0.8623, "step": 2968 }, { "epoch": 0.05264627288318111, "grad_norm": 2.953125, "learning_rate": 4.969486783510976e-05, "loss": 0.8533, "step": 2970 }, { "epoch": 0.05268172491879268, "grad_norm": 2.9375, "learning_rate": 4.9694432666817036e-05, "loss": 0.8297, "step": 2972 }, { "epoch": 0.05271717695440425, "grad_norm": 2.296875, "learning_rate": 4.969399719034295e-05, "loss": 0.8356, "step": 2974 }, { "epoch": 0.05275262899001582, "grad_norm": 3.3125, "learning_rate": 4.96935614056929e-05, "loss": 0.8644, "step": 2976 }, { "epoch": 0.05278808102562739, "grad_norm": 3.375, "learning_rate": 4.9693125312872356e-05, "loss": 0.8769, "step": 2978 }, { "epoch": 0.05282353306123896, "grad_norm": 3.28125, "learning_rate": 4.969268891188676e-05, "loss": 0.8532, "step": 2980 }, { "epoch": 0.05285898509685053, "grad_norm": 3.109375, "learning_rate": 4.969225220274154e-05, "loss": 0.8512, "step": 2982 }, { "epoch": 0.0528944371324621, "grad_norm": 3.1875, "learning_rate": 4.9691815185442155e-05, "loss": 0.869, "step": 2984 }, { "epoch": 0.05292988916807367, "grad_norm": 2.65625, "learning_rate": 4.9691377859994056e-05, "loss": 0.8594, "step": 2986 }, { "epoch": 0.05296534120368524, "grad_norm": 2.765625, "learning_rate": 4.9690940226402716e-05, "loss": 0.8524, "step": 2988 }, { "epoch": 0.05300079323929681, "grad_norm": 3.09375, "learning_rate": 4.969050228467358e-05, "loss": 0.8984, "step": 2990 }, { "epoch": 0.05303624527490838, "grad_norm": 3.03125, "learning_rate": 4.9690064034812114e-05, "loss": 0.8926, "step": 2992 }, { "epoch": 0.05307169731051995, "grad_norm": 2.71875, "learning_rate": 4.9689625476823795e-05, "loss": 0.8046, "step": 2994 }, { "epoch": 0.05310714934613152, "grad_norm": 2.796875, "learning_rate": 4.968918661071409e-05, "loss": 0.8651, "step": 2996 }, { "epoch": 0.053142601381743086, "grad_norm": 3.21875, "learning_rate": 4.968874743648848e-05, "loss": 0.913, "step": 2998 }, { "epoch": 0.05317805341735466, "grad_norm": 2.78125, "learning_rate": 4.968830795415245e-05, "loss": 0.8606, "step": 3000 }, { "epoch": 0.05321350545296623, "grad_norm": 3.40625, "learning_rate": 4.9687868163711474e-05, "loss": 0.8876, "step": 3002 }, { "epoch": 0.053248957488577796, "grad_norm": 2.828125, "learning_rate": 4.968742806517104e-05, "loss": 0.8648, "step": 3004 }, { "epoch": 0.05328440952418937, "grad_norm": 2.9375, "learning_rate": 4.9686987658536646e-05, "loss": 0.8341, "step": 3006 }, { "epoch": 0.05331986155980094, "grad_norm": 2.921875, "learning_rate": 4.968654694381379e-05, "loss": 0.8511, "step": 3008 }, { "epoch": 0.053355313595412505, "grad_norm": 3.171875, "learning_rate": 4.968610592100797e-05, "loss": 0.8549, "step": 3010 }, { "epoch": 0.05339076563102407, "grad_norm": 2.984375, "learning_rate": 4.96856645901247e-05, "loss": 0.8929, "step": 3012 }, { "epoch": 0.05342621766663565, "grad_norm": 2.71875, "learning_rate": 4.968522295116947e-05, "loss": 0.8663, "step": 3014 }, { "epoch": 0.053461669702247215, "grad_norm": 2.609375, "learning_rate": 4.9684781004147795e-05, "loss": 0.8973, "step": 3016 }, { "epoch": 0.05349712173785878, "grad_norm": 2.625, "learning_rate": 4.96843387490652e-05, "loss": 0.8569, "step": 3018 }, { "epoch": 0.05353257377347036, "grad_norm": 2.828125, "learning_rate": 4.96838961859272e-05, "loss": 0.8066, "step": 3020 }, { "epoch": 0.053568025809081925, "grad_norm": 2.796875, "learning_rate": 4.9683453314739314e-05, "loss": 0.9235, "step": 3022 }, { "epoch": 0.05360347784469349, "grad_norm": 2.890625, "learning_rate": 4.968301013550707e-05, "loss": 0.8878, "step": 3024 }, { "epoch": 0.05363892988030507, "grad_norm": 2.765625, "learning_rate": 4.9682566648236007e-05, "loss": 0.894, "step": 3026 }, { "epoch": 0.053674381915916634, "grad_norm": 2.53125, "learning_rate": 4.968212285293165e-05, "loss": 0.8596, "step": 3028 }, { "epoch": 0.0537098339515282, "grad_norm": 3.03125, "learning_rate": 4.9681678749599536e-05, "loss": 0.8688, "step": 3030 }, { "epoch": 0.053745285987139776, "grad_norm": 2.75, "learning_rate": 4.9681234338245214e-05, "loss": 0.872, "step": 3032 }, { "epoch": 0.053780738022751344, "grad_norm": 2.671875, "learning_rate": 4.968078961887423e-05, "loss": 0.859, "step": 3034 }, { "epoch": 0.05381619005836291, "grad_norm": 2.796875, "learning_rate": 4.968034459149213e-05, "loss": 0.8821, "step": 3036 }, { "epoch": 0.053851642093974486, "grad_norm": 2.78125, "learning_rate": 4.967989925610447e-05, "loss": 0.8507, "step": 3038 }, { "epoch": 0.053887094129586054, "grad_norm": 3.453125, "learning_rate": 4.9679453612716816e-05, "loss": 0.8604, "step": 3040 }, { "epoch": 0.05392254616519762, "grad_norm": 2.71875, "learning_rate": 4.967900766133471e-05, "loss": 0.8521, "step": 3042 }, { "epoch": 0.053957998200809196, "grad_norm": 2.984375, "learning_rate": 4.9678561401963736e-05, "loss": 0.9058, "step": 3044 }, { "epoch": 0.05399345023642076, "grad_norm": 2.671875, "learning_rate": 4.967811483460946e-05, "loss": 0.8202, "step": 3046 }, { "epoch": 0.05402890227203233, "grad_norm": 3.09375, "learning_rate": 4.967766795927744e-05, "loss": 0.8616, "step": 3048 }, { "epoch": 0.054064354307643905, "grad_norm": 2.78125, "learning_rate": 4.967722077597327e-05, "loss": 0.8462, "step": 3050 }, { "epoch": 0.05409980634325547, "grad_norm": 2.6875, "learning_rate": 4.967677328470252e-05, "loss": 0.8751, "step": 3052 }, { "epoch": 0.05413525837886704, "grad_norm": 3.03125, "learning_rate": 4.967632548547078e-05, "loss": 0.9041, "step": 3054 }, { "epoch": 0.05417071041447861, "grad_norm": 2.59375, "learning_rate": 4.967587737828364e-05, "loss": 0.8535, "step": 3056 }, { "epoch": 0.05420616245009018, "grad_norm": 3.078125, "learning_rate": 4.967542896314669e-05, "loss": 0.823, "step": 3058 }, { "epoch": 0.05424161448570175, "grad_norm": 2.5625, "learning_rate": 4.967498024006553e-05, "loss": 0.847, "step": 3060 }, { "epoch": 0.05427706652131332, "grad_norm": 2.859375, "learning_rate": 4.967453120904575e-05, "loss": 0.8793, "step": 3062 }, { "epoch": 0.05431251855692489, "grad_norm": 3.03125, "learning_rate": 4.967408187009296e-05, "loss": 0.87, "step": 3064 }, { "epoch": 0.05434797059253646, "grad_norm": 2.6875, "learning_rate": 4.967363222321277e-05, "loss": 0.8522, "step": 3066 }, { "epoch": 0.05438342262814803, "grad_norm": 2.671875, "learning_rate": 4.967318226841079e-05, "loss": 0.8765, "step": 3068 }, { "epoch": 0.0544188746637596, "grad_norm": 3.0625, "learning_rate": 4.967273200569263e-05, "loss": 0.8881, "step": 3070 }, { "epoch": 0.05445432669937117, "grad_norm": 2.734375, "learning_rate": 4.9672281435063915e-05, "loss": 0.8436, "step": 3072 }, { "epoch": 0.05448977873498274, "grad_norm": 2.8125, "learning_rate": 4.967183055653027e-05, "loss": 0.8846, "step": 3074 }, { "epoch": 0.05452523077059431, "grad_norm": 2.75, "learning_rate": 4.9671379370097314e-05, "loss": 0.8671, "step": 3076 }, { "epoch": 0.05456068280620588, "grad_norm": 3.25, "learning_rate": 4.967092787577068e-05, "loss": 0.8623, "step": 3078 }, { "epoch": 0.05459613484181745, "grad_norm": 3.171875, "learning_rate": 4.9670476073556015e-05, "loss": 0.8926, "step": 3080 }, { "epoch": 0.05463158687742902, "grad_norm": 2.9375, "learning_rate": 4.967002396345894e-05, "loss": 0.8009, "step": 3082 }, { "epoch": 0.05466703891304059, "grad_norm": 3.0625, "learning_rate": 4.966957154548511e-05, "loss": 0.8722, "step": 3084 }, { "epoch": 0.054702490948652156, "grad_norm": 2.9375, "learning_rate": 4.966911881964016e-05, "loss": 0.8625, "step": 3086 }, { "epoch": 0.05473794298426373, "grad_norm": 2.734375, "learning_rate": 4.9668665785929744e-05, "loss": 0.8557, "step": 3088 }, { "epoch": 0.0547733950198753, "grad_norm": 2.71875, "learning_rate": 4.966821244435952e-05, "loss": 0.8461, "step": 3090 }, { "epoch": 0.054808847055486866, "grad_norm": 2.640625, "learning_rate": 4.966775879493514e-05, "loss": 0.8348, "step": 3092 }, { "epoch": 0.05484429909109844, "grad_norm": 2.796875, "learning_rate": 4.9667304837662265e-05, "loss": 0.8429, "step": 3094 }, { "epoch": 0.05487975112671001, "grad_norm": 2.8125, "learning_rate": 4.9666850572546575e-05, "loss": 0.8614, "step": 3096 }, { "epoch": 0.054915203162321576, "grad_norm": 3.109375, "learning_rate": 4.966639599959372e-05, "loss": 0.9079, "step": 3098 }, { "epoch": 0.05495065519793314, "grad_norm": 2.640625, "learning_rate": 4.9665941118809375e-05, "loss": 0.8368, "step": 3100 }, { "epoch": 0.05498610723354472, "grad_norm": 2.859375, "learning_rate": 4.966548593019923e-05, "loss": 0.8453, "step": 3102 }, { "epoch": 0.055021559269156285, "grad_norm": 2.84375, "learning_rate": 4.966503043376896e-05, "loss": 0.9079, "step": 3104 }, { "epoch": 0.05505701130476785, "grad_norm": 2.515625, "learning_rate": 4.966457462952424e-05, "loss": 0.8182, "step": 3106 }, { "epoch": 0.05509246334037943, "grad_norm": 2.71875, "learning_rate": 4.966411851747078e-05, "loss": 0.8326, "step": 3108 }, { "epoch": 0.055127915375990995, "grad_norm": 2.78125, "learning_rate": 4.9663662097614245e-05, "loss": 0.8604, "step": 3110 }, { "epoch": 0.05516336741160256, "grad_norm": 2.703125, "learning_rate": 4.9663205369960345e-05, "loss": 0.8305, "step": 3112 }, { "epoch": 0.05519881944721414, "grad_norm": 2.4375, "learning_rate": 4.9662748334514784e-05, "loss": 0.8346, "step": 3114 }, { "epoch": 0.055234271482825704, "grad_norm": 2.78125, "learning_rate": 4.9662290991283254e-05, "loss": 0.914, "step": 3116 }, { "epoch": 0.05526972351843727, "grad_norm": 2.828125, "learning_rate": 4.966183334027148e-05, "loss": 0.8923, "step": 3118 }, { "epoch": 0.05530517555404885, "grad_norm": 3.125, "learning_rate": 4.966137538148515e-05, "loss": 0.878, "step": 3120 }, { "epoch": 0.055340627589660414, "grad_norm": 2.609375, "learning_rate": 4.966091711493e-05, "loss": 0.8524, "step": 3122 }, { "epoch": 0.05537607962527198, "grad_norm": 2.5, "learning_rate": 4.966045854061174e-05, "loss": 0.8129, "step": 3124 }, { "epoch": 0.055411531660883556, "grad_norm": 2.859375, "learning_rate": 4.965999965853609e-05, "loss": 0.8476, "step": 3126 }, { "epoch": 0.055446983696495124, "grad_norm": 3.09375, "learning_rate": 4.965954046870879e-05, "loss": 0.8393, "step": 3128 }, { "epoch": 0.05548243573210669, "grad_norm": 3.15625, "learning_rate": 4.965908097113555e-05, "loss": 0.8137, "step": 3130 }, { "epoch": 0.055517887767718266, "grad_norm": 2.859375, "learning_rate": 4.965862116582212e-05, "loss": 0.8436, "step": 3132 }, { "epoch": 0.05555333980332983, "grad_norm": 2.703125, "learning_rate": 4.965816105277423e-05, "loss": 0.8907, "step": 3134 }, { "epoch": 0.0555887918389414, "grad_norm": 3.015625, "learning_rate": 4.965770063199763e-05, "loss": 0.8394, "step": 3136 }, { "epoch": 0.05562424387455297, "grad_norm": 2.765625, "learning_rate": 4.9657239903498064e-05, "loss": 0.848, "step": 3138 }, { "epoch": 0.05565969591016454, "grad_norm": 2.984375, "learning_rate": 4.965677886728128e-05, "loss": 0.8919, "step": 3140 }, { "epoch": 0.05569514794577611, "grad_norm": 3.046875, "learning_rate": 4.965631752335302e-05, "loss": 0.8882, "step": 3142 }, { "epoch": 0.05573059998138768, "grad_norm": 2.765625, "learning_rate": 4.965585587171907e-05, "loss": 0.8359, "step": 3144 }, { "epoch": 0.05576605201699925, "grad_norm": 2.625, "learning_rate": 4.9655393912385164e-05, "loss": 0.8831, "step": 3146 }, { "epoch": 0.05580150405261082, "grad_norm": 2.734375, "learning_rate": 4.9654931645357075e-05, "loss": 0.8672, "step": 3148 }, { "epoch": 0.05583695608822239, "grad_norm": 2.65625, "learning_rate": 4.965446907064059e-05, "loss": 0.8515, "step": 3150 }, { "epoch": 0.05587240812383396, "grad_norm": 3.203125, "learning_rate": 4.965400618824145e-05, "loss": 0.9172, "step": 3152 }, { "epoch": 0.05590786015944553, "grad_norm": 2.703125, "learning_rate": 4.965354299816545e-05, "loss": 0.8372, "step": 3154 }, { "epoch": 0.0559433121950571, "grad_norm": 2.625, "learning_rate": 4.965307950041837e-05, "loss": 0.8744, "step": 3156 }, { "epoch": 0.05597876423066867, "grad_norm": 3.03125, "learning_rate": 4.9652615695006e-05, "loss": 0.8757, "step": 3158 }, { "epoch": 0.05601421626628024, "grad_norm": 3.25, "learning_rate": 4.965215158193411e-05, "loss": 0.8774, "step": 3160 }, { "epoch": 0.05604966830189181, "grad_norm": 2.9375, "learning_rate": 4.9651687161208505e-05, "loss": 0.8498, "step": 3162 }, { "epoch": 0.05608512033750338, "grad_norm": 3.46875, "learning_rate": 4.9651222432834985e-05, "loss": 0.9136, "step": 3164 }, { "epoch": 0.05612057237311495, "grad_norm": 3.09375, "learning_rate": 4.9650757396819335e-05, "loss": 0.8381, "step": 3166 }, { "epoch": 0.05615602440872652, "grad_norm": 3.0, "learning_rate": 4.965029205316737e-05, "loss": 0.8501, "step": 3168 }, { "epoch": 0.05619147644433809, "grad_norm": 2.953125, "learning_rate": 4.9649826401884904e-05, "loss": 0.8755, "step": 3170 }, { "epoch": 0.05622692847994966, "grad_norm": 3.03125, "learning_rate": 4.964936044297773e-05, "loss": 0.9018, "step": 3172 }, { "epoch": 0.056262380515561226, "grad_norm": 2.953125, "learning_rate": 4.964889417645166e-05, "loss": 0.8797, "step": 3174 }, { "epoch": 0.0562978325511728, "grad_norm": 2.6875, "learning_rate": 4.964842760231254e-05, "loss": 0.8833, "step": 3176 }, { "epoch": 0.05633328458678437, "grad_norm": 2.9375, "learning_rate": 4.964796072056618e-05, "loss": 0.8543, "step": 3178 }, { "epoch": 0.056368736622395936, "grad_norm": 2.796875, "learning_rate": 4.96474935312184e-05, "loss": 0.9254, "step": 3180 }, { "epoch": 0.056404188658007504, "grad_norm": 3.234375, "learning_rate": 4.964702603427504e-05, "loss": 0.8885, "step": 3182 }, { "epoch": 0.05643964069361908, "grad_norm": 3.0, "learning_rate": 4.964655822974191e-05, "loss": 0.8349, "step": 3184 }, { "epoch": 0.056475092729230646, "grad_norm": 2.703125, "learning_rate": 4.964609011762488e-05, "loss": 0.8539, "step": 3186 }, { "epoch": 0.05651054476484221, "grad_norm": 2.6875, "learning_rate": 4.964562169792978e-05, "loss": 0.8605, "step": 3188 }, { "epoch": 0.05654599680045379, "grad_norm": 2.703125, "learning_rate": 4.964515297066245e-05, "loss": 0.8885, "step": 3190 }, { "epoch": 0.056581448836065355, "grad_norm": 2.34375, "learning_rate": 4.964468393582875e-05, "loss": 0.812, "step": 3192 }, { "epoch": 0.05661690087167692, "grad_norm": 2.5, "learning_rate": 4.964421459343452e-05, "loss": 0.8265, "step": 3194 }, { "epoch": 0.0566523529072885, "grad_norm": 2.5625, "learning_rate": 4.9643744943485626e-05, "loss": 0.866, "step": 3196 }, { "epoch": 0.056687804942900065, "grad_norm": 2.6875, "learning_rate": 4.964327498598793e-05, "loss": 0.8846, "step": 3198 }, { "epoch": 0.05672325697851163, "grad_norm": 2.734375, "learning_rate": 4.96428047209473e-05, "loss": 0.8741, "step": 3200 }, { "epoch": 0.05675870901412321, "grad_norm": 3.03125, "learning_rate": 4.9642334148369595e-05, "loss": 0.8554, "step": 3202 }, { "epoch": 0.056794161049734775, "grad_norm": 3.078125, "learning_rate": 4.964186326826069e-05, "loss": 0.8448, "step": 3204 }, { "epoch": 0.05682961308534634, "grad_norm": 2.6875, "learning_rate": 4.964139208062647e-05, "loss": 0.8586, "step": 3206 }, { "epoch": 0.05686506512095792, "grad_norm": 2.96875, "learning_rate": 4.964092058547281e-05, "loss": 0.9298, "step": 3208 }, { "epoch": 0.056900517156569484, "grad_norm": 2.90625, "learning_rate": 4.964044878280558e-05, "loss": 0.8457, "step": 3210 }, { "epoch": 0.05693596919218105, "grad_norm": 2.703125, "learning_rate": 4.963997667263069e-05, "loss": 0.9313, "step": 3212 }, { "epoch": 0.056971421227792626, "grad_norm": 2.765625, "learning_rate": 4.9639504254954026e-05, "loss": 0.8426, "step": 3214 }, { "epoch": 0.057006873263404194, "grad_norm": 2.984375, "learning_rate": 4.963903152978148e-05, "loss": 0.8391, "step": 3216 }, { "epoch": 0.05704232529901576, "grad_norm": 2.65625, "learning_rate": 4.9638558497118956e-05, "loss": 0.8089, "step": 3218 }, { "epoch": 0.057077777334627336, "grad_norm": 2.9375, "learning_rate": 4.963808515697235e-05, "loss": 0.8812, "step": 3220 }, { "epoch": 0.057113229370238904, "grad_norm": 2.5625, "learning_rate": 4.963761150934757e-05, "loss": 0.831, "step": 3222 }, { "epoch": 0.05714868140585047, "grad_norm": 2.953125, "learning_rate": 4.9637137554250535e-05, "loss": 0.9013, "step": 3224 }, { "epoch": 0.05718413344146204, "grad_norm": 2.984375, "learning_rate": 4.963666329168715e-05, "loss": 0.8752, "step": 3226 }, { "epoch": 0.05721958547707361, "grad_norm": 2.921875, "learning_rate": 4.963618872166334e-05, "loss": 0.882, "step": 3228 }, { "epoch": 0.05725503751268518, "grad_norm": 3.0625, "learning_rate": 4.9635713844185025e-05, "loss": 0.9392, "step": 3230 }, { "epoch": 0.05729048954829675, "grad_norm": 2.515625, "learning_rate": 4.9635238659258136e-05, "loss": 0.7893, "step": 3232 }, { "epoch": 0.05732594158390832, "grad_norm": 2.796875, "learning_rate": 4.96347631668886e-05, "loss": 0.8776, "step": 3234 }, { "epoch": 0.05736139361951989, "grad_norm": 2.671875, "learning_rate": 4.9634287367082346e-05, "loss": 0.8508, "step": 3236 }, { "epoch": 0.05739684565513146, "grad_norm": 2.515625, "learning_rate": 4.963381125984532e-05, "loss": 0.8219, "step": 3238 }, { "epoch": 0.05743229769074303, "grad_norm": 2.859375, "learning_rate": 4.963333484518346e-05, "loss": 0.8711, "step": 3240 }, { "epoch": 0.0574677497263546, "grad_norm": 2.953125, "learning_rate": 4.963285812310271e-05, "loss": 0.892, "step": 3242 }, { "epoch": 0.05750320176196617, "grad_norm": 2.921875, "learning_rate": 4.9632381093609024e-05, "loss": 0.8696, "step": 3244 }, { "epoch": 0.05753865379757774, "grad_norm": 2.921875, "learning_rate": 4.963190375670835e-05, "loss": 0.8536, "step": 3246 }, { "epoch": 0.05757410583318931, "grad_norm": 2.921875, "learning_rate": 4.963142611240665e-05, "loss": 0.883, "step": 3248 }, { "epoch": 0.05760955786880088, "grad_norm": 2.859375, "learning_rate": 4.963094816070988e-05, "loss": 0.9322, "step": 3250 }, { "epoch": 0.05764500990441245, "grad_norm": 2.75, "learning_rate": 4.963046990162401e-05, "loss": 0.8714, "step": 3252 }, { "epoch": 0.05768046194002402, "grad_norm": 2.8125, "learning_rate": 4.9629991335155e-05, "loss": 0.8551, "step": 3254 }, { "epoch": 0.05771591397563559, "grad_norm": 2.703125, "learning_rate": 4.962951246130884e-05, "loss": 0.9051, "step": 3256 }, { "epoch": 0.05775136601124716, "grad_norm": 2.71875, "learning_rate": 4.962903328009149e-05, "loss": 0.8693, "step": 3258 }, { "epoch": 0.05778681804685873, "grad_norm": 2.609375, "learning_rate": 4.962855379150893e-05, "loss": 0.8398, "step": 3260 }, { "epoch": 0.057822270082470296, "grad_norm": 2.90625, "learning_rate": 4.962807399556715e-05, "loss": 0.8445, "step": 3262 }, { "epoch": 0.05785772211808187, "grad_norm": 2.671875, "learning_rate": 4.962759389227213e-05, "loss": 0.8498, "step": 3264 }, { "epoch": 0.05789317415369344, "grad_norm": 3.0, "learning_rate": 4.962711348162987e-05, "loss": 0.8548, "step": 3266 }, { "epoch": 0.057928626189305006, "grad_norm": 2.671875, "learning_rate": 4.962663276364637e-05, "loss": 0.8758, "step": 3268 }, { "epoch": 0.057964078224916574, "grad_norm": 2.78125, "learning_rate": 4.962615173832762e-05, "loss": 0.8511, "step": 3270 }, { "epoch": 0.05799953026052815, "grad_norm": 2.84375, "learning_rate": 4.9625670405679626e-05, "loss": 0.8197, "step": 3272 }, { "epoch": 0.058034982296139716, "grad_norm": 2.8125, "learning_rate": 4.96251887657084e-05, "loss": 0.8447, "step": 3274 }, { "epoch": 0.05807043433175128, "grad_norm": 2.96875, "learning_rate": 4.962470681841993e-05, "loss": 0.8452, "step": 3276 }, { "epoch": 0.05810588636736286, "grad_norm": 2.890625, "learning_rate": 4.962422456382026e-05, "loss": 0.8332, "step": 3278 }, { "epoch": 0.058141338402974425, "grad_norm": 2.859375, "learning_rate": 4.96237420019154e-05, "loss": 0.8803, "step": 3280 }, { "epoch": 0.05817679043858599, "grad_norm": 2.8125, "learning_rate": 4.9623259132711365e-05, "loss": 0.7956, "step": 3282 }, { "epoch": 0.05821224247419757, "grad_norm": 2.953125, "learning_rate": 4.9622775956214187e-05, "loss": 0.8399, "step": 3284 }, { "epoch": 0.058247694509809135, "grad_norm": 2.75, "learning_rate": 4.962229247242989e-05, "loss": 0.8275, "step": 3286 }, { "epoch": 0.0582831465454207, "grad_norm": 3.265625, "learning_rate": 4.9621808681364506e-05, "loss": 0.8737, "step": 3288 }, { "epoch": 0.05831859858103228, "grad_norm": 2.859375, "learning_rate": 4.9621324583024085e-05, "loss": 0.8652, "step": 3290 }, { "epoch": 0.058354050616643845, "grad_norm": 2.65625, "learning_rate": 4.962084017741466e-05, "loss": 0.8329, "step": 3292 }, { "epoch": 0.05838950265225541, "grad_norm": 2.859375, "learning_rate": 4.962035546454228e-05, "loss": 0.8641, "step": 3294 }, { "epoch": 0.05842495468786699, "grad_norm": 2.78125, "learning_rate": 4.961987044441299e-05, "loss": 0.8403, "step": 3296 }, { "epoch": 0.058460406723478554, "grad_norm": 2.796875, "learning_rate": 4.961938511703284e-05, "loss": 0.854, "step": 3298 }, { "epoch": 0.05849585875909012, "grad_norm": 2.984375, "learning_rate": 4.96188994824079e-05, "loss": 0.8576, "step": 3300 }, { "epoch": 0.058531310794701696, "grad_norm": 2.890625, "learning_rate": 4.961841354054422e-05, "loss": 0.8808, "step": 3302 }, { "epoch": 0.058566762830313264, "grad_norm": 2.671875, "learning_rate": 4.961792729144786e-05, "loss": 0.8476, "step": 3304 }, { "epoch": 0.05860221486592483, "grad_norm": 2.890625, "learning_rate": 4.96174407351249e-05, "loss": 0.8627, "step": 3306 }, { "epoch": 0.058637666901536406, "grad_norm": 2.65625, "learning_rate": 4.9616953871581406e-05, "loss": 0.8171, "step": 3308 }, { "epoch": 0.058673118937147974, "grad_norm": 2.875, "learning_rate": 4.9616466700823455e-05, "loss": 0.8769, "step": 3310 }, { "epoch": 0.05870857097275954, "grad_norm": 2.765625, "learning_rate": 4.961597922285712e-05, "loss": 0.8673, "step": 3312 }, { "epoch": 0.05874402300837111, "grad_norm": 3.4375, "learning_rate": 4.96154914376885e-05, "loss": 0.8796, "step": 3314 }, { "epoch": 0.05877947504398268, "grad_norm": 2.875, "learning_rate": 4.961500334532368e-05, "loss": 0.8608, "step": 3316 }, { "epoch": 0.05881492707959425, "grad_norm": 3.15625, "learning_rate": 4.9614514945768734e-05, "loss": 0.8851, "step": 3318 }, { "epoch": 0.05885037911520582, "grad_norm": 2.984375, "learning_rate": 4.961402623902978e-05, "loss": 0.8761, "step": 3320 }, { "epoch": 0.05888583115081739, "grad_norm": 2.90625, "learning_rate": 4.9613537225112893e-05, "loss": 0.8832, "step": 3322 }, { "epoch": 0.05892128318642896, "grad_norm": 3.15625, "learning_rate": 4.961304790402419e-05, "loss": 0.8012, "step": 3324 }, { "epoch": 0.05895673522204053, "grad_norm": 2.953125, "learning_rate": 4.961255827576978e-05, "loss": 0.8719, "step": 3326 }, { "epoch": 0.0589921872576521, "grad_norm": 2.71875, "learning_rate": 4.9612068340355766e-05, "loss": 0.9218, "step": 3328 }, { "epoch": 0.05902763929326367, "grad_norm": 2.96875, "learning_rate": 4.961157809778827e-05, "loss": 0.862, "step": 3330 }, { "epoch": 0.05906309132887524, "grad_norm": 2.6875, "learning_rate": 4.96110875480734e-05, "loss": 0.8544, "step": 3332 }, { "epoch": 0.05909854336448681, "grad_norm": 2.875, "learning_rate": 4.9610596691217284e-05, "loss": 0.8294, "step": 3334 }, { "epoch": 0.05913399540009838, "grad_norm": 2.71875, "learning_rate": 4.961010552722605e-05, "loss": 0.8704, "step": 3336 }, { "epoch": 0.05916944743570995, "grad_norm": 2.40625, "learning_rate": 4.960961405610582e-05, "loss": 0.7835, "step": 3338 }, { "epoch": 0.05920489947132152, "grad_norm": 2.9375, "learning_rate": 4.960912227786274e-05, "loss": 0.8676, "step": 3340 }, { "epoch": 0.05924035150693309, "grad_norm": 2.90625, "learning_rate": 4.9608630192502935e-05, "loss": 0.86, "step": 3342 }, { "epoch": 0.05927580354254466, "grad_norm": 2.921875, "learning_rate": 4.960813780003256e-05, "loss": 0.7854, "step": 3344 }, { "epoch": 0.05931125557815623, "grad_norm": 2.453125, "learning_rate": 4.960764510045774e-05, "loss": 0.8139, "step": 3346 }, { "epoch": 0.0593467076137678, "grad_norm": 2.765625, "learning_rate": 4.960715209378464e-05, "loss": 0.8455, "step": 3348 }, { "epoch": 0.05938215964937937, "grad_norm": 2.90625, "learning_rate": 4.96066587800194e-05, "loss": 0.9059, "step": 3350 }, { "epoch": 0.059417611684990934, "grad_norm": 2.640625, "learning_rate": 4.960616515916819e-05, "loss": 0.8427, "step": 3352 }, { "epoch": 0.05945306372060251, "grad_norm": 2.953125, "learning_rate": 4.960567123123716e-05, "loss": 0.8606, "step": 3354 }, { "epoch": 0.059488515756214076, "grad_norm": 3.015625, "learning_rate": 4.960517699623248e-05, "loss": 0.9067, "step": 3356 }, { "epoch": 0.059523967791825644, "grad_norm": 2.953125, "learning_rate": 4.960468245416032e-05, "loss": 0.873, "step": 3358 }, { "epoch": 0.05955941982743722, "grad_norm": 2.734375, "learning_rate": 4.9604187605026845e-05, "loss": 0.8933, "step": 3360 }, { "epoch": 0.059594871863048786, "grad_norm": 3.265625, "learning_rate": 4.960369244883823e-05, "loss": 0.8775, "step": 3362 }, { "epoch": 0.05963032389866035, "grad_norm": 2.828125, "learning_rate": 4.960319698560066e-05, "loss": 0.8348, "step": 3364 }, { "epoch": 0.05966577593427193, "grad_norm": 2.53125, "learning_rate": 4.960270121532031e-05, "loss": 0.8505, "step": 3366 }, { "epoch": 0.059701227969883496, "grad_norm": 2.84375, "learning_rate": 4.960220513800339e-05, "loss": 0.8609, "step": 3368 }, { "epoch": 0.05973668000549506, "grad_norm": 2.796875, "learning_rate": 4.960170875365606e-05, "loss": 0.8479, "step": 3370 }, { "epoch": 0.05977213204110664, "grad_norm": 2.765625, "learning_rate": 4.960121206228453e-05, "loss": 0.8407, "step": 3372 }, { "epoch": 0.059807584076718205, "grad_norm": 3.046875, "learning_rate": 4.9600715063895e-05, "loss": 0.8408, "step": 3374 }, { "epoch": 0.05984303611232977, "grad_norm": 2.765625, "learning_rate": 4.960021775849367e-05, "loss": 0.8333, "step": 3376 }, { "epoch": 0.05987848814794135, "grad_norm": 2.875, "learning_rate": 4.959972014608675e-05, "loss": 0.8421, "step": 3378 }, { "epoch": 0.059913940183552915, "grad_norm": 2.796875, "learning_rate": 4.959922222668044e-05, "loss": 0.8293, "step": 3380 }, { "epoch": 0.05994939221916448, "grad_norm": 2.6875, "learning_rate": 4.959872400028096e-05, "loss": 0.8708, "step": 3382 }, { "epoch": 0.05998484425477606, "grad_norm": 2.625, "learning_rate": 4.959822546689453e-05, "loss": 0.8583, "step": 3384 }, { "epoch": 0.060020296290387624, "grad_norm": 2.78125, "learning_rate": 4.959772662652737e-05, "loss": 0.8624, "step": 3386 }, { "epoch": 0.06005574832599919, "grad_norm": 2.96875, "learning_rate": 4.959722747918571e-05, "loss": 0.879, "step": 3388 }, { "epoch": 0.060091200361610767, "grad_norm": 2.75, "learning_rate": 4.9596728024875774e-05, "loss": 0.8485, "step": 3390 }, { "epoch": 0.060126652397222334, "grad_norm": 2.875, "learning_rate": 4.959622826360378e-05, "loss": 0.8543, "step": 3392 }, { "epoch": 0.0601621044328339, "grad_norm": 2.96875, "learning_rate": 4.9595728195375996e-05, "loss": 0.8681, "step": 3394 }, { "epoch": 0.06019755646844547, "grad_norm": 2.890625, "learning_rate": 4.959522782019864e-05, "loss": 0.8927, "step": 3396 }, { "epoch": 0.060233008504057044, "grad_norm": 2.59375, "learning_rate": 4.9594727138077967e-05, "loss": 0.8581, "step": 3398 }, { "epoch": 0.06026846053966861, "grad_norm": 2.625, "learning_rate": 4.9594226149020226e-05, "loss": 0.8652, "step": 3400 }, { "epoch": 0.06030391257528018, "grad_norm": 2.890625, "learning_rate": 4.959372485303165e-05, "loss": 0.8713, "step": 3402 }, { "epoch": 0.06033936461089175, "grad_norm": 2.96875, "learning_rate": 4.9593223250118524e-05, "loss": 0.8531, "step": 3404 }, { "epoch": 0.06037481664650332, "grad_norm": 3.203125, "learning_rate": 4.95927213402871e-05, "loss": 0.869, "step": 3406 }, { "epoch": 0.06041026868211489, "grad_norm": 2.734375, "learning_rate": 4.959221912354362e-05, "loss": 0.8642, "step": 3408 }, { "epoch": 0.06044572071772646, "grad_norm": 2.734375, "learning_rate": 4.959171659989438e-05, "loss": 0.8605, "step": 3410 }, { "epoch": 0.06048117275333803, "grad_norm": 2.703125, "learning_rate": 4.959121376934563e-05, "loss": 0.858, "step": 3412 }, { "epoch": 0.0605166247889496, "grad_norm": 2.75, "learning_rate": 4.959071063190366e-05, "loss": 0.8314, "step": 3414 }, { "epoch": 0.06055207682456117, "grad_norm": 2.84375, "learning_rate": 4.959020718757474e-05, "loss": 0.8443, "step": 3416 }, { "epoch": 0.06058752886017274, "grad_norm": 2.609375, "learning_rate": 4.9589703436365156e-05, "loss": 0.8542, "step": 3418 }, { "epoch": 0.06062298089578431, "grad_norm": 2.90625, "learning_rate": 4.9589199378281194e-05, "loss": 0.844, "step": 3420 }, { "epoch": 0.06065843293139588, "grad_norm": 2.828125, "learning_rate": 4.958869501332914e-05, "loss": 0.8895, "step": 3422 }, { "epoch": 0.06069388496700745, "grad_norm": 2.921875, "learning_rate": 4.958819034151531e-05, "loss": 0.8338, "step": 3424 }, { "epoch": 0.06072933700261902, "grad_norm": 2.609375, "learning_rate": 4.958768536284597e-05, "loss": 0.8165, "step": 3426 }, { "epoch": 0.06076478903823059, "grad_norm": 2.625, "learning_rate": 4.9587180077327444e-05, "loss": 0.7967, "step": 3428 }, { "epoch": 0.06080024107384216, "grad_norm": 2.671875, "learning_rate": 4.958667448496604e-05, "loss": 0.8665, "step": 3430 }, { "epoch": 0.06083569310945373, "grad_norm": 2.890625, "learning_rate": 4.958616858576804e-05, "loss": 0.8432, "step": 3432 }, { "epoch": 0.0608711451450653, "grad_norm": 2.734375, "learning_rate": 4.9585662379739796e-05, "loss": 0.8507, "step": 3434 }, { "epoch": 0.06090659718067687, "grad_norm": 2.921875, "learning_rate": 4.95851558668876e-05, "loss": 0.8548, "step": 3436 }, { "epoch": 0.06094204921628844, "grad_norm": 2.78125, "learning_rate": 4.958464904721778e-05, "loss": 0.8712, "step": 3438 }, { "epoch": 0.060977501251900004, "grad_norm": 2.9375, "learning_rate": 4.9584141920736656e-05, "loss": 0.8419, "step": 3440 }, { "epoch": 0.06101295328751158, "grad_norm": 2.765625, "learning_rate": 4.9583634487450565e-05, "loss": 0.8738, "step": 3442 }, { "epoch": 0.061048405323123146, "grad_norm": 2.6875, "learning_rate": 4.9583126747365834e-05, "loss": 0.8648, "step": 3444 }, { "epoch": 0.061083857358734714, "grad_norm": 2.6875, "learning_rate": 4.9582618700488805e-05, "loss": 0.8645, "step": 3446 }, { "epoch": 0.06111930939434629, "grad_norm": 2.8125, "learning_rate": 4.9582110346825814e-05, "loss": 0.8694, "step": 3448 }, { "epoch": 0.061154761429957856, "grad_norm": 2.890625, "learning_rate": 4.95816016863832e-05, "loss": 0.8641, "step": 3450 }, { "epoch": 0.061190213465569424, "grad_norm": 3.171875, "learning_rate": 4.9581092719167324e-05, "loss": 0.8866, "step": 3452 }, { "epoch": 0.061225665501181, "grad_norm": 3.015625, "learning_rate": 4.958058344518452e-05, "loss": 0.895, "step": 3454 }, { "epoch": 0.061261117536792566, "grad_norm": 2.609375, "learning_rate": 4.958007386444117e-05, "loss": 0.8642, "step": 3456 }, { "epoch": 0.06129656957240413, "grad_norm": 2.46875, "learning_rate": 4.957956397694361e-05, "loss": 0.8568, "step": 3458 }, { "epoch": 0.06133202160801571, "grad_norm": 3.015625, "learning_rate": 4.957905378269821e-05, "loss": 0.8839, "step": 3460 }, { "epoch": 0.061367473643627275, "grad_norm": 2.703125, "learning_rate": 4.9578543281711345e-05, "loss": 0.842, "step": 3462 }, { "epoch": 0.06140292567923884, "grad_norm": 2.578125, "learning_rate": 4.9578032473989364e-05, "loss": 0.8433, "step": 3464 }, { "epoch": 0.06143837771485042, "grad_norm": 2.8125, "learning_rate": 4.957752135953867e-05, "loss": 0.8188, "step": 3466 }, { "epoch": 0.061473829750461985, "grad_norm": 3.0, "learning_rate": 4.9577009938365624e-05, "loss": 0.8758, "step": 3468 }, { "epoch": 0.06150928178607355, "grad_norm": 3.203125, "learning_rate": 4.957649821047662e-05, "loss": 0.8724, "step": 3470 }, { "epoch": 0.06154473382168513, "grad_norm": 2.96875, "learning_rate": 4.957598617587803e-05, "loss": 0.8746, "step": 3472 }, { "epoch": 0.061580185857296695, "grad_norm": 2.9375, "learning_rate": 4.957547383457625e-05, "loss": 0.8618, "step": 3474 }, { "epoch": 0.06161563789290826, "grad_norm": 2.828125, "learning_rate": 4.957496118657768e-05, "loss": 0.8622, "step": 3476 }, { "epoch": 0.06165108992851984, "grad_norm": 2.890625, "learning_rate": 4.957444823188871e-05, "loss": 0.8822, "step": 3478 }, { "epoch": 0.061686541964131404, "grad_norm": 2.84375, "learning_rate": 4.9573934970515744e-05, "loss": 0.8869, "step": 3480 }, { "epoch": 0.06172199399974297, "grad_norm": 2.84375, "learning_rate": 4.957342140246519e-05, "loss": 0.8593, "step": 3482 }, { "epoch": 0.06175744603535454, "grad_norm": 2.65625, "learning_rate": 4.957290752774346e-05, "loss": 0.8421, "step": 3484 }, { "epoch": 0.061792898070966114, "grad_norm": 2.78125, "learning_rate": 4.957239334635696e-05, "loss": 0.8195, "step": 3486 }, { "epoch": 0.06182835010657768, "grad_norm": 2.71875, "learning_rate": 4.95718788583121e-05, "loss": 0.8544, "step": 3488 }, { "epoch": 0.06186380214218925, "grad_norm": 2.6875, "learning_rate": 4.957136406361532e-05, "loss": 0.8591, "step": 3490 }, { "epoch": 0.06189925417780082, "grad_norm": 2.421875, "learning_rate": 4.957084896227303e-05, "loss": 0.8311, "step": 3492 }, { "epoch": 0.06193470621341239, "grad_norm": 3.125, "learning_rate": 4.957033355429166e-05, "loss": 0.8216, "step": 3494 }, { "epoch": 0.06197015824902396, "grad_norm": 3.265625, "learning_rate": 4.9569817839677646e-05, "loss": 0.8608, "step": 3496 }, { "epoch": 0.06200561028463553, "grad_norm": 2.953125, "learning_rate": 4.956930181843742e-05, "loss": 0.8534, "step": 3498 }, { "epoch": 0.0620410623202471, "grad_norm": 2.8125, "learning_rate": 4.956878549057743e-05, "loss": 0.8135, "step": 3500 }, { "epoch": 0.06207651435585867, "grad_norm": 2.640625, "learning_rate": 4.956826885610412e-05, "loss": 0.8465, "step": 3502 }, { "epoch": 0.06211196639147024, "grad_norm": 2.6875, "learning_rate": 4.9567751915023925e-05, "loss": 0.8692, "step": 3504 }, { "epoch": 0.06214741842708181, "grad_norm": 3.546875, "learning_rate": 4.9567234667343305e-05, "loss": 0.8903, "step": 3506 }, { "epoch": 0.06218287046269338, "grad_norm": 2.84375, "learning_rate": 4.9566717113068715e-05, "loss": 0.8463, "step": 3508 }, { "epoch": 0.06221832249830495, "grad_norm": 2.65625, "learning_rate": 4.9566199252206605e-05, "loss": 0.8344, "step": 3510 }, { "epoch": 0.06225377453391652, "grad_norm": 2.546875, "learning_rate": 4.956568108476345e-05, "loss": 0.8576, "step": 3512 }, { "epoch": 0.06228922656952809, "grad_norm": 2.890625, "learning_rate": 4.956516261074571e-05, "loss": 0.8552, "step": 3514 }, { "epoch": 0.06232467860513966, "grad_norm": 2.953125, "learning_rate": 4.956464383015986e-05, "loss": 0.8446, "step": 3516 }, { "epoch": 0.06236013064075123, "grad_norm": 2.71875, "learning_rate": 4.956412474301237e-05, "loss": 0.8456, "step": 3518 }, { "epoch": 0.0623955826763628, "grad_norm": 2.78125, "learning_rate": 4.9563605349309714e-05, "loss": 0.8691, "step": 3520 }, { "epoch": 0.062431034711974365, "grad_norm": 2.8125, "learning_rate": 4.9563085649058395e-05, "loss": 0.8493, "step": 3522 }, { "epoch": 0.06246648674758594, "grad_norm": 2.9375, "learning_rate": 4.956256564226487e-05, "loss": 0.8059, "step": 3524 }, { "epoch": 0.06250193878319751, "grad_norm": 2.640625, "learning_rate": 4.9562045328935644e-05, "loss": 0.8029, "step": 3526 }, { "epoch": 0.06253739081880907, "grad_norm": 2.734375, "learning_rate": 4.9561524709077215e-05, "loss": 0.8365, "step": 3528 }, { "epoch": 0.06257284285442065, "grad_norm": 2.703125, "learning_rate": 4.9561003782696055e-05, "loss": 0.8374, "step": 3530 }, { "epoch": 0.06260829489003222, "grad_norm": 2.828125, "learning_rate": 4.95604825497987e-05, "loss": 0.8928, "step": 3532 }, { "epoch": 0.06264374692564378, "grad_norm": 2.578125, "learning_rate": 4.955996101039164e-05, "loss": 0.8294, "step": 3534 }, { "epoch": 0.06267919896125536, "grad_norm": 3.0, "learning_rate": 4.955943916448137e-05, "loss": 0.85, "step": 3536 }, { "epoch": 0.06271465099686693, "grad_norm": 2.78125, "learning_rate": 4.9558917012074425e-05, "loss": 0.8115, "step": 3538 }, { "epoch": 0.0627501030324785, "grad_norm": 3.0, "learning_rate": 4.955839455317731e-05, "loss": 0.8667, "step": 3540 }, { "epoch": 0.06278555506809007, "grad_norm": 2.625, "learning_rate": 4.955787178779654e-05, "loss": 0.8485, "step": 3542 }, { "epoch": 0.06282100710370163, "grad_norm": 3.109375, "learning_rate": 4.9557348715938646e-05, "loss": 0.8938, "step": 3544 }, { "epoch": 0.0628564591393132, "grad_norm": 2.625, "learning_rate": 4.9556825337610156e-05, "loss": 0.8341, "step": 3546 }, { "epoch": 0.06289191117492478, "grad_norm": 2.921875, "learning_rate": 4.9556301652817604e-05, "loss": 0.8989, "step": 3548 }, { "epoch": 0.06292736321053634, "grad_norm": 2.90625, "learning_rate": 4.955577766156752e-05, "loss": 0.853, "step": 3550 }, { "epoch": 0.06296281524614791, "grad_norm": 3.03125, "learning_rate": 4.955525336386645e-05, "loss": 0.8993, "step": 3552 }, { "epoch": 0.06299826728175949, "grad_norm": 2.484375, "learning_rate": 4.9554728759720925e-05, "loss": 0.8373, "step": 3554 }, { "epoch": 0.06303371931737105, "grad_norm": 2.703125, "learning_rate": 4.95542038491375e-05, "loss": 0.8574, "step": 3556 }, { "epoch": 0.06306917135298262, "grad_norm": 3.0, "learning_rate": 4.9553678632122724e-05, "loss": 0.9052, "step": 3558 }, { "epoch": 0.0631046233885942, "grad_norm": 3.0625, "learning_rate": 4.955315310868316e-05, "loss": 0.8823, "step": 3560 }, { "epoch": 0.06314007542420576, "grad_norm": 3.046875, "learning_rate": 4.955262727882536e-05, "loss": 0.8426, "step": 3562 }, { "epoch": 0.06317552745981733, "grad_norm": 2.65625, "learning_rate": 4.9552101142555874e-05, "loss": 0.8356, "step": 3564 }, { "epoch": 0.0632109794954289, "grad_norm": 2.84375, "learning_rate": 4.9551574699881285e-05, "loss": 0.8112, "step": 3566 }, { "epoch": 0.06324643153104047, "grad_norm": 2.734375, "learning_rate": 4.955104795080816e-05, "loss": 0.8955, "step": 3568 }, { "epoch": 0.06328188356665204, "grad_norm": 2.921875, "learning_rate": 4.955052089534308e-05, "loss": 0.8834, "step": 3570 }, { "epoch": 0.06331733560226362, "grad_norm": 2.5625, "learning_rate": 4.9549993533492595e-05, "loss": 0.8221, "step": 3572 }, { "epoch": 0.06335278763787518, "grad_norm": 2.640625, "learning_rate": 4.9549465865263314e-05, "loss": 0.9124, "step": 3574 }, { "epoch": 0.06338823967348675, "grad_norm": 2.671875, "learning_rate": 4.954893789066181e-05, "loss": 0.8662, "step": 3576 }, { "epoch": 0.06342369170909833, "grad_norm": 3.0, "learning_rate": 4.954840960969467e-05, "loss": 0.8301, "step": 3578 }, { "epoch": 0.06345914374470989, "grad_norm": 2.8125, "learning_rate": 4.9547881022368495e-05, "loss": 0.8467, "step": 3580 }, { "epoch": 0.06349459578032146, "grad_norm": 2.90625, "learning_rate": 4.954735212868988e-05, "loss": 0.8899, "step": 3582 }, { "epoch": 0.06353004781593304, "grad_norm": 2.96875, "learning_rate": 4.954682292866542e-05, "loss": 0.8437, "step": 3584 }, { "epoch": 0.0635654998515446, "grad_norm": 3.0625, "learning_rate": 4.9546293422301724e-05, "loss": 0.8412, "step": 3586 }, { "epoch": 0.06360095188715617, "grad_norm": 2.8125, "learning_rate": 4.95457636096054e-05, "loss": 0.7977, "step": 3588 }, { "epoch": 0.06363640392276775, "grad_norm": 2.703125, "learning_rate": 4.9545233490583057e-05, "loss": 0.8696, "step": 3590 }, { "epoch": 0.0636718559583793, "grad_norm": 2.453125, "learning_rate": 4.954470306524131e-05, "loss": 0.8215, "step": 3592 }, { "epoch": 0.06370730799399088, "grad_norm": 2.796875, "learning_rate": 4.954417233358678e-05, "loss": 0.8238, "step": 3594 }, { "epoch": 0.06374276002960245, "grad_norm": 2.796875, "learning_rate": 4.9543641295626096e-05, "loss": 0.8648, "step": 3596 }, { "epoch": 0.06377821206521402, "grad_norm": 2.4375, "learning_rate": 4.9543109951365886e-05, "loss": 0.8195, "step": 3598 }, { "epoch": 0.06381366410082559, "grad_norm": 2.640625, "learning_rate": 4.954257830081276e-05, "loss": 0.8593, "step": 3600 }, { "epoch": 0.06384911613643716, "grad_norm": 3.03125, "learning_rate": 4.954204634397338e-05, "loss": 0.8781, "step": 3602 }, { "epoch": 0.06388456817204873, "grad_norm": 2.84375, "learning_rate": 4.9541514080854375e-05, "loss": 0.8603, "step": 3604 }, { "epoch": 0.0639200202076603, "grad_norm": 2.8125, "learning_rate": 4.954098151146238e-05, "loss": 0.8483, "step": 3606 }, { "epoch": 0.06395547224327187, "grad_norm": 2.734375, "learning_rate": 4.954044863580405e-05, "loss": 0.8716, "step": 3608 }, { "epoch": 0.06399092427888343, "grad_norm": 2.8125, "learning_rate": 4.953991545388603e-05, "loss": 0.8673, "step": 3610 }, { "epoch": 0.06402637631449501, "grad_norm": 3.125, "learning_rate": 4.953938196571498e-05, "loss": 0.8617, "step": 3612 }, { "epoch": 0.06406182835010658, "grad_norm": 3.109375, "learning_rate": 4.953884817129755e-05, "loss": 0.857, "step": 3614 }, { "epoch": 0.06409728038571814, "grad_norm": 2.8125, "learning_rate": 4.953831407064041e-05, "loss": 0.853, "step": 3616 }, { "epoch": 0.06413273242132972, "grad_norm": 2.921875, "learning_rate": 4.9537779663750225e-05, "loss": 0.8622, "step": 3618 }, { "epoch": 0.0641681844569413, "grad_norm": 2.578125, "learning_rate": 4.953724495063365e-05, "loss": 0.8242, "step": 3620 }, { "epoch": 0.06420363649255285, "grad_norm": 2.34375, "learning_rate": 4.953670993129738e-05, "loss": 0.8159, "step": 3622 }, { "epoch": 0.06423908852816443, "grad_norm": 2.671875, "learning_rate": 4.953617460574807e-05, "loss": 0.8065, "step": 3624 }, { "epoch": 0.064274540563776, "grad_norm": 2.84375, "learning_rate": 4.9535638973992416e-05, "loss": 0.8516, "step": 3626 }, { "epoch": 0.06430999259938756, "grad_norm": 2.90625, "learning_rate": 4.95351030360371e-05, "loss": 0.8336, "step": 3628 }, { "epoch": 0.06434544463499914, "grad_norm": 2.765625, "learning_rate": 4.9534566791888804e-05, "loss": 0.8426, "step": 3630 }, { "epoch": 0.0643808966706107, "grad_norm": 2.9375, "learning_rate": 4.953403024155423e-05, "loss": 0.849, "step": 3632 }, { "epoch": 0.06441634870622227, "grad_norm": 2.9375, "learning_rate": 4.9533493385040067e-05, "loss": 0.8788, "step": 3634 }, { "epoch": 0.06445180074183385, "grad_norm": 2.65625, "learning_rate": 4.9532956222353014e-05, "loss": 0.8311, "step": 3636 }, { "epoch": 0.06448725277744541, "grad_norm": 2.796875, "learning_rate": 4.953241875349978e-05, "loss": 0.8983, "step": 3638 }, { "epoch": 0.06452270481305698, "grad_norm": 2.9375, "learning_rate": 4.9531880978487065e-05, "loss": 0.8764, "step": 3640 }, { "epoch": 0.06455815684866856, "grad_norm": 2.765625, "learning_rate": 4.953134289732159e-05, "loss": 0.835, "step": 3642 }, { "epoch": 0.06459360888428012, "grad_norm": 2.796875, "learning_rate": 4.9530804510010065e-05, "loss": 0.8383, "step": 3644 }, { "epoch": 0.06462906091989169, "grad_norm": 2.859375, "learning_rate": 4.953026581655921e-05, "loss": 0.856, "step": 3646 }, { "epoch": 0.06466451295550327, "grad_norm": 2.921875, "learning_rate": 4.952972681697574e-05, "loss": 0.8197, "step": 3648 }, { "epoch": 0.06469996499111483, "grad_norm": 2.765625, "learning_rate": 4.9529187511266395e-05, "loss": 0.8752, "step": 3650 }, { "epoch": 0.0647354170267264, "grad_norm": 2.84375, "learning_rate": 4.9528647899437894e-05, "loss": 0.8281, "step": 3652 }, { "epoch": 0.06477086906233798, "grad_norm": 2.875, "learning_rate": 4.9528107981496985e-05, "loss": 0.8465, "step": 3654 }, { "epoch": 0.06480632109794954, "grad_norm": 2.9375, "learning_rate": 4.952756775745039e-05, "loss": 0.8236, "step": 3656 }, { "epoch": 0.06484177313356111, "grad_norm": 3.03125, "learning_rate": 4.952702722730486e-05, "loss": 0.8802, "step": 3658 }, { "epoch": 0.06487722516917269, "grad_norm": 3.0625, "learning_rate": 4.952648639106714e-05, "loss": 0.8799, "step": 3660 }, { "epoch": 0.06491267720478425, "grad_norm": 2.796875, "learning_rate": 4.9525945248743974e-05, "loss": 0.8126, "step": 3662 }, { "epoch": 0.06494812924039582, "grad_norm": 2.453125, "learning_rate": 4.952540380034212e-05, "loss": 0.842, "step": 3664 }, { "epoch": 0.0649835812760074, "grad_norm": 2.765625, "learning_rate": 4.952486204586834e-05, "loss": 0.8368, "step": 3666 }, { "epoch": 0.06501903331161896, "grad_norm": 2.765625, "learning_rate": 4.952431998532939e-05, "loss": 0.8623, "step": 3668 }, { "epoch": 0.06505448534723053, "grad_norm": 2.8125, "learning_rate": 4.952377761873203e-05, "loss": 0.8137, "step": 3670 }, { "epoch": 0.0650899373828421, "grad_norm": 2.828125, "learning_rate": 4.952323494608303e-05, "loss": 0.8041, "step": 3672 }, { "epoch": 0.06512538941845367, "grad_norm": 2.6875, "learning_rate": 4.9522691967389175e-05, "loss": 0.8597, "step": 3674 }, { "epoch": 0.06516084145406524, "grad_norm": 2.875, "learning_rate": 4.952214868265723e-05, "loss": 0.8554, "step": 3676 }, { "epoch": 0.06519629348967682, "grad_norm": 2.921875, "learning_rate": 4.952160509189397e-05, "loss": 0.8432, "step": 3678 }, { "epoch": 0.06523174552528838, "grad_norm": 2.953125, "learning_rate": 4.952106119510619e-05, "loss": 0.8919, "step": 3680 }, { "epoch": 0.06526719756089995, "grad_norm": 2.9375, "learning_rate": 4.9520516992300675e-05, "loss": 0.862, "step": 3682 }, { "epoch": 0.06530264959651153, "grad_norm": 2.5625, "learning_rate": 4.9519972483484214e-05, "loss": 0.8214, "step": 3684 }, { "epoch": 0.06533810163212309, "grad_norm": 2.71875, "learning_rate": 4.9519427668663603e-05, "loss": 0.8427, "step": 3686 }, { "epoch": 0.06537355366773466, "grad_norm": 2.953125, "learning_rate": 4.9518882547845645e-05, "loss": 0.8962, "step": 3688 }, { "epoch": 0.06540900570334623, "grad_norm": 2.921875, "learning_rate": 4.951833712103714e-05, "loss": 0.8666, "step": 3690 }, { "epoch": 0.0654444577389578, "grad_norm": 2.71875, "learning_rate": 4.951779138824489e-05, "loss": 0.8367, "step": 3692 }, { "epoch": 0.06547990977456937, "grad_norm": 2.703125, "learning_rate": 4.951724534947571e-05, "loss": 0.8496, "step": 3694 }, { "epoch": 0.06551536181018094, "grad_norm": 2.859375, "learning_rate": 4.9516699004736415e-05, "loss": 0.878, "step": 3696 }, { "epoch": 0.0655508138457925, "grad_norm": 2.421875, "learning_rate": 4.9516152354033826e-05, "loss": 0.8568, "step": 3698 }, { "epoch": 0.06558626588140408, "grad_norm": 3.09375, "learning_rate": 4.9515605397374765e-05, "loss": 0.8316, "step": 3700 }, { "epoch": 0.06562171791701565, "grad_norm": 2.65625, "learning_rate": 4.951505813476605e-05, "loss": 0.8079, "step": 3702 }, { "epoch": 0.06565716995262721, "grad_norm": 2.5, "learning_rate": 4.951451056621451e-05, "loss": 0.9063, "step": 3704 }, { "epoch": 0.06569262198823879, "grad_norm": 2.890625, "learning_rate": 4.9513962691726986e-05, "loss": 0.8466, "step": 3706 }, { "epoch": 0.06572807402385036, "grad_norm": 2.796875, "learning_rate": 4.9513414511310325e-05, "loss": 0.917, "step": 3708 }, { "epoch": 0.06576352605946192, "grad_norm": 2.75, "learning_rate": 4.951286602497135e-05, "loss": 0.8531, "step": 3710 }, { "epoch": 0.0657989780950735, "grad_norm": 2.828125, "learning_rate": 4.951231723271691e-05, "loss": 0.8474, "step": 3712 }, { "epoch": 0.06583443013068506, "grad_norm": 2.4375, "learning_rate": 4.951176813455386e-05, "loss": 0.839, "step": 3714 }, { "epoch": 0.06586988216629663, "grad_norm": 2.6875, "learning_rate": 4.951121873048905e-05, "loss": 0.8619, "step": 3716 }, { "epoch": 0.06590533420190821, "grad_norm": 2.90625, "learning_rate": 4.951066902052933e-05, "loss": 0.8007, "step": 3718 }, { "epoch": 0.06594078623751977, "grad_norm": 3.21875, "learning_rate": 4.951011900468157e-05, "loss": 0.9208, "step": 3720 }, { "epoch": 0.06597623827313134, "grad_norm": 2.96875, "learning_rate": 4.9509568682952627e-05, "loss": 0.8499, "step": 3722 }, { "epoch": 0.06601169030874292, "grad_norm": 2.875, "learning_rate": 4.9509018055349374e-05, "loss": 0.8741, "step": 3724 }, { "epoch": 0.06604714234435448, "grad_norm": 2.828125, "learning_rate": 4.950846712187868e-05, "loss": 0.8479, "step": 3726 }, { "epoch": 0.06608259437996605, "grad_norm": 2.734375, "learning_rate": 4.950791588254742e-05, "loss": 0.8415, "step": 3728 }, { "epoch": 0.06611804641557763, "grad_norm": 3.03125, "learning_rate": 4.950736433736248e-05, "loss": 0.9057, "step": 3730 }, { "epoch": 0.06615349845118919, "grad_norm": 2.96875, "learning_rate": 4.9506812486330734e-05, "loss": 0.8189, "step": 3732 }, { "epoch": 0.06618895048680076, "grad_norm": 2.828125, "learning_rate": 4.950626032945907e-05, "loss": 0.884, "step": 3734 }, { "epoch": 0.06622440252241234, "grad_norm": 2.859375, "learning_rate": 4.950570786675438e-05, "loss": 0.8709, "step": 3736 }, { "epoch": 0.0662598545580239, "grad_norm": 2.796875, "learning_rate": 4.9505155098223565e-05, "loss": 0.845, "step": 3738 }, { "epoch": 0.06629530659363547, "grad_norm": 2.8125, "learning_rate": 4.9504602023873514e-05, "loss": 0.8461, "step": 3740 }, { "epoch": 0.06633075862924705, "grad_norm": 3.125, "learning_rate": 4.950404864371114e-05, "loss": 0.8842, "step": 3742 }, { "epoch": 0.06636621066485861, "grad_norm": 2.796875, "learning_rate": 4.950349495774333e-05, "loss": 0.8245, "step": 3744 }, { "epoch": 0.06640166270047018, "grad_norm": 2.796875, "learning_rate": 4.9502940965977026e-05, "loss": 0.8499, "step": 3746 }, { "epoch": 0.06643711473608176, "grad_norm": 2.8125, "learning_rate": 4.950238666841911e-05, "loss": 0.8319, "step": 3748 }, { "epoch": 0.06647256677169332, "grad_norm": 3.140625, "learning_rate": 4.950183206507651e-05, "loss": 0.8837, "step": 3750 }, { "epoch": 0.06650801880730489, "grad_norm": 2.65625, "learning_rate": 4.9501277155956164e-05, "loss": 0.8457, "step": 3752 }, { "epoch": 0.06654347084291647, "grad_norm": 2.4375, "learning_rate": 4.9500721941064964e-05, "loss": 0.8314, "step": 3754 }, { "epoch": 0.06657892287852803, "grad_norm": 3.09375, "learning_rate": 4.9500166420409866e-05, "loss": 0.8802, "step": 3756 }, { "epoch": 0.0666143749141396, "grad_norm": 2.796875, "learning_rate": 4.949961059399779e-05, "loss": 0.8275, "step": 3758 }, { "epoch": 0.06664982694975118, "grad_norm": 2.75, "learning_rate": 4.9499054461835684e-05, "loss": 0.8344, "step": 3760 }, { "epoch": 0.06668527898536274, "grad_norm": 2.953125, "learning_rate": 4.949849802393047e-05, "loss": 0.8534, "step": 3762 }, { "epoch": 0.06672073102097431, "grad_norm": 3.015625, "learning_rate": 4.9497941280289116e-05, "loss": 0.8407, "step": 3764 }, { "epoch": 0.06675618305658589, "grad_norm": 2.921875, "learning_rate": 4.949738423091855e-05, "loss": 0.8525, "step": 3766 }, { "epoch": 0.06679163509219745, "grad_norm": 2.8125, "learning_rate": 4.949682687582573e-05, "loss": 0.8505, "step": 3768 }, { "epoch": 0.06682708712780902, "grad_norm": 2.609375, "learning_rate": 4.9496269215017624e-05, "loss": 0.829, "step": 3770 }, { "epoch": 0.0668625391634206, "grad_norm": 2.890625, "learning_rate": 4.949571124850116e-05, "loss": 0.8904, "step": 3772 }, { "epoch": 0.06689799119903216, "grad_norm": 2.828125, "learning_rate": 4.949515297628334e-05, "loss": 0.8788, "step": 3774 }, { "epoch": 0.06693344323464373, "grad_norm": 2.765625, "learning_rate": 4.949459439837111e-05, "loss": 0.817, "step": 3776 }, { "epoch": 0.0669688952702553, "grad_norm": 2.625, "learning_rate": 4.949403551477144e-05, "loss": 0.8703, "step": 3778 }, { "epoch": 0.06700434730586687, "grad_norm": 2.515625, "learning_rate": 4.9493476325491306e-05, "loss": 0.8258, "step": 3780 }, { "epoch": 0.06703979934147844, "grad_norm": 2.71875, "learning_rate": 4.949291683053769e-05, "loss": 0.859, "step": 3782 }, { "epoch": 0.06707525137709001, "grad_norm": 2.90625, "learning_rate": 4.949235702991757e-05, "loss": 0.8668, "step": 3784 }, { "epoch": 0.06711070341270158, "grad_norm": 2.859375, "learning_rate": 4.9491796923637945e-05, "loss": 0.8673, "step": 3786 }, { "epoch": 0.06714615544831315, "grad_norm": 3.0625, "learning_rate": 4.949123651170579e-05, "loss": 0.8853, "step": 3788 }, { "epoch": 0.06718160748392472, "grad_norm": 2.984375, "learning_rate": 4.9490675794128105e-05, "loss": 0.8029, "step": 3790 }, { "epoch": 0.06721705951953628, "grad_norm": 3.0625, "learning_rate": 4.9490114770911886e-05, "loss": 0.858, "step": 3792 }, { "epoch": 0.06725251155514786, "grad_norm": 2.75, "learning_rate": 4.948955344206414e-05, "loss": 0.8789, "step": 3794 }, { "epoch": 0.06728796359075943, "grad_norm": 2.75, "learning_rate": 4.948899180759187e-05, "loss": 0.8451, "step": 3796 }, { "epoch": 0.067323415626371, "grad_norm": 2.796875, "learning_rate": 4.948842986750207e-05, "loss": 0.8629, "step": 3798 }, { "epoch": 0.06735886766198257, "grad_norm": 2.8125, "learning_rate": 4.948786762180178e-05, "loss": 0.8486, "step": 3800 }, { "epoch": 0.06739431969759413, "grad_norm": 2.890625, "learning_rate": 4.9487305070498e-05, "loss": 0.8094, "step": 3802 }, { "epoch": 0.0674297717332057, "grad_norm": 2.71875, "learning_rate": 4.9486742213597745e-05, "loss": 0.8793, "step": 3804 }, { "epoch": 0.06746522376881728, "grad_norm": 2.65625, "learning_rate": 4.9486179051108054e-05, "loss": 0.8378, "step": 3806 }, { "epoch": 0.06750067580442884, "grad_norm": 2.703125, "learning_rate": 4.9485615583035946e-05, "loss": 0.8348, "step": 3808 }, { "epoch": 0.06753612784004041, "grad_norm": 2.5625, "learning_rate": 4.948505180938846e-05, "loss": 0.83, "step": 3810 }, { "epoch": 0.06757157987565199, "grad_norm": 2.671875, "learning_rate": 4.9484487730172624e-05, "loss": 0.8492, "step": 3812 }, { "epoch": 0.06760703191126355, "grad_norm": 2.875, "learning_rate": 4.948392334539548e-05, "loss": 0.9011, "step": 3814 }, { "epoch": 0.06764248394687512, "grad_norm": 2.828125, "learning_rate": 4.948335865506407e-05, "loss": 0.8411, "step": 3816 }, { "epoch": 0.0676779359824867, "grad_norm": 2.8125, "learning_rate": 4.948279365918544e-05, "loss": 0.8459, "step": 3818 }, { "epoch": 0.06771338801809826, "grad_norm": 2.671875, "learning_rate": 4.948222835776666e-05, "loss": 0.8186, "step": 3820 }, { "epoch": 0.06774884005370983, "grad_norm": 2.90625, "learning_rate": 4.948166275081476e-05, "loss": 0.8643, "step": 3822 }, { "epoch": 0.06778429208932141, "grad_norm": 3.125, "learning_rate": 4.9481096838336804e-05, "loss": 0.8403, "step": 3824 }, { "epoch": 0.06781974412493297, "grad_norm": 2.859375, "learning_rate": 4.948053062033986e-05, "loss": 0.8526, "step": 3826 }, { "epoch": 0.06785519616054454, "grad_norm": 2.890625, "learning_rate": 4.9479964096831e-05, "loss": 0.8543, "step": 3828 }, { "epoch": 0.06789064819615612, "grad_norm": 2.734375, "learning_rate": 4.947939726781729e-05, "loss": 0.8389, "step": 3830 }, { "epoch": 0.06792610023176768, "grad_norm": 2.6875, "learning_rate": 4.947883013330579e-05, "loss": 0.8496, "step": 3832 }, { "epoch": 0.06796155226737925, "grad_norm": 2.59375, "learning_rate": 4.947826269330359e-05, "loss": 0.8223, "step": 3834 }, { "epoch": 0.06799700430299083, "grad_norm": 2.625, "learning_rate": 4.947769494781777e-05, "loss": 0.8621, "step": 3836 }, { "epoch": 0.06803245633860239, "grad_norm": 2.703125, "learning_rate": 4.947712689685542e-05, "loss": 0.903, "step": 3838 }, { "epoch": 0.06806790837421396, "grad_norm": 2.59375, "learning_rate": 4.947655854042362e-05, "loss": 0.845, "step": 3840 }, { "epoch": 0.06810336040982554, "grad_norm": 2.828125, "learning_rate": 4.947598987852947e-05, "loss": 0.858, "step": 3842 }, { "epoch": 0.0681388124454371, "grad_norm": 2.625, "learning_rate": 4.947542091118006e-05, "loss": 0.8404, "step": 3844 }, { "epoch": 0.06817426448104867, "grad_norm": 2.84375, "learning_rate": 4.9474851638382504e-05, "loss": 0.8334, "step": 3846 }, { "epoch": 0.06820971651666025, "grad_norm": 2.96875, "learning_rate": 4.9474282060143885e-05, "loss": 0.8613, "step": 3848 }, { "epoch": 0.0682451685522718, "grad_norm": 2.984375, "learning_rate": 4.947371217647133e-05, "loss": 0.8584, "step": 3850 }, { "epoch": 0.06828062058788338, "grad_norm": 3.046875, "learning_rate": 4.947314198737195e-05, "loss": 0.8718, "step": 3852 }, { "epoch": 0.06831607262349496, "grad_norm": 2.953125, "learning_rate": 4.947257149285285e-05, "loss": 0.8363, "step": 3854 }, { "epoch": 0.06835152465910652, "grad_norm": 2.609375, "learning_rate": 4.947200069292115e-05, "loss": 0.8623, "step": 3856 }, { "epoch": 0.06838697669471809, "grad_norm": 2.84375, "learning_rate": 4.9471429587583985e-05, "loss": 0.8581, "step": 3858 }, { "epoch": 0.06842242873032967, "grad_norm": 2.625, "learning_rate": 4.947085817684848e-05, "loss": 0.85, "step": 3860 }, { "epoch": 0.06845788076594123, "grad_norm": 2.765625, "learning_rate": 4.947028646072175e-05, "loss": 0.8351, "step": 3862 }, { "epoch": 0.0684933328015528, "grad_norm": 2.9375, "learning_rate": 4.9469714439210954e-05, "loss": 0.8576, "step": 3864 }, { "epoch": 0.06852878483716437, "grad_norm": 2.625, "learning_rate": 4.946914211232321e-05, "loss": 0.8174, "step": 3866 }, { "epoch": 0.06856423687277594, "grad_norm": 2.921875, "learning_rate": 4.946856948006567e-05, "loss": 0.8789, "step": 3868 }, { "epoch": 0.06859968890838751, "grad_norm": 2.828125, "learning_rate": 4.946799654244548e-05, "loss": 0.8533, "step": 3870 }, { "epoch": 0.06863514094399908, "grad_norm": 2.8125, "learning_rate": 4.9467423299469796e-05, "loss": 0.8777, "step": 3872 }, { "epoch": 0.06867059297961065, "grad_norm": 2.796875, "learning_rate": 4.9466849751145754e-05, "loss": 0.8299, "step": 3874 }, { "epoch": 0.06870604501522222, "grad_norm": 3.078125, "learning_rate": 4.946627589748053e-05, "loss": 0.8746, "step": 3876 }, { "epoch": 0.0687414970508338, "grad_norm": 2.8125, "learning_rate": 4.9465701738481276e-05, "loss": 0.8488, "step": 3878 }, { "epoch": 0.06877694908644535, "grad_norm": 2.90625, "learning_rate": 4.9465127274155165e-05, "loss": 0.8468, "step": 3880 }, { "epoch": 0.06881240112205693, "grad_norm": 2.953125, "learning_rate": 4.9464552504509353e-05, "loss": 0.8292, "step": 3882 }, { "epoch": 0.0688478531576685, "grad_norm": 2.8125, "learning_rate": 4.946397742955103e-05, "loss": 0.8555, "step": 3884 }, { "epoch": 0.06888330519328006, "grad_norm": 2.671875, "learning_rate": 4.946340204928736e-05, "loss": 0.857, "step": 3886 }, { "epoch": 0.06891875722889164, "grad_norm": 3.203125, "learning_rate": 4.946282636372553e-05, "loss": 0.8918, "step": 3888 }, { "epoch": 0.0689542092645032, "grad_norm": 2.6875, "learning_rate": 4.946225037287272e-05, "loss": 0.8305, "step": 3890 }, { "epoch": 0.06898966130011477, "grad_norm": 2.875, "learning_rate": 4.946167407673612e-05, "loss": 0.9015, "step": 3892 }, { "epoch": 0.06902511333572635, "grad_norm": 2.921875, "learning_rate": 4.9461097475322925e-05, "loss": 0.8525, "step": 3894 }, { "epoch": 0.06906056537133791, "grad_norm": 2.671875, "learning_rate": 4.946052056864032e-05, "loss": 0.8384, "step": 3896 }, { "epoch": 0.06909601740694948, "grad_norm": 2.65625, "learning_rate": 4.945994335669552e-05, "loss": 0.7948, "step": 3898 }, { "epoch": 0.06913146944256106, "grad_norm": 3.25, "learning_rate": 4.945936583949573e-05, "loss": 0.8752, "step": 3900 }, { "epoch": 0.06916692147817262, "grad_norm": 2.671875, "learning_rate": 4.945878801704814e-05, "loss": 0.8423, "step": 3902 }, { "epoch": 0.0692023735137842, "grad_norm": 2.390625, "learning_rate": 4.945820988935997e-05, "loss": 0.7475, "step": 3904 }, { "epoch": 0.06923782554939577, "grad_norm": 3.109375, "learning_rate": 4.945763145643844e-05, "loss": 0.8427, "step": 3906 }, { "epoch": 0.06927327758500733, "grad_norm": 2.59375, "learning_rate": 4.9457052718290756e-05, "loss": 0.8373, "step": 3908 }, { "epoch": 0.0693087296206189, "grad_norm": 2.953125, "learning_rate": 4.945647367492415e-05, "loss": 0.8304, "step": 3910 }, { "epoch": 0.06934418165623048, "grad_norm": 2.765625, "learning_rate": 4.945589432634584e-05, "loss": 0.8281, "step": 3912 }, { "epoch": 0.06937963369184204, "grad_norm": 2.8125, "learning_rate": 4.945531467256307e-05, "loss": 0.8817, "step": 3914 }, { "epoch": 0.06941508572745361, "grad_norm": 2.578125, "learning_rate": 4.9454734713583075e-05, "loss": 0.8514, "step": 3916 }, { "epoch": 0.06945053776306519, "grad_norm": 2.890625, "learning_rate": 4.945415444941307e-05, "loss": 0.8647, "step": 3918 }, { "epoch": 0.06948598979867675, "grad_norm": 2.90625, "learning_rate": 4.9453573880060324e-05, "loss": 0.8591, "step": 3920 }, { "epoch": 0.06952144183428832, "grad_norm": 2.734375, "learning_rate": 4.945299300553206e-05, "loss": 0.8554, "step": 3922 }, { "epoch": 0.0695568938698999, "grad_norm": 2.78125, "learning_rate": 4.945241182583554e-05, "loss": 0.8347, "step": 3924 }, { "epoch": 0.06959234590551146, "grad_norm": 2.640625, "learning_rate": 4.9451830340978014e-05, "loss": 0.8306, "step": 3926 }, { "epoch": 0.06962779794112303, "grad_norm": 2.765625, "learning_rate": 4.945124855096673e-05, "loss": 0.8073, "step": 3928 }, { "epoch": 0.0696632499767346, "grad_norm": 2.734375, "learning_rate": 4.9450666455808965e-05, "loss": 0.8574, "step": 3930 }, { "epoch": 0.06969870201234617, "grad_norm": 3.03125, "learning_rate": 4.945008405551197e-05, "loss": 0.8346, "step": 3932 }, { "epoch": 0.06973415404795774, "grad_norm": 2.78125, "learning_rate": 4.9449501350083024e-05, "loss": 0.8432, "step": 3934 }, { "epoch": 0.06976960608356932, "grad_norm": 2.875, "learning_rate": 4.944891833952939e-05, "loss": 0.8358, "step": 3936 }, { "epoch": 0.06980505811918088, "grad_norm": 2.734375, "learning_rate": 4.944833502385835e-05, "loss": 0.8209, "step": 3938 }, { "epoch": 0.06984051015479245, "grad_norm": 2.84375, "learning_rate": 4.944775140307718e-05, "loss": 0.8509, "step": 3940 }, { "epoch": 0.06987596219040403, "grad_norm": 2.828125, "learning_rate": 4.944716747719317e-05, "loss": 0.8755, "step": 3942 }, { "epoch": 0.06991141422601559, "grad_norm": 3.15625, "learning_rate": 4.9446583246213594e-05, "loss": 0.8793, "step": 3944 }, { "epoch": 0.06994686626162716, "grad_norm": 2.625, "learning_rate": 4.944599871014576e-05, "loss": 0.8635, "step": 3946 }, { "epoch": 0.06998231829723874, "grad_norm": 2.515625, "learning_rate": 4.944541386899694e-05, "loss": 0.8418, "step": 3948 }, { "epoch": 0.0700177703328503, "grad_norm": 2.671875, "learning_rate": 4.9444828722774455e-05, "loss": 0.8428, "step": 3950 }, { "epoch": 0.07005322236846187, "grad_norm": 2.9375, "learning_rate": 4.94442432714856e-05, "loss": 0.9022, "step": 3952 }, { "epoch": 0.07008867440407344, "grad_norm": 2.9375, "learning_rate": 4.9443657515137674e-05, "loss": 0.836, "step": 3954 }, { "epoch": 0.070124126439685, "grad_norm": 2.828125, "learning_rate": 4.9443071453738e-05, "loss": 0.8242, "step": 3956 }, { "epoch": 0.07015957847529658, "grad_norm": 2.703125, "learning_rate": 4.9442485087293886e-05, "loss": 0.8566, "step": 3958 }, { "epoch": 0.07019503051090815, "grad_norm": 2.96875, "learning_rate": 4.944189841581265e-05, "loss": 0.8319, "step": 3960 }, { "epoch": 0.07023048254651972, "grad_norm": 2.640625, "learning_rate": 4.944131143930161e-05, "loss": 0.8564, "step": 3962 }, { "epoch": 0.07026593458213129, "grad_norm": 2.6875, "learning_rate": 4.944072415776809e-05, "loss": 0.862, "step": 3964 }, { "epoch": 0.07030138661774286, "grad_norm": 3.015625, "learning_rate": 4.944013657121942e-05, "loss": 0.8417, "step": 3966 }, { "epoch": 0.07033683865335442, "grad_norm": 2.828125, "learning_rate": 4.943954867966295e-05, "loss": 0.845, "step": 3968 }, { "epoch": 0.070372290688966, "grad_norm": 2.765625, "learning_rate": 4.943896048310599e-05, "loss": 0.8121, "step": 3970 }, { "epoch": 0.07040774272457756, "grad_norm": 2.828125, "learning_rate": 4.94383719815559e-05, "loss": 0.8761, "step": 3972 }, { "epoch": 0.07044319476018913, "grad_norm": 3.421875, "learning_rate": 4.9437783175020015e-05, "loss": 0.8621, "step": 3974 }, { "epoch": 0.07047864679580071, "grad_norm": 3.03125, "learning_rate": 4.943719406350569e-05, "loss": 0.8443, "step": 3976 }, { "epoch": 0.07051409883141227, "grad_norm": 2.75, "learning_rate": 4.943660464702027e-05, "loss": 0.8103, "step": 3978 }, { "epoch": 0.07054955086702384, "grad_norm": 2.5625, "learning_rate": 4.943601492557112e-05, "loss": 0.8744, "step": 3980 }, { "epoch": 0.07058500290263542, "grad_norm": 2.859375, "learning_rate": 4.9435424899165586e-05, "loss": 0.8371, "step": 3982 }, { "epoch": 0.07062045493824698, "grad_norm": 3.234375, "learning_rate": 4.943483456781104e-05, "loss": 0.8318, "step": 3984 }, { "epoch": 0.07065590697385855, "grad_norm": 2.859375, "learning_rate": 4.943424393151485e-05, "loss": 0.8621, "step": 3986 }, { "epoch": 0.07069135900947013, "grad_norm": 2.90625, "learning_rate": 4.9433652990284375e-05, "loss": 0.8897, "step": 3988 }, { "epoch": 0.07072681104508169, "grad_norm": 2.875, "learning_rate": 4.943306174412701e-05, "loss": 0.8527, "step": 3990 }, { "epoch": 0.07076226308069326, "grad_norm": 2.8125, "learning_rate": 4.943247019305012e-05, "loss": 0.831, "step": 3992 }, { "epoch": 0.07079771511630484, "grad_norm": 2.859375, "learning_rate": 4.943187833706109e-05, "loss": 0.8465, "step": 3994 }, { "epoch": 0.0708331671519164, "grad_norm": 2.875, "learning_rate": 4.943128617616731e-05, "loss": 0.865, "step": 3996 }, { "epoch": 0.07086861918752797, "grad_norm": 2.9375, "learning_rate": 4.943069371037618e-05, "loss": 0.8672, "step": 3998 }, { "epoch": 0.07090407122313955, "grad_norm": 2.953125, "learning_rate": 4.943010093969506e-05, "loss": 0.833, "step": 4000 }, { "epoch": 0.07093952325875111, "grad_norm": 2.890625, "learning_rate": 4.9429507864131375e-05, "loss": 0.8364, "step": 4002 }, { "epoch": 0.07097497529436268, "grad_norm": 2.765625, "learning_rate": 4.942891448369252e-05, "loss": 0.8505, "step": 4004 }, { "epoch": 0.07101042732997426, "grad_norm": 2.984375, "learning_rate": 4.942832079838591e-05, "loss": 0.8213, "step": 4006 }, { "epoch": 0.07104587936558582, "grad_norm": 3.0, "learning_rate": 4.9427726808218935e-05, "loss": 0.8397, "step": 4008 }, { "epoch": 0.07108133140119739, "grad_norm": 3.390625, "learning_rate": 4.9427132513199015e-05, "loss": 0.8628, "step": 4010 }, { "epoch": 0.07111678343680897, "grad_norm": 2.84375, "learning_rate": 4.942653791333357e-05, "loss": 0.8477, "step": 4012 }, { "epoch": 0.07115223547242053, "grad_norm": 2.765625, "learning_rate": 4.942594300863003e-05, "loss": 0.9068, "step": 4014 }, { "epoch": 0.0711876875080321, "grad_norm": 2.90625, "learning_rate": 4.94253477990958e-05, "loss": 0.8385, "step": 4016 }, { "epoch": 0.07122313954364368, "grad_norm": 2.6875, "learning_rate": 4.942475228473832e-05, "loss": 0.8585, "step": 4018 }, { "epoch": 0.07125859157925524, "grad_norm": 2.875, "learning_rate": 4.942415646556501e-05, "loss": 0.8674, "step": 4020 }, { "epoch": 0.07129404361486681, "grad_norm": 2.78125, "learning_rate": 4.9423560341583325e-05, "loss": 0.841, "step": 4022 }, { "epoch": 0.07132949565047839, "grad_norm": 2.953125, "learning_rate": 4.942296391280069e-05, "loss": 0.8507, "step": 4024 }, { "epoch": 0.07136494768608995, "grad_norm": 2.890625, "learning_rate": 4.9422367179224555e-05, "loss": 0.8421, "step": 4026 }, { "epoch": 0.07140039972170152, "grad_norm": 2.8125, "learning_rate": 4.942177014086236e-05, "loss": 0.8639, "step": 4028 }, { "epoch": 0.0714358517573131, "grad_norm": 2.65625, "learning_rate": 4.9421172797721566e-05, "loss": 0.8645, "step": 4030 }, { "epoch": 0.07147130379292466, "grad_norm": 2.96875, "learning_rate": 4.942057514980962e-05, "loss": 0.8383, "step": 4032 }, { "epoch": 0.07150675582853623, "grad_norm": 2.921875, "learning_rate": 4.9419977197133984e-05, "loss": 0.8567, "step": 4034 }, { "epoch": 0.0715422078641478, "grad_norm": 2.578125, "learning_rate": 4.941937893970211e-05, "loss": 0.8575, "step": 4036 }, { "epoch": 0.07157765989975937, "grad_norm": 2.96875, "learning_rate": 4.941878037752148e-05, "loss": 0.8374, "step": 4038 }, { "epoch": 0.07161311193537094, "grad_norm": 2.671875, "learning_rate": 4.941818151059956e-05, "loss": 0.8745, "step": 4040 }, { "epoch": 0.07164856397098252, "grad_norm": 2.71875, "learning_rate": 4.9417582338943815e-05, "loss": 0.8833, "step": 4042 }, { "epoch": 0.07168401600659408, "grad_norm": 2.796875, "learning_rate": 4.9416982862561726e-05, "loss": 0.8612, "step": 4044 }, { "epoch": 0.07171946804220565, "grad_norm": 2.671875, "learning_rate": 4.941638308146078e-05, "loss": 0.819, "step": 4046 }, { "epoch": 0.07175492007781722, "grad_norm": 3.296875, "learning_rate": 4.941578299564846e-05, "loss": 0.837, "step": 4048 }, { "epoch": 0.07179037211342879, "grad_norm": 2.96875, "learning_rate": 4.9415182605132255e-05, "loss": 0.895, "step": 4050 }, { "epoch": 0.07182582414904036, "grad_norm": 2.625, "learning_rate": 4.9414581909919656e-05, "loss": 0.8425, "step": 4052 }, { "epoch": 0.07186127618465193, "grad_norm": 2.484375, "learning_rate": 4.941398091001815e-05, "loss": 0.8123, "step": 4054 }, { "epoch": 0.0718967282202635, "grad_norm": 2.609375, "learning_rate": 4.9413379605435264e-05, "loss": 0.8288, "step": 4056 }, { "epoch": 0.07193218025587507, "grad_norm": 2.625, "learning_rate": 4.9412777996178474e-05, "loss": 0.8381, "step": 4058 }, { "epoch": 0.07196763229148663, "grad_norm": 2.765625, "learning_rate": 4.9412176082255304e-05, "loss": 0.9139, "step": 4060 }, { "epoch": 0.0720030843270982, "grad_norm": 2.78125, "learning_rate": 4.941157386367326e-05, "loss": 0.8468, "step": 4062 }, { "epoch": 0.07203853636270978, "grad_norm": 2.859375, "learning_rate": 4.941097134043986e-05, "loss": 0.8892, "step": 4064 }, { "epoch": 0.07207398839832134, "grad_norm": 2.734375, "learning_rate": 4.9410368512562624e-05, "loss": 0.8498, "step": 4066 }, { "epoch": 0.07210944043393291, "grad_norm": 2.53125, "learning_rate": 4.940976538004907e-05, "loss": 0.8358, "step": 4068 }, { "epoch": 0.07214489246954449, "grad_norm": 2.875, "learning_rate": 4.9409161942906724e-05, "loss": 0.8611, "step": 4070 }, { "epoch": 0.07218034450515605, "grad_norm": 2.8125, "learning_rate": 4.940855820114312e-05, "loss": 0.8264, "step": 4072 }, { "epoch": 0.07221579654076762, "grad_norm": 2.859375, "learning_rate": 4.94079541547658e-05, "loss": 0.9031, "step": 4074 }, { "epoch": 0.0722512485763792, "grad_norm": 2.5, "learning_rate": 4.94073498037823e-05, "loss": 0.7923, "step": 4076 }, { "epoch": 0.07228670061199076, "grad_norm": 3.171875, "learning_rate": 4.940674514820015e-05, "loss": 0.8308, "step": 4078 }, { "epoch": 0.07232215264760233, "grad_norm": 2.84375, "learning_rate": 4.9406140188026905e-05, "loss": 0.8739, "step": 4080 }, { "epoch": 0.07235760468321391, "grad_norm": 2.984375, "learning_rate": 4.940553492327012e-05, "loss": 0.8497, "step": 4082 }, { "epoch": 0.07239305671882547, "grad_norm": 2.71875, "learning_rate": 4.9404929353937336e-05, "loss": 0.8896, "step": 4084 }, { "epoch": 0.07242850875443704, "grad_norm": 2.765625, "learning_rate": 4.9404323480036116e-05, "loss": 0.8515, "step": 4086 }, { "epoch": 0.07246396079004862, "grad_norm": 2.625, "learning_rate": 4.940371730157403e-05, "loss": 0.831, "step": 4088 }, { "epoch": 0.07249941282566018, "grad_norm": 2.890625, "learning_rate": 4.940311081855863e-05, "loss": 0.8463, "step": 4090 }, { "epoch": 0.07253486486127175, "grad_norm": 2.9375, "learning_rate": 4.940250403099749e-05, "loss": 0.8772, "step": 4092 }, { "epoch": 0.07257031689688333, "grad_norm": 2.765625, "learning_rate": 4.9401896938898185e-05, "loss": 0.837, "step": 4094 }, { "epoch": 0.07260576893249489, "grad_norm": 3.03125, "learning_rate": 4.940128954226828e-05, "loss": 0.863, "step": 4096 }, { "epoch": 0.07264122096810646, "grad_norm": 2.75, "learning_rate": 4.940068184111537e-05, "loss": 0.8407, "step": 4098 }, { "epoch": 0.07267667300371804, "grad_norm": 2.71875, "learning_rate": 4.9400073835447035e-05, "loss": 0.8389, "step": 4100 }, { "epoch": 0.0727121250393296, "grad_norm": 2.96875, "learning_rate": 4.939946552527086e-05, "loss": 0.8972, "step": 4102 }, { "epoch": 0.07274757707494117, "grad_norm": 2.703125, "learning_rate": 4.939885691059444e-05, "loss": 0.8153, "step": 4104 }, { "epoch": 0.07278302911055275, "grad_norm": 3.1875, "learning_rate": 4.939824799142536e-05, "loss": 0.8642, "step": 4106 }, { "epoch": 0.07281848114616431, "grad_norm": 2.890625, "learning_rate": 4.939763876777122e-05, "loss": 0.8415, "step": 4108 }, { "epoch": 0.07285393318177588, "grad_norm": 2.53125, "learning_rate": 4.939702923963965e-05, "loss": 0.819, "step": 4110 }, { "epoch": 0.07288938521738746, "grad_norm": 3.078125, "learning_rate": 4.9396419407038226e-05, "loss": 0.863, "step": 4112 }, { "epoch": 0.07292483725299902, "grad_norm": 2.765625, "learning_rate": 4.939580926997457e-05, "loss": 0.8575, "step": 4114 }, { "epoch": 0.07296028928861059, "grad_norm": 3.15625, "learning_rate": 4.9395198828456294e-05, "loss": 0.8643, "step": 4116 }, { "epoch": 0.07299574132422217, "grad_norm": 2.703125, "learning_rate": 4.9394588082491024e-05, "loss": 0.8993, "step": 4118 }, { "epoch": 0.07303119335983373, "grad_norm": 2.515625, "learning_rate": 4.939397703208637e-05, "loss": 0.8818, "step": 4120 }, { "epoch": 0.0730666453954453, "grad_norm": 3.046875, "learning_rate": 4.939336567724996e-05, "loss": 0.8545, "step": 4122 }, { "epoch": 0.07310209743105688, "grad_norm": 2.6875, "learning_rate": 4.9392754017989435e-05, "loss": 0.8493, "step": 4124 }, { "epoch": 0.07313754946666844, "grad_norm": 2.625, "learning_rate": 4.9392142054312416e-05, "loss": 0.83, "step": 4126 }, { "epoch": 0.07317300150228001, "grad_norm": 3.046875, "learning_rate": 4.939152978622655e-05, "loss": 0.9047, "step": 4128 }, { "epoch": 0.07320845353789159, "grad_norm": 2.375, "learning_rate": 4.939091721373946e-05, "loss": 0.8372, "step": 4130 }, { "epoch": 0.07324390557350315, "grad_norm": 2.8125, "learning_rate": 4.9390304336858814e-05, "loss": 0.8822, "step": 4132 }, { "epoch": 0.07327935760911472, "grad_norm": 2.53125, "learning_rate": 4.9389691155592256e-05, "loss": 0.7915, "step": 4134 }, { "epoch": 0.0733148096447263, "grad_norm": 2.640625, "learning_rate": 4.938907766994742e-05, "loss": 0.8189, "step": 4136 }, { "epoch": 0.07335026168033786, "grad_norm": 2.671875, "learning_rate": 4.938846387993198e-05, "loss": 0.8671, "step": 4138 }, { "epoch": 0.07338571371594943, "grad_norm": 2.890625, "learning_rate": 4.9387849785553584e-05, "loss": 0.8696, "step": 4140 }, { "epoch": 0.07342116575156099, "grad_norm": 2.640625, "learning_rate": 4.938723538681991e-05, "loss": 0.7995, "step": 4142 }, { "epoch": 0.07345661778717257, "grad_norm": 2.53125, "learning_rate": 4.9386620683738616e-05, "loss": 0.8268, "step": 4144 }, { "epoch": 0.07349206982278414, "grad_norm": 2.890625, "learning_rate": 4.9386005676317385e-05, "loss": 0.7975, "step": 4146 }, { "epoch": 0.0735275218583957, "grad_norm": 2.6875, "learning_rate": 4.9385390364563864e-05, "loss": 0.8414, "step": 4148 }, { "epoch": 0.07356297389400727, "grad_norm": 3.09375, "learning_rate": 4.9384774748485764e-05, "loss": 0.8708, "step": 4150 }, { "epoch": 0.07359842592961885, "grad_norm": 3.015625, "learning_rate": 4.938415882809074e-05, "loss": 0.9021, "step": 4152 }, { "epoch": 0.07363387796523041, "grad_norm": 2.796875, "learning_rate": 4.938354260338651e-05, "loss": 0.8506, "step": 4154 }, { "epoch": 0.07366933000084198, "grad_norm": 2.578125, "learning_rate": 4.938292607438074e-05, "loss": 0.8132, "step": 4156 }, { "epoch": 0.07370478203645356, "grad_norm": 2.921875, "learning_rate": 4.938230924108113e-05, "loss": 0.8767, "step": 4158 }, { "epoch": 0.07374023407206512, "grad_norm": 2.84375, "learning_rate": 4.938169210349538e-05, "loss": 0.8324, "step": 4160 }, { "epoch": 0.0737756861076767, "grad_norm": 2.609375, "learning_rate": 4.938107466163119e-05, "loss": 0.8516, "step": 4162 }, { "epoch": 0.07381113814328827, "grad_norm": 2.5625, "learning_rate": 4.938045691549626e-05, "loss": 0.8218, "step": 4164 }, { "epoch": 0.07384659017889983, "grad_norm": 2.875, "learning_rate": 4.937983886509832e-05, "loss": 0.8524, "step": 4166 }, { "epoch": 0.0738820422145114, "grad_norm": 2.859375, "learning_rate": 4.937922051044506e-05, "loss": 0.8436, "step": 4168 }, { "epoch": 0.07391749425012298, "grad_norm": 2.765625, "learning_rate": 4.93786018515442e-05, "loss": 0.8616, "step": 4170 }, { "epoch": 0.07395294628573454, "grad_norm": 2.765625, "learning_rate": 4.937798288840347e-05, "loss": 0.8628, "step": 4172 }, { "epoch": 0.07398839832134611, "grad_norm": 2.875, "learning_rate": 4.9377363621030596e-05, "loss": 0.8646, "step": 4174 }, { "epoch": 0.07402385035695769, "grad_norm": 3.109375, "learning_rate": 4.93767440494333e-05, "loss": 0.8985, "step": 4176 }, { "epoch": 0.07405930239256925, "grad_norm": 2.640625, "learning_rate": 4.937612417361932e-05, "loss": 0.8776, "step": 4178 }, { "epoch": 0.07409475442818082, "grad_norm": 2.734375, "learning_rate": 4.937550399359638e-05, "loss": 0.8236, "step": 4180 }, { "epoch": 0.0741302064637924, "grad_norm": 2.78125, "learning_rate": 4.937488350937223e-05, "loss": 0.8464, "step": 4182 }, { "epoch": 0.07416565849940396, "grad_norm": 2.59375, "learning_rate": 4.937426272095461e-05, "loss": 0.8683, "step": 4184 }, { "epoch": 0.07420111053501553, "grad_norm": 2.875, "learning_rate": 4.937364162835127e-05, "loss": 0.8014, "step": 4186 }, { "epoch": 0.07423656257062711, "grad_norm": 2.71875, "learning_rate": 4.9373020231569956e-05, "loss": 0.8531, "step": 4188 }, { "epoch": 0.07427201460623867, "grad_norm": 3.0625, "learning_rate": 4.937239853061843e-05, "loss": 0.8679, "step": 4190 }, { "epoch": 0.07430746664185024, "grad_norm": 2.9375, "learning_rate": 4.9371776525504446e-05, "loss": 0.8294, "step": 4192 }, { "epoch": 0.07434291867746182, "grad_norm": 2.75, "learning_rate": 4.937115421623577e-05, "loss": 0.8347, "step": 4194 }, { "epoch": 0.07437837071307338, "grad_norm": 2.6875, "learning_rate": 4.937053160282016e-05, "loss": 0.843, "step": 4196 }, { "epoch": 0.07441382274868495, "grad_norm": 2.796875, "learning_rate": 4.936990868526539e-05, "loss": 0.8282, "step": 4198 }, { "epoch": 0.07444927478429653, "grad_norm": 2.71875, "learning_rate": 4.936928546357924e-05, "loss": 0.8278, "step": 4200 }, { "epoch": 0.07448472681990809, "grad_norm": 2.734375, "learning_rate": 4.9368661937769475e-05, "loss": 0.8537, "step": 4202 }, { "epoch": 0.07452017885551966, "grad_norm": 2.78125, "learning_rate": 4.936803810784389e-05, "loss": 0.8322, "step": 4204 }, { "epoch": 0.07455563089113124, "grad_norm": 2.9375, "learning_rate": 4.936741397381027e-05, "loss": 0.8855, "step": 4206 }, { "epoch": 0.0745910829267428, "grad_norm": 3.0, "learning_rate": 4.93667895356764e-05, "loss": 0.851, "step": 4208 }, { "epoch": 0.07462653496235437, "grad_norm": 2.703125, "learning_rate": 4.9366164793450066e-05, "loss": 0.8417, "step": 4210 }, { "epoch": 0.07466198699796595, "grad_norm": 2.71875, "learning_rate": 4.936553974713907e-05, "loss": 0.8607, "step": 4212 }, { "epoch": 0.0746974390335775, "grad_norm": 2.78125, "learning_rate": 4.936491439675122e-05, "loss": 0.816, "step": 4214 }, { "epoch": 0.07473289106918908, "grad_norm": 2.90625, "learning_rate": 4.9364288742294306e-05, "loss": 0.8346, "step": 4216 }, { "epoch": 0.07476834310480066, "grad_norm": 3.015625, "learning_rate": 4.9363662783776146e-05, "loss": 0.8284, "step": 4218 }, { "epoch": 0.07480379514041222, "grad_norm": 2.75, "learning_rate": 4.9363036521204546e-05, "loss": 0.8666, "step": 4220 }, { "epoch": 0.07483924717602379, "grad_norm": 2.5, "learning_rate": 4.936240995458733e-05, "loss": 0.8194, "step": 4222 }, { "epoch": 0.07487469921163536, "grad_norm": 2.640625, "learning_rate": 4.9361783083932304e-05, "loss": 0.8879, "step": 4224 }, { "epoch": 0.07491015124724693, "grad_norm": 2.8125, "learning_rate": 4.93611559092473e-05, "loss": 0.8462, "step": 4226 }, { "epoch": 0.0749456032828585, "grad_norm": 2.6875, "learning_rate": 4.936052843054015e-05, "loss": 0.8267, "step": 4228 }, { "epoch": 0.07498105531847006, "grad_norm": 2.671875, "learning_rate": 4.935990064781868e-05, "loss": 0.897, "step": 4230 }, { "epoch": 0.07501650735408164, "grad_norm": 2.640625, "learning_rate": 4.935927256109072e-05, "loss": 0.8587, "step": 4232 }, { "epoch": 0.07505195938969321, "grad_norm": 2.578125, "learning_rate": 4.935864417036412e-05, "loss": 0.8527, "step": 4234 }, { "epoch": 0.07508741142530477, "grad_norm": 2.8125, "learning_rate": 4.93580154756467e-05, "loss": 0.8459, "step": 4236 }, { "epoch": 0.07512286346091634, "grad_norm": 3.09375, "learning_rate": 4.9357386476946334e-05, "loss": 0.8307, "step": 4238 }, { "epoch": 0.07515831549652792, "grad_norm": 2.59375, "learning_rate": 4.935675717427085e-05, "loss": 0.8156, "step": 4240 }, { "epoch": 0.07519376753213948, "grad_norm": 2.90625, "learning_rate": 4.935612756762811e-05, "loss": 0.8588, "step": 4242 }, { "epoch": 0.07522921956775105, "grad_norm": 2.859375, "learning_rate": 4.935549765702597e-05, "loss": 0.8664, "step": 4244 }, { "epoch": 0.07526467160336263, "grad_norm": 2.890625, "learning_rate": 4.93548674424723e-05, "loss": 0.8555, "step": 4246 }, { "epoch": 0.07530012363897419, "grad_norm": 2.84375, "learning_rate": 4.935423692397495e-05, "loss": 0.8634, "step": 4248 }, { "epoch": 0.07533557567458576, "grad_norm": 2.71875, "learning_rate": 4.93536061015418e-05, "loss": 0.835, "step": 4250 }, { "epoch": 0.07537102771019734, "grad_norm": 2.78125, "learning_rate": 4.935297497518071e-05, "loss": 0.8366, "step": 4252 }, { "epoch": 0.0754064797458089, "grad_norm": 3.015625, "learning_rate": 4.935234354489958e-05, "loss": 0.7994, "step": 4254 }, { "epoch": 0.07544193178142047, "grad_norm": 3.03125, "learning_rate": 4.935171181070626e-05, "loss": 0.8496, "step": 4256 }, { "epoch": 0.07547738381703205, "grad_norm": 3.0, "learning_rate": 4.935107977260865e-05, "loss": 0.8336, "step": 4258 }, { "epoch": 0.07551283585264361, "grad_norm": 2.75, "learning_rate": 4.9350447430614647e-05, "loss": 0.8468, "step": 4260 }, { "epoch": 0.07554828788825518, "grad_norm": 3.125, "learning_rate": 4.934981478473213e-05, "loss": 0.8597, "step": 4262 }, { "epoch": 0.07558373992386676, "grad_norm": 2.96875, "learning_rate": 4.934918183496898e-05, "loss": 0.82, "step": 4264 }, { "epoch": 0.07561919195947832, "grad_norm": 2.703125, "learning_rate": 4.934854858133313e-05, "loss": 0.8744, "step": 4266 }, { "epoch": 0.07565464399508989, "grad_norm": 2.765625, "learning_rate": 4.934791502383246e-05, "loss": 0.8545, "step": 4268 }, { "epoch": 0.07569009603070147, "grad_norm": 3.0625, "learning_rate": 4.934728116247488e-05, "loss": 0.8635, "step": 4270 }, { "epoch": 0.07572554806631303, "grad_norm": 2.859375, "learning_rate": 4.93466469972683e-05, "loss": 0.8592, "step": 4272 }, { "epoch": 0.0757610001019246, "grad_norm": 2.96875, "learning_rate": 4.934601252822064e-05, "loss": 0.8474, "step": 4274 }, { "epoch": 0.07579645213753618, "grad_norm": 2.921875, "learning_rate": 4.9345377755339815e-05, "loss": 0.9021, "step": 4276 }, { "epoch": 0.07583190417314774, "grad_norm": 2.9375, "learning_rate": 4.934474267863375e-05, "loss": 0.8184, "step": 4278 }, { "epoch": 0.07586735620875931, "grad_norm": 2.78125, "learning_rate": 4.934410729811036e-05, "loss": 0.8187, "step": 4280 }, { "epoch": 0.07590280824437089, "grad_norm": 2.96875, "learning_rate": 4.9343471613777584e-05, "loss": 0.8448, "step": 4282 }, { "epoch": 0.07593826027998245, "grad_norm": 2.453125, "learning_rate": 4.934283562564335e-05, "loss": 0.8228, "step": 4284 }, { "epoch": 0.07597371231559402, "grad_norm": 2.671875, "learning_rate": 4.93421993337156e-05, "loss": 0.8537, "step": 4286 }, { "epoch": 0.0760091643512056, "grad_norm": 2.625, "learning_rate": 4.934156273800228e-05, "loss": 0.8121, "step": 4288 }, { "epoch": 0.07604461638681716, "grad_norm": 2.625, "learning_rate": 4.934092583851132e-05, "loss": 0.8435, "step": 4290 }, { "epoch": 0.07608006842242873, "grad_norm": 2.78125, "learning_rate": 4.934028863525067e-05, "loss": 0.7778, "step": 4292 }, { "epoch": 0.0761155204580403, "grad_norm": 3.203125, "learning_rate": 4.933965112822829e-05, "loss": 0.8711, "step": 4294 }, { "epoch": 0.07615097249365187, "grad_norm": 3.109375, "learning_rate": 4.9339013317452145e-05, "loss": 0.8524, "step": 4296 }, { "epoch": 0.07618642452926344, "grad_norm": 2.71875, "learning_rate": 4.933837520293017e-05, "loss": 0.842, "step": 4298 }, { "epoch": 0.07622187656487502, "grad_norm": 2.75, "learning_rate": 4.933773678467035e-05, "loss": 0.8656, "step": 4300 }, { "epoch": 0.07625732860048658, "grad_norm": 2.828125, "learning_rate": 4.9337098062680635e-05, "loss": 0.8061, "step": 4302 }, { "epoch": 0.07629278063609815, "grad_norm": 2.921875, "learning_rate": 4.933645903696901e-05, "loss": 0.8413, "step": 4304 }, { "epoch": 0.07632823267170973, "grad_norm": 2.5625, "learning_rate": 4.933581970754345e-05, "loss": 0.8499, "step": 4306 }, { "epoch": 0.07636368470732129, "grad_norm": 2.796875, "learning_rate": 4.9335180074411926e-05, "loss": 0.8621, "step": 4308 }, { "epoch": 0.07639913674293286, "grad_norm": 3.015625, "learning_rate": 4.933454013758242e-05, "loss": 0.8514, "step": 4310 }, { "epoch": 0.07643458877854442, "grad_norm": 2.859375, "learning_rate": 4.933389989706292e-05, "loss": 0.8217, "step": 4312 }, { "epoch": 0.076470040814156, "grad_norm": 2.734375, "learning_rate": 4.933325935286142e-05, "loss": 0.8471, "step": 4314 }, { "epoch": 0.07650549284976757, "grad_norm": 2.890625, "learning_rate": 4.933261850498592e-05, "loss": 0.8507, "step": 4316 }, { "epoch": 0.07654094488537913, "grad_norm": 2.71875, "learning_rate": 4.93319773534444e-05, "loss": 0.873, "step": 4318 }, { "epoch": 0.0765763969209907, "grad_norm": 2.84375, "learning_rate": 4.9331335898244866e-05, "loss": 0.8346, "step": 4320 }, { "epoch": 0.07661184895660228, "grad_norm": 2.9375, "learning_rate": 4.933069413939534e-05, "loss": 0.8501, "step": 4322 }, { "epoch": 0.07664730099221384, "grad_norm": 2.734375, "learning_rate": 4.933005207690381e-05, "loss": 0.8429, "step": 4324 }, { "epoch": 0.07668275302782541, "grad_norm": 2.5625, "learning_rate": 4.93294097107783e-05, "loss": 0.811, "step": 4326 }, { "epoch": 0.07671820506343699, "grad_norm": 2.6875, "learning_rate": 4.9328767041026824e-05, "loss": 0.8589, "step": 4328 }, { "epoch": 0.07675365709904855, "grad_norm": 2.875, "learning_rate": 4.9328124067657406e-05, "loss": 0.8443, "step": 4330 }, { "epoch": 0.07678910913466012, "grad_norm": 2.890625, "learning_rate": 4.932748079067806e-05, "loss": 0.8548, "step": 4332 }, { "epoch": 0.0768245611702717, "grad_norm": 2.515625, "learning_rate": 4.932683721009683e-05, "loss": 0.8519, "step": 4334 }, { "epoch": 0.07686001320588326, "grad_norm": 2.546875, "learning_rate": 4.9326193325921734e-05, "loss": 0.8667, "step": 4336 }, { "epoch": 0.07689546524149483, "grad_norm": 2.9375, "learning_rate": 4.932554913816081e-05, "loss": 0.815, "step": 4338 }, { "epoch": 0.07693091727710641, "grad_norm": 2.9375, "learning_rate": 4.9324904646822104e-05, "loss": 0.8384, "step": 4340 }, { "epoch": 0.07696636931271797, "grad_norm": 2.78125, "learning_rate": 4.932425985191365e-05, "loss": 0.8983, "step": 4342 }, { "epoch": 0.07700182134832954, "grad_norm": 2.5, "learning_rate": 4.9323614753443506e-05, "loss": 0.8064, "step": 4344 }, { "epoch": 0.07703727338394112, "grad_norm": 2.859375, "learning_rate": 4.932296935141971e-05, "loss": 0.8356, "step": 4346 }, { "epoch": 0.07707272541955268, "grad_norm": 2.921875, "learning_rate": 4.932232364585032e-05, "loss": 0.8279, "step": 4348 }, { "epoch": 0.07710817745516425, "grad_norm": 2.8125, "learning_rate": 4.93216776367434e-05, "loss": 0.8228, "step": 4350 }, { "epoch": 0.07714362949077583, "grad_norm": 2.78125, "learning_rate": 4.9321031324107016e-05, "loss": 0.8484, "step": 4352 }, { "epoch": 0.07717908152638739, "grad_norm": 2.734375, "learning_rate": 4.932038470794922e-05, "loss": 0.833, "step": 4354 }, { "epoch": 0.07721453356199896, "grad_norm": 2.953125, "learning_rate": 4.931973778827809e-05, "loss": 0.8317, "step": 4356 }, { "epoch": 0.07724998559761054, "grad_norm": 2.65625, "learning_rate": 4.931909056510169e-05, "loss": 0.8346, "step": 4358 }, { "epoch": 0.0772854376332221, "grad_norm": 2.609375, "learning_rate": 4.931844303842811e-05, "loss": 0.8479, "step": 4360 }, { "epoch": 0.07732088966883367, "grad_norm": 2.703125, "learning_rate": 4.931779520826543e-05, "loss": 0.8641, "step": 4362 }, { "epoch": 0.07735634170444525, "grad_norm": 2.703125, "learning_rate": 4.931714707462173e-05, "loss": 0.8309, "step": 4364 }, { "epoch": 0.07739179374005681, "grad_norm": 2.84375, "learning_rate": 4.93164986375051e-05, "loss": 0.9095, "step": 4366 }, { "epoch": 0.07742724577566838, "grad_norm": 2.765625, "learning_rate": 4.931584989692363e-05, "loss": 0.8253, "step": 4368 }, { "epoch": 0.07746269781127996, "grad_norm": 2.609375, "learning_rate": 4.9315200852885415e-05, "loss": 0.8257, "step": 4370 }, { "epoch": 0.07749814984689152, "grad_norm": 3.1875, "learning_rate": 4.931455150539856e-05, "loss": 0.8824, "step": 4372 }, { "epoch": 0.07753360188250309, "grad_norm": 2.578125, "learning_rate": 4.931390185447117e-05, "loss": 0.845, "step": 4374 }, { "epoch": 0.07756905391811467, "grad_norm": 3.0, "learning_rate": 4.9313251900111346e-05, "loss": 0.8355, "step": 4376 }, { "epoch": 0.07760450595372623, "grad_norm": 2.484375, "learning_rate": 4.93126016423272e-05, "loss": 0.8556, "step": 4378 }, { "epoch": 0.0776399579893378, "grad_norm": 2.53125, "learning_rate": 4.931195108112685e-05, "loss": 0.8141, "step": 4380 }, { "epoch": 0.07767541002494938, "grad_norm": 2.828125, "learning_rate": 4.9311300216518416e-05, "loss": 0.8393, "step": 4382 }, { "epoch": 0.07771086206056094, "grad_norm": 2.703125, "learning_rate": 4.931064904851003e-05, "loss": 0.8447, "step": 4384 }, { "epoch": 0.07774631409617251, "grad_norm": 2.671875, "learning_rate": 4.930999757710979e-05, "loss": 0.8567, "step": 4386 }, { "epoch": 0.07778176613178409, "grad_norm": 2.75, "learning_rate": 4.930934580232585e-05, "loss": 0.8612, "step": 4388 }, { "epoch": 0.07781721816739565, "grad_norm": 3.109375, "learning_rate": 4.930869372416634e-05, "loss": 0.8599, "step": 4390 }, { "epoch": 0.07785267020300722, "grad_norm": 2.734375, "learning_rate": 4.930804134263939e-05, "loss": 0.861, "step": 4392 }, { "epoch": 0.0778881222386188, "grad_norm": 2.546875, "learning_rate": 4.930738865775315e-05, "loss": 0.8451, "step": 4394 }, { "epoch": 0.07792357427423036, "grad_norm": 2.890625, "learning_rate": 4.930673566951577e-05, "loss": 0.8911, "step": 4396 }, { "epoch": 0.07795902630984193, "grad_norm": 2.8125, "learning_rate": 4.9306082377935384e-05, "loss": 0.8285, "step": 4398 }, { "epoch": 0.07799447834545349, "grad_norm": 3.109375, "learning_rate": 4.930542878302015e-05, "loss": 0.8523, "step": 4400 }, { "epoch": 0.07802993038106507, "grad_norm": 3.265625, "learning_rate": 4.9304774884778224e-05, "loss": 0.866, "step": 4402 }, { "epoch": 0.07806538241667664, "grad_norm": 2.953125, "learning_rate": 4.930412068321778e-05, "loss": 0.8531, "step": 4404 }, { "epoch": 0.0781008344522882, "grad_norm": 2.984375, "learning_rate": 4.930346617834697e-05, "loss": 0.8139, "step": 4406 }, { "epoch": 0.07813628648789978, "grad_norm": 2.84375, "learning_rate": 4.9302811370173966e-05, "loss": 0.836, "step": 4408 }, { "epoch": 0.07817173852351135, "grad_norm": 2.96875, "learning_rate": 4.930215625870693e-05, "loss": 0.8246, "step": 4410 }, { "epoch": 0.07820719055912291, "grad_norm": 2.40625, "learning_rate": 4.930150084395405e-05, "loss": 0.7908, "step": 4412 }, { "epoch": 0.07824264259473449, "grad_norm": 2.515625, "learning_rate": 4.93008451259235e-05, "loss": 0.8515, "step": 4414 }, { "epoch": 0.07827809463034606, "grad_norm": 2.640625, "learning_rate": 4.9300189104623466e-05, "loss": 0.8549, "step": 4416 }, { "epoch": 0.07831354666595762, "grad_norm": 2.609375, "learning_rate": 4.929953278006213e-05, "loss": 0.842, "step": 4418 }, { "epoch": 0.0783489987015692, "grad_norm": 2.9375, "learning_rate": 4.929887615224769e-05, "loss": 0.8326, "step": 4420 }, { "epoch": 0.07838445073718077, "grad_norm": 2.96875, "learning_rate": 4.9298219221188336e-05, "loss": 0.844, "step": 4422 }, { "epoch": 0.07841990277279233, "grad_norm": 2.90625, "learning_rate": 4.929756198689227e-05, "loss": 0.8625, "step": 4424 }, { "epoch": 0.0784553548084039, "grad_norm": 2.375, "learning_rate": 4.9296904449367685e-05, "loss": 0.8317, "step": 4426 }, { "epoch": 0.07849080684401548, "grad_norm": 2.609375, "learning_rate": 4.9296246608622795e-05, "loss": 0.795, "step": 4428 }, { "epoch": 0.07852625887962704, "grad_norm": 2.578125, "learning_rate": 4.929558846466581e-05, "loss": 0.8097, "step": 4430 }, { "epoch": 0.07856171091523861, "grad_norm": 3.09375, "learning_rate": 4.929493001750494e-05, "loss": 0.834, "step": 4432 }, { "epoch": 0.07859716295085019, "grad_norm": 2.609375, "learning_rate": 4.9294271267148405e-05, "loss": 0.8514, "step": 4434 }, { "epoch": 0.07863261498646175, "grad_norm": 2.984375, "learning_rate": 4.929361221360442e-05, "loss": 0.8629, "step": 4436 }, { "epoch": 0.07866806702207332, "grad_norm": 2.40625, "learning_rate": 4.929295285688122e-05, "loss": 0.854, "step": 4438 }, { "epoch": 0.0787035190576849, "grad_norm": 2.765625, "learning_rate": 4.929229319698703e-05, "loss": 0.8652, "step": 4440 }, { "epoch": 0.07873897109329646, "grad_norm": 2.703125, "learning_rate": 4.929163323393008e-05, "loss": 0.8475, "step": 4442 }, { "epoch": 0.07877442312890803, "grad_norm": 2.96875, "learning_rate": 4.92909729677186e-05, "loss": 0.8511, "step": 4444 }, { "epoch": 0.07880987516451961, "grad_norm": 2.5, "learning_rate": 4.929031239836084e-05, "loss": 0.7948, "step": 4446 }, { "epoch": 0.07884532720013117, "grad_norm": 3.25, "learning_rate": 4.9289651525865046e-05, "loss": 0.8679, "step": 4448 }, { "epoch": 0.07888077923574274, "grad_norm": 2.796875, "learning_rate": 4.928899035023945e-05, "loss": 0.8649, "step": 4450 }, { "epoch": 0.07891623127135432, "grad_norm": 2.765625, "learning_rate": 4.9288328871492315e-05, "loss": 0.8551, "step": 4452 }, { "epoch": 0.07895168330696588, "grad_norm": 2.71875, "learning_rate": 4.9287667089631904e-05, "loss": 0.7862, "step": 4454 }, { "epoch": 0.07898713534257745, "grad_norm": 2.578125, "learning_rate": 4.9287005004666465e-05, "loss": 0.8378, "step": 4456 }, { "epoch": 0.07902258737818903, "grad_norm": 2.796875, "learning_rate": 4.928634261660425e-05, "loss": 0.8194, "step": 4458 }, { "epoch": 0.07905803941380059, "grad_norm": 2.84375, "learning_rate": 4.9285679925453545e-05, "loss": 0.8504, "step": 4460 }, { "epoch": 0.07909349144941216, "grad_norm": 2.609375, "learning_rate": 4.928501693122262e-05, "loss": 0.877, "step": 4462 }, { "epoch": 0.07912894348502374, "grad_norm": 2.625, "learning_rate": 4.928435363391973e-05, "loss": 0.8103, "step": 4464 }, { "epoch": 0.0791643955206353, "grad_norm": 3.015625, "learning_rate": 4.9283690033553174e-05, "loss": 0.8439, "step": 4466 }, { "epoch": 0.07919984755624687, "grad_norm": 2.6875, "learning_rate": 4.928302613013122e-05, "loss": 0.7667, "step": 4468 }, { "epoch": 0.07923529959185845, "grad_norm": 2.765625, "learning_rate": 4.9282361923662156e-05, "loss": 0.8139, "step": 4470 }, { "epoch": 0.07927075162747, "grad_norm": 2.828125, "learning_rate": 4.928169741415428e-05, "loss": 0.8062, "step": 4472 }, { "epoch": 0.07930620366308158, "grad_norm": 2.890625, "learning_rate": 4.928103260161587e-05, "loss": 0.8375, "step": 4474 }, { "epoch": 0.07934165569869316, "grad_norm": 2.796875, "learning_rate": 4.928036748605523e-05, "loss": 0.874, "step": 4476 }, { "epoch": 0.07937710773430472, "grad_norm": 2.796875, "learning_rate": 4.927970206748067e-05, "loss": 0.8288, "step": 4478 }, { "epoch": 0.07941255976991629, "grad_norm": 2.734375, "learning_rate": 4.927903634590048e-05, "loss": 0.8458, "step": 4480 }, { "epoch": 0.07944801180552787, "grad_norm": 2.671875, "learning_rate": 4.927837032132297e-05, "loss": 0.8423, "step": 4482 }, { "epoch": 0.07948346384113943, "grad_norm": 2.859375, "learning_rate": 4.927770399375646e-05, "loss": 0.8363, "step": 4484 }, { "epoch": 0.079518915876751, "grad_norm": 2.78125, "learning_rate": 4.9277037363209256e-05, "loss": 0.8574, "step": 4486 }, { "epoch": 0.07955436791236256, "grad_norm": 2.65625, "learning_rate": 4.927637042968969e-05, "loss": 0.8135, "step": 4488 }, { "epoch": 0.07958981994797414, "grad_norm": 2.75, "learning_rate": 4.927570319320607e-05, "loss": 0.8335, "step": 4490 }, { "epoch": 0.07962527198358571, "grad_norm": 2.6875, "learning_rate": 4.9275035653766735e-05, "loss": 0.848, "step": 4492 }, { "epoch": 0.07966072401919727, "grad_norm": 2.71875, "learning_rate": 4.927436781138001e-05, "loss": 0.8307, "step": 4494 }, { "epoch": 0.07969617605480885, "grad_norm": 2.96875, "learning_rate": 4.927369966605423e-05, "loss": 0.8645, "step": 4496 }, { "epoch": 0.07973162809042042, "grad_norm": 2.875, "learning_rate": 4.927303121779773e-05, "loss": 0.8851, "step": 4498 }, { "epoch": 0.07976708012603198, "grad_norm": 2.6875, "learning_rate": 4.927236246661886e-05, "loss": 0.8013, "step": 4500 }, { "epoch": 0.07980253216164356, "grad_norm": 2.96875, "learning_rate": 4.927169341252596e-05, "loss": 0.8538, "step": 4502 }, { "epoch": 0.07983798419725513, "grad_norm": 2.609375, "learning_rate": 4.927102405552738e-05, "loss": 0.828, "step": 4504 }, { "epoch": 0.07987343623286669, "grad_norm": 2.9375, "learning_rate": 4.927035439563149e-05, "loss": 0.8303, "step": 4506 }, { "epoch": 0.07990888826847826, "grad_norm": 2.625, "learning_rate": 4.926968443284662e-05, "loss": 0.8525, "step": 4508 }, { "epoch": 0.07994434030408984, "grad_norm": 2.71875, "learning_rate": 4.926901416718114e-05, "loss": 0.8281, "step": 4510 }, { "epoch": 0.0799797923397014, "grad_norm": 2.515625, "learning_rate": 4.926834359864342e-05, "loss": 0.8465, "step": 4512 }, { "epoch": 0.08001524437531297, "grad_norm": 2.484375, "learning_rate": 4.9267672727241834e-05, "loss": 0.8424, "step": 4514 }, { "epoch": 0.08005069641092455, "grad_norm": 3.09375, "learning_rate": 4.926700155298474e-05, "loss": 0.792, "step": 4516 }, { "epoch": 0.08008614844653611, "grad_norm": 2.921875, "learning_rate": 4.926633007588053e-05, "loss": 0.8201, "step": 4518 }, { "epoch": 0.08012160048214768, "grad_norm": 2.75, "learning_rate": 4.926565829593756e-05, "loss": 0.8458, "step": 4520 }, { "epoch": 0.08015705251775926, "grad_norm": 2.484375, "learning_rate": 4.9264986213164235e-05, "loss": 0.8366, "step": 4522 }, { "epoch": 0.08019250455337082, "grad_norm": 2.71875, "learning_rate": 4.926431382756894e-05, "loss": 0.8485, "step": 4524 }, { "epoch": 0.0802279565889824, "grad_norm": 3.21875, "learning_rate": 4.926364113916006e-05, "loss": 0.8815, "step": 4526 }, { "epoch": 0.08026340862459397, "grad_norm": 3.328125, "learning_rate": 4.926296814794599e-05, "loss": 0.844, "step": 4528 }, { "epoch": 0.08029886066020553, "grad_norm": 2.875, "learning_rate": 4.926229485393513e-05, "loss": 0.7989, "step": 4530 }, { "epoch": 0.0803343126958171, "grad_norm": 2.625, "learning_rate": 4.926162125713589e-05, "loss": 0.8728, "step": 4532 }, { "epoch": 0.08036976473142868, "grad_norm": 2.625, "learning_rate": 4.9260947357556666e-05, "loss": 0.8449, "step": 4534 }, { "epoch": 0.08040521676704024, "grad_norm": 2.546875, "learning_rate": 4.926027315520588e-05, "loss": 0.8506, "step": 4536 }, { "epoch": 0.08044066880265181, "grad_norm": 3.078125, "learning_rate": 4.925959865009193e-05, "loss": 0.8575, "step": 4538 }, { "epoch": 0.08047612083826339, "grad_norm": 2.9375, "learning_rate": 4.925892384222324e-05, "loss": 0.8483, "step": 4540 }, { "epoch": 0.08051157287387495, "grad_norm": 2.859375, "learning_rate": 4.9258248731608235e-05, "loss": 0.8541, "step": 4542 }, { "epoch": 0.08054702490948652, "grad_norm": 2.765625, "learning_rate": 4.9257573318255344e-05, "loss": 0.8851, "step": 4544 }, { "epoch": 0.0805824769450981, "grad_norm": 2.859375, "learning_rate": 4.9256897602172986e-05, "loss": 0.8394, "step": 4546 }, { "epoch": 0.08061792898070966, "grad_norm": 2.640625, "learning_rate": 4.92562215833696e-05, "loss": 0.8504, "step": 4548 }, { "epoch": 0.08065338101632123, "grad_norm": 3.125, "learning_rate": 4.925554526185362e-05, "loss": 0.8731, "step": 4550 }, { "epoch": 0.0806888330519328, "grad_norm": 2.65625, "learning_rate": 4.925486863763349e-05, "loss": 0.8335, "step": 4552 }, { "epoch": 0.08072428508754437, "grad_norm": 2.734375, "learning_rate": 4.925419171071765e-05, "loss": 0.8089, "step": 4554 }, { "epoch": 0.08075973712315594, "grad_norm": 2.953125, "learning_rate": 4.9253514481114535e-05, "loss": 0.8675, "step": 4556 }, { "epoch": 0.08079518915876752, "grad_norm": 3.0, "learning_rate": 4.925283694883263e-05, "loss": 0.8822, "step": 4558 }, { "epoch": 0.08083064119437908, "grad_norm": 2.734375, "learning_rate": 4.9252159113880365e-05, "loss": 0.8938, "step": 4560 }, { "epoch": 0.08086609322999065, "grad_norm": 2.921875, "learning_rate": 4.925148097626621e-05, "loss": 0.8671, "step": 4562 }, { "epoch": 0.08090154526560223, "grad_norm": 2.546875, "learning_rate": 4.9250802535998605e-05, "loss": 0.8473, "step": 4564 }, { "epoch": 0.08093699730121379, "grad_norm": 2.65625, "learning_rate": 4.925012379308606e-05, "loss": 0.7768, "step": 4566 }, { "epoch": 0.08097244933682536, "grad_norm": 2.984375, "learning_rate": 4.924944474753701e-05, "loss": 0.8361, "step": 4568 }, { "epoch": 0.08100790137243692, "grad_norm": 3.015625, "learning_rate": 4.9248765399359934e-05, "loss": 0.9217, "step": 4570 }, { "epoch": 0.0810433534080485, "grad_norm": 2.921875, "learning_rate": 4.924808574856332e-05, "loss": 0.8699, "step": 4572 }, { "epoch": 0.08107880544366007, "grad_norm": 2.640625, "learning_rate": 4.9247405795155655e-05, "loss": 0.8332, "step": 4574 }, { "epoch": 0.08111425747927163, "grad_norm": 2.84375, "learning_rate": 4.924672553914541e-05, "loss": 0.8051, "step": 4576 }, { "epoch": 0.0811497095148832, "grad_norm": 2.796875, "learning_rate": 4.9246044980541084e-05, "loss": 0.8353, "step": 4578 }, { "epoch": 0.08118516155049478, "grad_norm": 2.6875, "learning_rate": 4.924536411935116e-05, "loss": 0.8489, "step": 4580 }, { "epoch": 0.08122061358610634, "grad_norm": 2.6875, "learning_rate": 4.924468295558415e-05, "loss": 0.8225, "step": 4582 }, { "epoch": 0.08125606562171792, "grad_norm": 2.640625, "learning_rate": 4.9244001489248535e-05, "loss": 0.8798, "step": 4584 }, { "epoch": 0.08129151765732949, "grad_norm": 2.84375, "learning_rate": 4.924331972035284e-05, "loss": 0.8132, "step": 4586 }, { "epoch": 0.08132696969294105, "grad_norm": 2.890625, "learning_rate": 4.924263764890557e-05, "loss": 0.8357, "step": 4588 }, { "epoch": 0.08136242172855263, "grad_norm": 2.671875, "learning_rate": 4.924195527491522e-05, "loss": 0.8486, "step": 4590 }, { "epoch": 0.0813978737641642, "grad_norm": 2.40625, "learning_rate": 4.924127259839032e-05, "loss": 0.7952, "step": 4592 }, { "epoch": 0.08143332579977576, "grad_norm": 2.9375, "learning_rate": 4.924058961933939e-05, "loss": 0.8301, "step": 4594 }, { "epoch": 0.08146877783538733, "grad_norm": 2.90625, "learning_rate": 4.923990633777095e-05, "loss": 0.8903, "step": 4596 }, { "epoch": 0.08150422987099891, "grad_norm": 2.703125, "learning_rate": 4.923922275369352e-05, "loss": 0.8317, "step": 4598 }, { "epoch": 0.08153968190661047, "grad_norm": 2.796875, "learning_rate": 4.923853886711565e-05, "loss": 0.8319, "step": 4600 }, { "epoch": 0.08157513394222204, "grad_norm": 2.609375, "learning_rate": 4.9237854678045855e-05, "loss": 0.8296, "step": 4602 }, { "epoch": 0.08161058597783362, "grad_norm": 2.625, "learning_rate": 4.923717018649269e-05, "loss": 0.8253, "step": 4604 }, { "epoch": 0.08164603801344518, "grad_norm": 3.171875, "learning_rate": 4.923648539246468e-05, "loss": 0.8293, "step": 4606 }, { "epoch": 0.08168149004905675, "grad_norm": 3.421875, "learning_rate": 4.923580029597039e-05, "loss": 0.8476, "step": 4608 }, { "epoch": 0.08171694208466833, "grad_norm": 2.921875, "learning_rate": 4.923511489701835e-05, "loss": 0.8445, "step": 4610 }, { "epoch": 0.08175239412027989, "grad_norm": 2.75, "learning_rate": 4.923442919561714e-05, "loss": 0.8342, "step": 4612 }, { "epoch": 0.08178784615589146, "grad_norm": 3.140625, "learning_rate": 4.9233743191775286e-05, "loss": 0.8761, "step": 4614 }, { "epoch": 0.08182329819150304, "grad_norm": 3.25, "learning_rate": 4.923305688550137e-05, "loss": 0.8501, "step": 4616 }, { "epoch": 0.0818587502271146, "grad_norm": 2.453125, "learning_rate": 4.9232370276803955e-05, "loss": 0.8057, "step": 4618 }, { "epoch": 0.08189420226272617, "grad_norm": 2.484375, "learning_rate": 4.923168336569159e-05, "loss": 0.8029, "step": 4620 }, { "epoch": 0.08192965429833775, "grad_norm": 3.140625, "learning_rate": 4.923099615217288e-05, "loss": 0.8876, "step": 4622 }, { "epoch": 0.08196510633394931, "grad_norm": 3.234375, "learning_rate": 4.9230308636256385e-05, "loss": 0.8402, "step": 4624 }, { "epoch": 0.08200055836956088, "grad_norm": 2.84375, "learning_rate": 4.922962081795068e-05, "loss": 0.8426, "step": 4626 }, { "epoch": 0.08203601040517246, "grad_norm": 2.8125, "learning_rate": 4.922893269726435e-05, "loss": 0.8373, "step": 4628 }, { "epoch": 0.08207146244078402, "grad_norm": 2.5625, "learning_rate": 4.922824427420599e-05, "loss": 0.8424, "step": 4630 }, { "epoch": 0.08210691447639559, "grad_norm": 2.890625, "learning_rate": 4.9227555548784196e-05, "loss": 0.8827, "step": 4632 }, { "epoch": 0.08214236651200717, "grad_norm": 2.703125, "learning_rate": 4.922686652100754e-05, "loss": 0.8642, "step": 4634 }, { "epoch": 0.08217781854761873, "grad_norm": 3.03125, "learning_rate": 4.9226177190884645e-05, "loss": 0.8704, "step": 4636 }, { "epoch": 0.0822132705832303, "grad_norm": 2.765625, "learning_rate": 4.922548755842411e-05, "loss": 0.8341, "step": 4638 }, { "epoch": 0.08224872261884188, "grad_norm": 2.921875, "learning_rate": 4.922479762363453e-05, "loss": 0.8625, "step": 4640 }, { "epoch": 0.08228417465445344, "grad_norm": 2.75, "learning_rate": 4.922410738652452e-05, "loss": 0.813, "step": 4642 }, { "epoch": 0.08231962669006501, "grad_norm": 3.03125, "learning_rate": 4.92234168471027e-05, "loss": 0.8251, "step": 4644 }, { "epoch": 0.08235507872567659, "grad_norm": 3.359375, "learning_rate": 4.922272600537767e-05, "loss": 0.8559, "step": 4646 }, { "epoch": 0.08239053076128815, "grad_norm": 2.75, "learning_rate": 4.922203486135808e-05, "loss": 0.8383, "step": 4648 }, { "epoch": 0.08242598279689972, "grad_norm": 2.6875, "learning_rate": 4.9221343415052534e-05, "loss": 0.8475, "step": 4650 }, { "epoch": 0.0824614348325113, "grad_norm": 2.890625, "learning_rate": 4.922065166646966e-05, "loss": 0.8242, "step": 4652 }, { "epoch": 0.08249688686812286, "grad_norm": 2.765625, "learning_rate": 4.921995961561811e-05, "loss": 0.8398, "step": 4654 }, { "epoch": 0.08253233890373443, "grad_norm": 2.671875, "learning_rate": 4.921926726250651e-05, "loss": 0.8347, "step": 4656 }, { "epoch": 0.08256779093934599, "grad_norm": 2.875, "learning_rate": 4.9218574607143485e-05, "loss": 0.8158, "step": 4658 }, { "epoch": 0.08260324297495757, "grad_norm": 3.3125, "learning_rate": 4.92178816495377e-05, "loss": 0.8181, "step": 4660 }, { "epoch": 0.08263869501056914, "grad_norm": 2.8125, "learning_rate": 4.9217188389697796e-05, "loss": 0.8476, "step": 4662 }, { "epoch": 0.0826741470461807, "grad_norm": 3.109375, "learning_rate": 4.921649482763243e-05, "loss": 0.8557, "step": 4664 }, { "epoch": 0.08270959908179228, "grad_norm": 2.671875, "learning_rate": 4.921580096335025e-05, "loss": 0.8052, "step": 4666 }, { "epoch": 0.08274505111740385, "grad_norm": 2.578125, "learning_rate": 4.921510679685992e-05, "loss": 0.8763, "step": 4668 }, { "epoch": 0.08278050315301541, "grad_norm": 2.703125, "learning_rate": 4.921441232817009e-05, "loss": 0.8867, "step": 4670 }, { "epoch": 0.08281595518862699, "grad_norm": 2.765625, "learning_rate": 4.921371755728945e-05, "loss": 0.8104, "step": 4672 }, { "epoch": 0.08285140722423856, "grad_norm": 2.71875, "learning_rate": 4.921302248422665e-05, "loss": 0.8524, "step": 4674 }, { "epoch": 0.08288685925985012, "grad_norm": 2.84375, "learning_rate": 4.9212327108990376e-05, "loss": 0.837, "step": 4676 }, { "epoch": 0.0829223112954617, "grad_norm": 2.828125, "learning_rate": 4.92116314315893e-05, "loss": 0.8133, "step": 4678 }, { "epoch": 0.08295776333107327, "grad_norm": 2.734375, "learning_rate": 4.921093545203211e-05, "loss": 0.8389, "step": 4680 }, { "epoch": 0.08299321536668483, "grad_norm": 2.765625, "learning_rate": 4.921023917032749e-05, "loss": 0.8538, "step": 4682 }, { "epoch": 0.0830286674022964, "grad_norm": 2.484375, "learning_rate": 4.920954258648413e-05, "loss": 0.8015, "step": 4684 }, { "epoch": 0.08306411943790798, "grad_norm": 2.796875, "learning_rate": 4.9208845700510707e-05, "loss": 0.8108, "step": 4686 }, { "epoch": 0.08309957147351954, "grad_norm": 2.640625, "learning_rate": 4.920814851241595e-05, "loss": 0.8173, "step": 4688 }, { "epoch": 0.08313502350913111, "grad_norm": 2.703125, "learning_rate": 4.9207451022208525e-05, "loss": 0.8193, "step": 4690 }, { "epoch": 0.08317047554474269, "grad_norm": 2.71875, "learning_rate": 4.9206753229897165e-05, "loss": 0.8298, "step": 4692 }, { "epoch": 0.08320592758035425, "grad_norm": 2.765625, "learning_rate": 4.9206055135490563e-05, "loss": 0.8196, "step": 4694 }, { "epoch": 0.08324137961596582, "grad_norm": 2.671875, "learning_rate": 4.920535673899743e-05, "loss": 0.8335, "step": 4696 }, { "epoch": 0.0832768316515774, "grad_norm": 2.796875, "learning_rate": 4.9204658040426496e-05, "loss": 0.8865, "step": 4698 }, { "epoch": 0.08331228368718896, "grad_norm": 2.484375, "learning_rate": 4.9203959039786465e-05, "loss": 0.82, "step": 4700 }, { "epoch": 0.08334773572280053, "grad_norm": 2.875, "learning_rate": 4.920325973708607e-05, "loss": 0.8465, "step": 4702 }, { "epoch": 0.08338318775841211, "grad_norm": 2.5, "learning_rate": 4.920256013233403e-05, "loss": 0.8028, "step": 4704 }, { "epoch": 0.08341863979402367, "grad_norm": 2.75, "learning_rate": 4.920186022553909e-05, "loss": 0.838, "step": 4706 }, { "epoch": 0.08345409182963524, "grad_norm": 2.75, "learning_rate": 4.9201160016709964e-05, "loss": 0.8395, "step": 4708 }, { "epoch": 0.08348954386524682, "grad_norm": 2.984375, "learning_rate": 4.920045950585541e-05, "loss": 0.8705, "step": 4710 }, { "epoch": 0.08352499590085838, "grad_norm": 2.71875, "learning_rate": 4.919975869298416e-05, "loss": 0.8168, "step": 4712 }, { "epoch": 0.08356044793646995, "grad_norm": 2.8125, "learning_rate": 4.919905757810496e-05, "loss": 0.8021, "step": 4714 }, { "epoch": 0.08359589997208153, "grad_norm": 3.125, "learning_rate": 4.9198356161226555e-05, "loss": 0.8569, "step": 4716 }, { "epoch": 0.08363135200769309, "grad_norm": 2.53125, "learning_rate": 4.919765444235771e-05, "loss": 0.8266, "step": 4718 }, { "epoch": 0.08366680404330466, "grad_norm": 2.75, "learning_rate": 4.919695242150718e-05, "loss": 0.8239, "step": 4720 }, { "epoch": 0.08370225607891624, "grad_norm": 2.859375, "learning_rate": 4.919625009868373e-05, "loss": 0.8637, "step": 4722 }, { "epoch": 0.0837377081145278, "grad_norm": 2.9375, "learning_rate": 4.919554747389611e-05, "loss": 0.8091, "step": 4724 }, { "epoch": 0.08377316015013937, "grad_norm": 2.921875, "learning_rate": 4.9194844547153095e-05, "loss": 0.8482, "step": 4726 }, { "epoch": 0.08380861218575095, "grad_norm": 2.921875, "learning_rate": 4.919414131846346e-05, "loss": 0.8627, "step": 4728 }, { "epoch": 0.08384406422136251, "grad_norm": 2.75, "learning_rate": 4.9193437787835987e-05, "loss": 0.8841, "step": 4730 }, { "epoch": 0.08387951625697408, "grad_norm": 3.015625, "learning_rate": 4.9192733955279446e-05, "loss": 0.8262, "step": 4732 }, { "epoch": 0.08391496829258566, "grad_norm": 2.921875, "learning_rate": 4.919202982080262e-05, "loss": 0.8459, "step": 4734 }, { "epoch": 0.08395042032819722, "grad_norm": 2.75, "learning_rate": 4.919132538441431e-05, "loss": 0.8286, "step": 4736 }, { "epoch": 0.08398587236380879, "grad_norm": 2.4375, "learning_rate": 4.919062064612329e-05, "loss": 0.834, "step": 4738 }, { "epoch": 0.08402132439942035, "grad_norm": 2.78125, "learning_rate": 4.918991560593837e-05, "loss": 0.8834, "step": 4740 }, { "epoch": 0.08405677643503193, "grad_norm": 2.625, "learning_rate": 4.9189210263868335e-05, "loss": 0.7786, "step": 4742 }, { "epoch": 0.0840922284706435, "grad_norm": 3.015625, "learning_rate": 4.9188504619922e-05, "loss": 0.8179, "step": 4744 }, { "epoch": 0.08412768050625506, "grad_norm": 3.0625, "learning_rate": 4.918779867410817e-05, "loss": 0.8539, "step": 4746 }, { "epoch": 0.08416313254186664, "grad_norm": 2.578125, "learning_rate": 4.9187092426435634e-05, "loss": 0.8234, "step": 4748 }, { "epoch": 0.08419858457747821, "grad_norm": 2.671875, "learning_rate": 4.918638587691323e-05, "loss": 0.841, "step": 4750 }, { "epoch": 0.08423403661308977, "grad_norm": 3.4375, "learning_rate": 4.918567902554977e-05, "loss": 0.8399, "step": 4752 }, { "epoch": 0.08426948864870135, "grad_norm": 2.65625, "learning_rate": 4.918497187235407e-05, "loss": 0.8286, "step": 4754 }, { "epoch": 0.08430494068431292, "grad_norm": 2.859375, "learning_rate": 4.918426441733496e-05, "loss": 0.8949, "step": 4756 }, { "epoch": 0.08434039271992448, "grad_norm": 2.9375, "learning_rate": 4.918355666050127e-05, "loss": 0.8858, "step": 4758 }, { "epoch": 0.08437584475553606, "grad_norm": 2.828125, "learning_rate": 4.918284860186183e-05, "loss": 0.8442, "step": 4760 }, { "epoch": 0.08441129679114763, "grad_norm": 2.796875, "learning_rate": 4.918214024142547e-05, "loss": 0.859, "step": 4762 }, { "epoch": 0.08444674882675919, "grad_norm": 2.875, "learning_rate": 4.918143157920104e-05, "loss": 0.8918, "step": 4764 }, { "epoch": 0.08448220086237077, "grad_norm": 2.765625, "learning_rate": 4.918072261519738e-05, "loss": 0.8376, "step": 4766 }, { "epoch": 0.08451765289798234, "grad_norm": 2.890625, "learning_rate": 4.9180013349423346e-05, "loss": 0.8575, "step": 4768 }, { "epoch": 0.0845531049335939, "grad_norm": 2.71875, "learning_rate": 4.917930378188778e-05, "loss": 0.8454, "step": 4770 }, { "epoch": 0.08458855696920548, "grad_norm": 2.859375, "learning_rate": 4.917859391259952e-05, "loss": 0.8589, "step": 4772 }, { "epoch": 0.08462400900481705, "grad_norm": 2.578125, "learning_rate": 4.917788374156747e-05, "loss": 0.8413, "step": 4774 }, { "epoch": 0.08465946104042861, "grad_norm": 2.546875, "learning_rate": 4.917717326880045e-05, "loss": 0.8128, "step": 4776 }, { "epoch": 0.08469491307604018, "grad_norm": 3.265625, "learning_rate": 4.917646249430735e-05, "loss": 0.8709, "step": 4778 }, { "epoch": 0.08473036511165176, "grad_norm": 2.46875, "learning_rate": 4.917575141809703e-05, "loss": 0.8053, "step": 4780 }, { "epoch": 0.08476581714726332, "grad_norm": 2.578125, "learning_rate": 4.917504004017837e-05, "loss": 0.8852, "step": 4782 }, { "epoch": 0.0848012691828749, "grad_norm": 2.734375, "learning_rate": 4.917432836056025e-05, "loss": 0.8599, "step": 4784 }, { "epoch": 0.08483672121848647, "grad_norm": 2.6875, "learning_rate": 4.917361637925154e-05, "loss": 0.8266, "step": 4786 }, { "epoch": 0.08487217325409803, "grad_norm": 2.765625, "learning_rate": 4.9172904096261136e-05, "loss": 0.8163, "step": 4788 }, { "epoch": 0.0849076252897096, "grad_norm": 3.0625, "learning_rate": 4.917219151159792e-05, "loss": 0.856, "step": 4790 }, { "epoch": 0.08494307732532118, "grad_norm": 2.953125, "learning_rate": 4.917147862527079e-05, "loss": 0.8377, "step": 4792 }, { "epoch": 0.08497852936093274, "grad_norm": 2.71875, "learning_rate": 4.9170765437288644e-05, "loss": 0.8261, "step": 4794 }, { "epoch": 0.08501398139654431, "grad_norm": 2.9375, "learning_rate": 4.917005194766038e-05, "loss": 0.8279, "step": 4796 }, { "epoch": 0.08504943343215589, "grad_norm": 3.109375, "learning_rate": 4.9169338156394904e-05, "loss": 0.8655, "step": 4798 }, { "epoch": 0.08508488546776745, "grad_norm": 2.90625, "learning_rate": 4.916862406350112e-05, "loss": 0.8507, "step": 4800 }, { "epoch": 0.08512033750337902, "grad_norm": 2.671875, "learning_rate": 4.9167909668987935e-05, "loss": 0.8584, "step": 4802 }, { "epoch": 0.0851557895389906, "grad_norm": 2.6875, "learning_rate": 4.9167194972864275e-05, "loss": 0.8287, "step": 4804 }, { "epoch": 0.08519124157460216, "grad_norm": 2.828125, "learning_rate": 4.916647997513906e-05, "loss": 0.8062, "step": 4806 }, { "epoch": 0.08522669361021373, "grad_norm": 3.03125, "learning_rate": 4.91657646758212e-05, "loss": 0.8694, "step": 4808 }, { "epoch": 0.08526214564582531, "grad_norm": 3.109375, "learning_rate": 4.9165049074919646e-05, "loss": 0.8329, "step": 4810 }, { "epoch": 0.08529759768143687, "grad_norm": 2.546875, "learning_rate": 4.91643331724433e-05, "loss": 0.8391, "step": 4812 }, { "epoch": 0.08533304971704844, "grad_norm": 2.625, "learning_rate": 4.91636169684011e-05, "loss": 0.8347, "step": 4814 }, { "epoch": 0.08536850175266002, "grad_norm": 2.78125, "learning_rate": 4.916290046280201e-05, "loss": 0.8175, "step": 4816 }, { "epoch": 0.08540395378827158, "grad_norm": 2.84375, "learning_rate": 4.916218365565495e-05, "loss": 0.8785, "step": 4818 }, { "epoch": 0.08543940582388315, "grad_norm": 2.703125, "learning_rate": 4.916146654696887e-05, "loss": 0.8315, "step": 4820 }, { "epoch": 0.08547485785949473, "grad_norm": 2.5625, "learning_rate": 4.916074913675272e-05, "loss": 0.8088, "step": 4822 }, { "epoch": 0.08551030989510629, "grad_norm": 2.4375, "learning_rate": 4.916003142501545e-05, "loss": 0.818, "step": 4824 }, { "epoch": 0.08554576193071786, "grad_norm": 2.703125, "learning_rate": 4.915931341176603e-05, "loss": 0.8604, "step": 4826 }, { "epoch": 0.08558121396632942, "grad_norm": 2.921875, "learning_rate": 4.915859509701341e-05, "loss": 0.8509, "step": 4828 }, { "epoch": 0.085616666001941, "grad_norm": 2.796875, "learning_rate": 4.915787648076654e-05, "loss": 0.8354, "step": 4830 }, { "epoch": 0.08565211803755257, "grad_norm": 2.828125, "learning_rate": 4.9157157563034414e-05, "loss": 0.858, "step": 4832 }, { "epoch": 0.08568757007316413, "grad_norm": 2.875, "learning_rate": 4.915643834382599e-05, "loss": 0.8217, "step": 4834 }, { "epoch": 0.0857230221087757, "grad_norm": 2.796875, "learning_rate": 4.915571882315024e-05, "loss": 0.839, "step": 4836 }, { "epoch": 0.08575847414438728, "grad_norm": 2.765625, "learning_rate": 4.9154999001016165e-05, "loss": 0.8276, "step": 4838 }, { "epoch": 0.08579392617999884, "grad_norm": 2.65625, "learning_rate": 4.915427887743273e-05, "loss": 0.8278, "step": 4840 }, { "epoch": 0.08582937821561042, "grad_norm": 2.734375, "learning_rate": 4.915355845240892e-05, "loss": 0.8404, "step": 4842 }, { "epoch": 0.08586483025122199, "grad_norm": 2.765625, "learning_rate": 4.915283772595373e-05, "loss": 0.8471, "step": 4844 }, { "epoch": 0.08590028228683355, "grad_norm": 2.734375, "learning_rate": 4.915211669807616e-05, "loss": 0.8204, "step": 4846 }, { "epoch": 0.08593573432244513, "grad_norm": 2.796875, "learning_rate": 4.915139536878521e-05, "loss": 0.8382, "step": 4848 }, { "epoch": 0.0859711863580567, "grad_norm": 2.6875, "learning_rate": 4.915067373808987e-05, "loss": 0.8489, "step": 4850 }, { "epoch": 0.08600663839366826, "grad_norm": 2.765625, "learning_rate": 4.914995180599915e-05, "loss": 0.8831, "step": 4852 }, { "epoch": 0.08604209042927984, "grad_norm": 2.890625, "learning_rate": 4.914922957252206e-05, "loss": 0.8454, "step": 4854 }, { "epoch": 0.08607754246489141, "grad_norm": 2.78125, "learning_rate": 4.914850703766762e-05, "loss": 0.8631, "step": 4856 }, { "epoch": 0.08611299450050297, "grad_norm": 2.84375, "learning_rate": 4.914778420144484e-05, "loss": 0.8594, "step": 4858 }, { "epoch": 0.08614844653611455, "grad_norm": 2.859375, "learning_rate": 4.914706106386274e-05, "loss": 0.8227, "step": 4860 }, { "epoch": 0.08618389857172612, "grad_norm": 2.65625, "learning_rate": 4.914633762493035e-05, "loss": 0.8217, "step": 4862 }, { "epoch": 0.08621935060733768, "grad_norm": 2.703125, "learning_rate": 4.91456138846567e-05, "loss": 0.8461, "step": 4864 }, { "epoch": 0.08625480264294925, "grad_norm": 2.65625, "learning_rate": 4.914488984305081e-05, "loss": 0.8445, "step": 4866 }, { "epoch": 0.08629025467856083, "grad_norm": 2.671875, "learning_rate": 4.914416550012173e-05, "loss": 0.8095, "step": 4868 }, { "epoch": 0.08632570671417239, "grad_norm": 2.78125, "learning_rate": 4.9143440855878496e-05, "loss": 0.8082, "step": 4870 }, { "epoch": 0.08636115874978396, "grad_norm": 2.625, "learning_rate": 4.914271591033014e-05, "loss": 0.8297, "step": 4872 }, { "epoch": 0.08639661078539554, "grad_norm": 2.625, "learning_rate": 4.914199066348573e-05, "loss": 0.8541, "step": 4874 }, { "epoch": 0.0864320628210071, "grad_norm": 2.828125, "learning_rate": 4.9141265115354294e-05, "loss": 0.804, "step": 4876 }, { "epoch": 0.08646751485661867, "grad_norm": 2.734375, "learning_rate": 4.9140539265944894e-05, "loss": 0.8892, "step": 4878 }, { "epoch": 0.08650296689223025, "grad_norm": 2.75, "learning_rate": 4.9139813115266604e-05, "loss": 0.8561, "step": 4880 }, { "epoch": 0.08653841892784181, "grad_norm": 2.671875, "learning_rate": 4.913908666332847e-05, "loss": 0.8416, "step": 4882 }, { "epoch": 0.08657387096345338, "grad_norm": 2.921875, "learning_rate": 4.9138359910139556e-05, "loss": 0.8247, "step": 4884 }, { "epoch": 0.08660932299906496, "grad_norm": 2.65625, "learning_rate": 4.9137632855708946e-05, "loss": 0.8197, "step": 4886 }, { "epoch": 0.08664477503467652, "grad_norm": 2.890625, "learning_rate": 4.9136905500045705e-05, "loss": 0.8617, "step": 4888 }, { "epoch": 0.0866802270702881, "grad_norm": 2.734375, "learning_rate": 4.9136177843158906e-05, "loss": 0.8368, "step": 4890 }, { "epoch": 0.08671567910589967, "grad_norm": 2.8125, "learning_rate": 4.9135449885057636e-05, "loss": 0.8302, "step": 4892 }, { "epoch": 0.08675113114151123, "grad_norm": 2.53125, "learning_rate": 4.913472162575098e-05, "loss": 0.8489, "step": 4894 }, { "epoch": 0.0867865831771228, "grad_norm": 2.875, "learning_rate": 4.913399306524803e-05, "loss": 0.8218, "step": 4896 }, { "epoch": 0.08682203521273438, "grad_norm": 2.859375, "learning_rate": 4.913326420355787e-05, "loss": 0.8156, "step": 4898 }, { "epoch": 0.08685748724834594, "grad_norm": 2.46875, "learning_rate": 4.913253504068959e-05, "loss": 0.8496, "step": 4900 }, { "epoch": 0.08689293928395751, "grad_norm": 2.953125, "learning_rate": 4.9131805576652315e-05, "loss": 0.8293, "step": 4902 }, { "epoch": 0.08692839131956909, "grad_norm": 2.75, "learning_rate": 4.913107581145512e-05, "loss": 0.8324, "step": 4904 }, { "epoch": 0.08696384335518065, "grad_norm": 2.6875, "learning_rate": 4.913034574510713e-05, "loss": 0.8057, "step": 4906 }, { "epoch": 0.08699929539079222, "grad_norm": 2.859375, "learning_rate": 4.9129615377617445e-05, "loss": 0.8315, "step": 4908 }, { "epoch": 0.08703474742640378, "grad_norm": 3.0625, "learning_rate": 4.912888470899519e-05, "loss": 0.8528, "step": 4910 }, { "epoch": 0.08707019946201536, "grad_norm": 3.0625, "learning_rate": 4.912815373924948e-05, "loss": 0.8459, "step": 4912 }, { "epoch": 0.08710565149762693, "grad_norm": 3.484375, "learning_rate": 4.912742246838944e-05, "loss": 0.8431, "step": 4914 }, { "epoch": 0.08714110353323849, "grad_norm": 2.703125, "learning_rate": 4.9126690896424195e-05, "loss": 0.8502, "step": 4916 }, { "epoch": 0.08717655556885007, "grad_norm": 2.875, "learning_rate": 4.912595902336287e-05, "loss": 0.8689, "step": 4918 }, { "epoch": 0.08721200760446164, "grad_norm": 2.890625, "learning_rate": 4.912522684921459e-05, "loss": 0.8495, "step": 4920 }, { "epoch": 0.0872474596400732, "grad_norm": 2.875, "learning_rate": 4.9124494373988516e-05, "loss": 0.8241, "step": 4922 }, { "epoch": 0.08728291167568478, "grad_norm": 2.84375, "learning_rate": 4.912376159769378e-05, "loss": 0.8331, "step": 4924 }, { "epoch": 0.08731836371129635, "grad_norm": 2.625, "learning_rate": 4.912302852033952e-05, "loss": 0.8012, "step": 4926 }, { "epoch": 0.08735381574690791, "grad_norm": 2.5625, "learning_rate": 4.912229514193488e-05, "loss": 0.8198, "step": 4928 }, { "epoch": 0.08738926778251949, "grad_norm": 2.90625, "learning_rate": 4.912156146248903e-05, "loss": 0.812, "step": 4930 }, { "epoch": 0.08742471981813106, "grad_norm": 2.875, "learning_rate": 4.912082748201112e-05, "loss": 0.8546, "step": 4932 }, { "epoch": 0.08746017185374262, "grad_norm": 2.921875, "learning_rate": 4.9120093200510295e-05, "loss": 0.8525, "step": 4934 }, { "epoch": 0.0874956238893542, "grad_norm": 2.53125, "learning_rate": 4.911935861799574e-05, "loss": 0.8227, "step": 4936 }, { "epoch": 0.08753107592496577, "grad_norm": 3.171875, "learning_rate": 4.911862373447661e-05, "loss": 0.7985, "step": 4938 }, { "epoch": 0.08756652796057733, "grad_norm": 2.65625, "learning_rate": 4.911788854996209e-05, "loss": 0.8347, "step": 4940 }, { "epoch": 0.0876019799961889, "grad_norm": 2.84375, "learning_rate": 4.911715306446133e-05, "loss": 0.8388, "step": 4942 }, { "epoch": 0.08763743203180048, "grad_norm": 3.0, "learning_rate": 4.911641727798353e-05, "loss": 0.8764, "step": 4944 }, { "epoch": 0.08767288406741204, "grad_norm": 2.96875, "learning_rate": 4.911568119053786e-05, "loss": 0.8823, "step": 4946 }, { "epoch": 0.08770833610302362, "grad_norm": 3.125, "learning_rate": 4.911494480213351e-05, "loss": 0.8387, "step": 4948 }, { "epoch": 0.08774378813863519, "grad_norm": 2.671875, "learning_rate": 4.911420811277968e-05, "loss": 0.7991, "step": 4950 }, { "epoch": 0.08777924017424675, "grad_norm": 2.65625, "learning_rate": 4.9113471122485556e-05, "loss": 0.805, "step": 4952 }, { "epoch": 0.08781469220985832, "grad_norm": 2.671875, "learning_rate": 4.9112733831260325e-05, "loss": 0.8836, "step": 4954 }, { "epoch": 0.0878501442454699, "grad_norm": 2.84375, "learning_rate": 4.911199623911321e-05, "loss": 0.8398, "step": 4956 }, { "epoch": 0.08788559628108146, "grad_norm": 2.953125, "learning_rate": 4.911125834605339e-05, "loss": 0.837, "step": 4958 }, { "epoch": 0.08792104831669303, "grad_norm": 2.9375, "learning_rate": 4.9110520152090104e-05, "loss": 0.7892, "step": 4960 }, { "epoch": 0.08795650035230461, "grad_norm": 3.046875, "learning_rate": 4.910978165723253e-05, "loss": 0.8841, "step": 4962 }, { "epoch": 0.08799195238791617, "grad_norm": 3.546875, "learning_rate": 4.910904286148992e-05, "loss": 0.8603, "step": 4964 }, { "epoch": 0.08802740442352774, "grad_norm": 2.796875, "learning_rate": 4.910830376487146e-05, "loss": 0.8726, "step": 4966 }, { "epoch": 0.08806285645913932, "grad_norm": 2.59375, "learning_rate": 4.9107564367386404e-05, "loss": 0.8372, "step": 4968 }, { "epoch": 0.08809830849475088, "grad_norm": 2.5625, "learning_rate": 4.9106824669043964e-05, "loss": 0.8159, "step": 4970 }, { "epoch": 0.08813376053036245, "grad_norm": 2.9375, "learning_rate": 4.910608466985337e-05, "loss": 0.8235, "step": 4972 }, { "epoch": 0.08816921256597403, "grad_norm": 2.765625, "learning_rate": 4.910534436982386e-05, "loss": 0.8469, "step": 4974 }, { "epoch": 0.08820466460158559, "grad_norm": 2.71875, "learning_rate": 4.910460376896468e-05, "loss": 0.8395, "step": 4976 }, { "epoch": 0.08824011663719716, "grad_norm": 3.3125, "learning_rate": 4.9103862867285056e-05, "loss": 0.8427, "step": 4978 }, { "epoch": 0.08827556867280874, "grad_norm": 2.53125, "learning_rate": 4.910312166479425e-05, "loss": 0.8158, "step": 4980 }, { "epoch": 0.0883110207084203, "grad_norm": 2.921875, "learning_rate": 4.910238016150151e-05, "loss": 0.8614, "step": 4982 }, { "epoch": 0.08834647274403187, "grad_norm": 2.703125, "learning_rate": 4.910163835741607e-05, "loss": 0.8538, "step": 4984 }, { "epoch": 0.08838192477964345, "grad_norm": 2.703125, "learning_rate": 4.9100896252547215e-05, "loss": 0.8273, "step": 4986 }, { "epoch": 0.08841737681525501, "grad_norm": 2.625, "learning_rate": 4.91001538469042e-05, "loss": 0.792, "step": 4988 }, { "epoch": 0.08845282885086658, "grad_norm": 2.578125, "learning_rate": 4.909941114049627e-05, "loss": 0.8621, "step": 4990 }, { "epoch": 0.08848828088647816, "grad_norm": 2.6875, "learning_rate": 4.9098668133332714e-05, "loss": 0.8404, "step": 4992 }, { "epoch": 0.08852373292208972, "grad_norm": 2.71875, "learning_rate": 4.90979248254228e-05, "loss": 0.8261, "step": 4994 }, { "epoch": 0.08855918495770129, "grad_norm": 2.75, "learning_rate": 4.9097181216775805e-05, "loss": 0.8467, "step": 4996 }, { "epoch": 0.08859463699331285, "grad_norm": 2.703125, "learning_rate": 4.9096437307401004e-05, "loss": 0.8357, "step": 4998 }, { "epoch": 0.08863008902892443, "grad_norm": 2.625, "learning_rate": 4.909569309730769e-05, "loss": 0.8499, "step": 5000 }, { "epoch": 0.088665541064536, "grad_norm": 3.09375, "learning_rate": 4.909494858650514e-05, "loss": 0.8532, "step": 5002 }, { "epoch": 0.08870099310014756, "grad_norm": 3.0625, "learning_rate": 4.909420377500266e-05, "loss": 0.8132, "step": 5004 }, { "epoch": 0.08873644513575914, "grad_norm": 2.96875, "learning_rate": 4.909345866280952e-05, "loss": 0.8429, "step": 5006 }, { "epoch": 0.08877189717137071, "grad_norm": 2.796875, "learning_rate": 4.909271324993504e-05, "loss": 0.8266, "step": 5008 }, { "epoch": 0.08880734920698227, "grad_norm": 2.65625, "learning_rate": 4.909196753638852e-05, "loss": 0.8255, "step": 5010 }, { "epoch": 0.08884280124259385, "grad_norm": 2.953125, "learning_rate": 4.9091221522179264e-05, "loss": 0.8205, "step": 5012 }, { "epoch": 0.08887825327820542, "grad_norm": 2.546875, "learning_rate": 4.909047520731658e-05, "loss": 0.8872, "step": 5014 }, { "epoch": 0.08891370531381698, "grad_norm": 2.625, "learning_rate": 4.908972859180978e-05, "loss": 0.8599, "step": 5016 }, { "epoch": 0.08894915734942856, "grad_norm": 2.703125, "learning_rate": 4.9088981675668185e-05, "loss": 0.8741, "step": 5018 }, { "epoch": 0.08898460938504013, "grad_norm": 2.9375, "learning_rate": 4.9088234458901114e-05, "loss": 0.839, "step": 5020 }, { "epoch": 0.08902006142065169, "grad_norm": 2.84375, "learning_rate": 4.90874869415179e-05, "loss": 0.8378, "step": 5022 }, { "epoch": 0.08905551345626327, "grad_norm": 2.9375, "learning_rate": 4.908673912352786e-05, "loss": 0.8216, "step": 5024 }, { "epoch": 0.08909096549187484, "grad_norm": 2.796875, "learning_rate": 4.908599100494034e-05, "loss": 0.8167, "step": 5026 }, { "epoch": 0.0891264175274864, "grad_norm": 2.59375, "learning_rate": 4.908524258576467e-05, "loss": 0.8109, "step": 5028 }, { "epoch": 0.08916186956309798, "grad_norm": 2.453125, "learning_rate": 4.908449386601019e-05, "loss": 0.8242, "step": 5030 }, { "epoch": 0.08919732159870955, "grad_norm": 2.859375, "learning_rate": 4.908374484568623e-05, "loss": 0.8618, "step": 5032 }, { "epoch": 0.08923277363432111, "grad_norm": 3.046875, "learning_rate": 4.9082995524802157e-05, "loss": 0.8933, "step": 5034 }, { "epoch": 0.08926822566993269, "grad_norm": 2.828125, "learning_rate": 4.908224590336732e-05, "loss": 0.9062, "step": 5036 }, { "epoch": 0.08930367770554426, "grad_norm": 2.703125, "learning_rate": 4.908149598139107e-05, "loss": 0.81, "step": 5038 }, { "epoch": 0.08933912974115582, "grad_norm": 2.84375, "learning_rate": 4.908074575888276e-05, "loss": 0.8303, "step": 5040 }, { "epoch": 0.0893745817767674, "grad_norm": 2.5, "learning_rate": 4.907999523585176e-05, "loss": 0.836, "step": 5042 }, { "epoch": 0.08941003381237897, "grad_norm": 2.59375, "learning_rate": 4.907924441230743e-05, "loss": 0.7865, "step": 5044 }, { "epoch": 0.08944548584799053, "grad_norm": 2.859375, "learning_rate": 4.907849328825916e-05, "loss": 0.8685, "step": 5046 }, { "epoch": 0.0894809378836021, "grad_norm": 2.671875, "learning_rate": 4.9077741863716296e-05, "loss": 0.8481, "step": 5048 }, { "epoch": 0.08951638991921368, "grad_norm": 2.640625, "learning_rate": 4.907699013868824e-05, "loss": 0.8459, "step": 5050 }, { "epoch": 0.08955184195482524, "grad_norm": 2.59375, "learning_rate": 4.9076238113184344e-05, "loss": 0.8189, "step": 5052 }, { "epoch": 0.08958729399043681, "grad_norm": 2.71875, "learning_rate": 4.9075485787214024e-05, "loss": 0.818, "step": 5054 }, { "epoch": 0.08962274602604839, "grad_norm": 3.15625, "learning_rate": 4.907473316078666e-05, "loss": 0.8193, "step": 5056 }, { "epoch": 0.08965819806165995, "grad_norm": 3.0, "learning_rate": 4.9073980233911635e-05, "loss": 0.8221, "step": 5058 }, { "epoch": 0.08969365009727152, "grad_norm": 2.75, "learning_rate": 4.907322700659835e-05, "loss": 0.8591, "step": 5060 }, { "epoch": 0.0897291021328831, "grad_norm": 2.609375, "learning_rate": 4.907247347885621e-05, "loss": 0.8252, "step": 5062 }, { "epoch": 0.08976455416849466, "grad_norm": 2.65625, "learning_rate": 4.907171965069461e-05, "loss": 0.7966, "step": 5064 }, { "epoch": 0.08980000620410623, "grad_norm": 2.78125, "learning_rate": 4.907096552212296e-05, "loss": 0.82, "step": 5066 }, { "epoch": 0.08983545823971781, "grad_norm": 3.0, "learning_rate": 4.907021109315068e-05, "loss": 0.8558, "step": 5068 }, { "epoch": 0.08987091027532937, "grad_norm": 2.796875, "learning_rate": 4.906945636378718e-05, "loss": 0.8481, "step": 5070 }, { "epoch": 0.08990636231094094, "grad_norm": 2.875, "learning_rate": 4.906870133404187e-05, "loss": 0.8009, "step": 5072 }, { "epoch": 0.08994181434655252, "grad_norm": 2.796875, "learning_rate": 4.906794600392419e-05, "loss": 0.834, "step": 5074 }, { "epoch": 0.08997726638216408, "grad_norm": 2.65625, "learning_rate": 4.906719037344355e-05, "loss": 0.8621, "step": 5076 }, { "epoch": 0.09001271841777565, "grad_norm": 3.0, "learning_rate": 4.906643444260939e-05, "loss": 0.8749, "step": 5078 }, { "epoch": 0.09004817045338723, "grad_norm": 2.65625, "learning_rate": 4.906567821143114e-05, "loss": 0.7919, "step": 5080 }, { "epoch": 0.09008362248899879, "grad_norm": 2.890625, "learning_rate": 4.906492167991824e-05, "loss": 0.8311, "step": 5082 }, { "epoch": 0.09011907452461036, "grad_norm": 3.046875, "learning_rate": 4.906416484808013e-05, "loss": 0.8423, "step": 5084 }, { "epoch": 0.09015452656022192, "grad_norm": 3.15625, "learning_rate": 4.9063407715926255e-05, "loss": 0.8415, "step": 5086 }, { "epoch": 0.0901899785958335, "grad_norm": 2.625, "learning_rate": 4.906265028346606e-05, "loss": 0.8695, "step": 5088 }, { "epoch": 0.09022543063144507, "grad_norm": 2.765625, "learning_rate": 4.9061892550709e-05, "loss": 0.8689, "step": 5090 }, { "epoch": 0.09026088266705663, "grad_norm": 2.5625, "learning_rate": 4.906113451766454e-05, "loss": 0.8314, "step": 5092 }, { "epoch": 0.09029633470266821, "grad_norm": 2.78125, "learning_rate": 4.906037618434213e-05, "loss": 0.834, "step": 5094 }, { "epoch": 0.09033178673827978, "grad_norm": 2.734375, "learning_rate": 4.905961755075124e-05, "loss": 0.8216, "step": 5096 }, { "epoch": 0.09036723877389134, "grad_norm": 2.375, "learning_rate": 4.905885861690133e-05, "loss": 0.7918, "step": 5098 }, { "epoch": 0.09040269080950292, "grad_norm": 2.5625, "learning_rate": 4.905809938280187e-05, "loss": 0.8244, "step": 5100 }, { "epoch": 0.09043814284511449, "grad_norm": 2.5625, "learning_rate": 4.905733984846235e-05, "loss": 0.8276, "step": 5102 }, { "epoch": 0.09047359488072605, "grad_norm": 2.953125, "learning_rate": 4.905658001389223e-05, "loss": 0.8623, "step": 5104 }, { "epoch": 0.09050904691633763, "grad_norm": 2.875, "learning_rate": 4.905581987910101e-05, "loss": 0.812, "step": 5106 }, { "epoch": 0.0905444989519492, "grad_norm": 2.875, "learning_rate": 4.905505944409816e-05, "loss": 0.8412, "step": 5108 }, { "epoch": 0.09057995098756076, "grad_norm": 2.71875, "learning_rate": 4.905429870889319e-05, "loss": 0.8368, "step": 5110 }, { "epoch": 0.09061540302317234, "grad_norm": 2.71875, "learning_rate": 4.9053537673495576e-05, "loss": 0.8673, "step": 5112 }, { "epoch": 0.09065085505878391, "grad_norm": 2.78125, "learning_rate": 4.9052776337914816e-05, "loss": 0.8744, "step": 5114 }, { "epoch": 0.09068630709439547, "grad_norm": 2.9375, "learning_rate": 4.905201470216043e-05, "loss": 0.7991, "step": 5116 }, { "epoch": 0.09072175913000705, "grad_norm": 2.984375, "learning_rate": 4.90512527662419e-05, "loss": 0.8332, "step": 5118 }, { "epoch": 0.09075721116561862, "grad_norm": 3.0, "learning_rate": 4.9050490530168743e-05, "loss": 0.8392, "step": 5120 }, { "epoch": 0.09079266320123018, "grad_norm": 2.796875, "learning_rate": 4.9049727993950486e-05, "loss": 0.8683, "step": 5122 }, { "epoch": 0.09082811523684176, "grad_norm": 2.671875, "learning_rate": 4.904896515759663e-05, "loss": 0.8318, "step": 5124 }, { "epoch": 0.09086356727245333, "grad_norm": 2.75, "learning_rate": 4.904820202111669e-05, "loss": 0.8412, "step": 5126 }, { "epoch": 0.09089901930806489, "grad_norm": 3.125, "learning_rate": 4.90474385845202e-05, "loss": 0.8313, "step": 5128 }, { "epoch": 0.09093447134367647, "grad_norm": 2.984375, "learning_rate": 4.9046674847816685e-05, "loss": 0.8337, "step": 5130 }, { "epoch": 0.09096992337928804, "grad_norm": 2.390625, "learning_rate": 4.904591081101568e-05, "loss": 0.8189, "step": 5132 }, { "epoch": 0.0910053754148996, "grad_norm": 3.078125, "learning_rate": 4.904514647412672e-05, "loss": 0.8416, "step": 5134 }, { "epoch": 0.09104082745051117, "grad_norm": 2.78125, "learning_rate": 4.904438183715933e-05, "loss": 0.8311, "step": 5136 }, { "epoch": 0.09107627948612275, "grad_norm": 2.609375, "learning_rate": 4.904361690012307e-05, "loss": 0.8309, "step": 5138 }, { "epoch": 0.09111173152173431, "grad_norm": 2.9375, "learning_rate": 4.904285166302748e-05, "loss": 0.8497, "step": 5140 }, { "epoch": 0.09114718355734588, "grad_norm": 2.828125, "learning_rate": 4.9042086125882104e-05, "loss": 0.869, "step": 5142 }, { "epoch": 0.09118263559295746, "grad_norm": 2.78125, "learning_rate": 4.9041320288696505e-05, "loss": 0.8205, "step": 5144 }, { "epoch": 0.09121808762856902, "grad_norm": 2.921875, "learning_rate": 4.9040554151480235e-05, "loss": 0.825, "step": 5146 }, { "epoch": 0.0912535396641806, "grad_norm": 2.859375, "learning_rate": 4.903978771424285e-05, "loss": 0.8514, "step": 5148 }, { "epoch": 0.09128899169979217, "grad_norm": 2.703125, "learning_rate": 4.903902097699393e-05, "loss": 0.8235, "step": 5150 }, { "epoch": 0.09132444373540373, "grad_norm": 2.78125, "learning_rate": 4.903825393974303e-05, "loss": 0.869, "step": 5152 }, { "epoch": 0.0913598957710153, "grad_norm": 2.9375, "learning_rate": 4.903748660249973e-05, "loss": 0.8642, "step": 5154 }, { "epoch": 0.09139534780662688, "grad_norm": 2.609375, "learning_rate": 4.90367189652736e-05, "loss": 0.8293, "step": 5156 }, { "epoch": 0.09143079984223844, "grad_norm": 2.859375, "learning_rate": 4.903595102807423e-05, "loss": 0.8324, "step": 5158 }, { "epoch": 0.09146625187785001, "grad_norm": 2.578125, "learning_rate": 4.903518279091119e-05, "loss": 0.8141, "step": 5160 }, { "epoch": 0.09150170391346159, "grad_norm": 2.78125, "learning_rate": 4.903441425379408e-05, "loss": 0.8715, "step": 5162 }, { "epoch": 0.09153715594907315, "grad_norm": 2.6875, "learning_rate": 4.9033645416732486e-05, "loss": 0.8393, "step": 5164 }, { "epoch": 0.09157260798468472, "grad_norm": 2.59375, "learning_rate": 4.903287627973601e-05, "loss": 0.8631, "step": 5166 }, { "epoch": 0.09160806002029628, "grad_norm": 2.65625, "learning_rate": 4.903210684281423e-05, "loss": 0.8291, "step": 5168 }, { "epoch": 0.09164351205590786, "grad_norm": 2.84375, "learning_rate": 4.903133710597677e-05, "loss": 0.8772, "step": 5170 }, { "epoch": 0.09167896409151943, "grad_norm": 2.96875, "learning_rate": 4.903056706923322e-05, "loss": 0.851, "step": 5172 }, { "epoch": 0.091714416127131, "grad_norm": 2.703125, "learning_rate": 4.902979673259321e-05, "loss": 0.8378, "step": 5174 }, { "epoch": 0.09174986816274257, "grad_norm": 2.84375, "learning_rate": 4.902902609606634e-05, "loss": 0.8006, "step": 5176 }, { "epoch": 0.09178532019835414, "grad_norm": 2.765625, "learning_rate": 4.902825515966223e-05, "loss": 0.8601, "step": 5178 }, { "epoch": 0.0918207722339657, "grad_norm": 2.65625, "learning_rate": 4.902748392339049e-05, "loss": 0.8244, "step": 5180 }, { "epoch": 0.09185622426957728, "grad_norm": 2.6875, "learning_rate": 4.902671238726076e-05, "loss": 0.8326, "step": 5182 }, { "epoch": 0.09189167630518885, "grad_norm": 3.109375, "learning_rate": 4.902594055128267e-05, "loss": 0.8521, "step": 5184 }, { "epoch": 0.09192712834080041, "grad_norm": 2.65625, "learning_rate": 4.902516841546584e-05, "loss": 0.8358, "step": 5186 }, { "epoch": 0.09196258037641199, "grad_norm": 2.875, "learning_rate": 4.9024395979819915e-05, "loss": 0.8225, "step": 5188 }, { "epoch": 0.09199803241202356, "grad_norm": 2.625, "learning_rate": 4.902362324435453e-05, "loss": 0.8264, "step": 5190 }, { "epoch": 0.09203348444763512, "grad_norm": 2.671875, "learning_rate": 4.902285020907933e-05, "loss": 0.8246, "step": 5192 }, { "epoch": 0.0920689364832467, "grad_norm": 3.046875, "learning_rate": 4.902207687400396e-05, "loss": 0.8338, "step": 5194 }, { "epoch": 0.09210438851885827, "grad_norm": 2.890625, "learning_rate": 4.902130323913808e-05, "loss": 0.8269, "step": 5196 }, { "epoch": 0.09213984055446983, "grad_norm": 2.75, "learning_rate": 4.9020529304491345e-05, "loss": 0.8291, "step": 5198 }, { "epoch": 0.0921752925900814, "grad_norm": 2.78125, "learning_rate": 4.90197550700734e-05, "loss": 0.8452, "step": 5200 }, { "epoch": 0.09221074462569298, "grad_norm": 3.046875, "learning_rate": 4.901898053589391e-05, "loss": 0.8634, "step": 5202 }, { "epoch": 0.09224619666130454, "grad_norm": 2.890625, "learning_rate": 4.901820570196255e-05, "loss": 0.8451, "step": 5204 }, { "epoch": 0.09228164869691612, "grad_norm": 2.703125, "learning_rate": 4.901743056828899e-05, "loss": 0.9183, "step": 5206 }, { "epoch": 0.09231710073252769, "grad_norm": 3.109375, "learning_rate": 4.901665513488289e-05, "loss": 0.8343, "step": 5208 }, { "epoch": 0.09235255276813925, "grad_norm": 2.90625, "learning_rate": 4.901587940175394e-05, "loss": 0.8573, "step": 5210 }, { "epoch": 0.09238800480375083, "grad_norm": 2.671875, "learning_rate": 4.9015103368911816e-05, "loss": 0.8048, "step": 5212 }, { "epoch": 0.0924234568393624, "grad_norm": 2.75, "learning_rate": 4.901432703636622e-05, "loss": 0.8261, "step": 5214 }, { "epoch": 0.09245890887497396, "grad_norm": 2.765625, "learning_rate": 4.901355040412681e-05, "loss": 0.8197, "step": 5216 }, { "epoch": 0.09249436091058554, "grad_norm": 2.875, "learning_rate": 4.901277347220329e-05, "loss": 0.8323, "step": 5218 }, { "epoch": 0.09252981294619711, "grad_norm": 2.796875, "learning_rate": 4.901199624060536e-05, "loss": 0.7934, "step": 5220 }, { "epoch": 0.09256526498180867, "grad_norm": 2.9375, "learning_rate": 4.901121870934272e-05, "loss": 0.8816, "step": 5222 }, { "epoch": 0.09260071701742024, "grad_norm": 2.984375, "learning_rate": 4.901044087842507e-05, "loss": 0.8536, "step": 5224 }, { "epoch": 0.09263616905303182, "grad_norm": 2.953125, "learning_rate": 4.9009662747862115e-05, "loss": 0.837, "step": 5226 }, { "epoch": 0.09267162108864338, "grad_norm": 2.65625, "learning_rate": 4.900888431766359e-05, "loss": 0.8126, "step": 5228 }, { "epoch": 0.09270707312425495, "grad_norm": 2.796875, "learning_rate": 4.900810558783917e-05, "loss": 0.8512, "step": 5230 }, { "epoch": 0.09274252515986653, "grad_norm": 2.59375, "learning_rate": 4.9007326558398595e-05, "loss": 0.8265, "step": 5232 }, { "epoch": 0.09277797719547809, "grad_norm": 2.71875, "learning_rate": 4.9006547229351587e-05, "loss": 0.8007, "step": 5234 }, { "epoch": 0.09281342923108966, "grad_norm": 2.609375, "learning_rate": 4.9005767600707866e-05, "loss": 0.8372, "step": 5236 }, { "epoch": 0.09284888126670124, "grad_norm": 2.59375, "learning_rate": 4.900498767247717e-05, "loss": 0.8367, "step": 5238 }, { "epoch": 0.0928843333023128, "grad_norm": 2.65625, "learning_rate": 4.9004207444669224e-05, "loss": 0.8489, "step": 5240 }, { "epoch": 0.09291978533792437, "grad_norm": 2.71875, "learning_rate": 4.900342691729378e-05, "loss": 0.828, "step": 5242 }, { "epoch": 0.09295523737353595, "grad_norm": 2.671875, "learning_rate": 4.900264609036056e-05, "loss": 0.7999, "step": 5244 }, { "epoch": 0.09299068940914751, "grad_norm": 2.734375, "learning_rate": 4.900186496387931e-05, "loss": 0.831, "step": 5246 }, { "epoch": 0.09302614144475908, "grad_norm": 2.78125, "learning_rate": 4.900108353785979e-05, "loss": 0.8824, "step": 5248 }, { "epoch": 0.09306159348037066, "grad_norm": 2.828125, "learning_rate": 4.900030181231175e-05, "loss": 0.826, "step": 5250 }, { "epoch": 0.09309704551598222, "grad_norm": 2.859375, "learning_rate": 4.899951978724494e-05, "loss": 0.8538, "step": 5252 }, { "epoch": 0.0931324975515938, "grad_norm": 2.828125, "learning_rate": 4.899873746266912e-05, "loss": 0.8773, "step": 5254 }, { "epoch": 0.09316794958720535, "grad_norm": 3.015625, "learning_rate": 4.8997954838594055e-05, "loss": 0.8843, "step": 5256 }, { "epoch": 0.09320340162281693, "grad_norm": 2.890625, "learning_rate": 4.899717191502951e-05, "loss": 0.7928, "step": 5258 }, { "epoch": 0.0932388536584285, "grad_norm": 2.875, "learning_rate": 4.8996388691985265e-05, "loss": 0.8471, "step": 5260 }, { "epoch": 0.09327430569404006, "grad_norm": 2.703125, "learning_rate": 4.899560516947108e-05, "loss": 0.8468, "step": 5262 }, { "epoch": 0.09330975772965164, "grad_norm": 2.5, "learning_rate": 4.899482134749674e-05, "loss": 0.8251, "step": 5264 }, { "epoch": 0.09334520976526321, "grad_norm": 2.671875, "learning_rate": 4.8994037226072037e-05, "loss": 0.7813, "step": 5266 }, { "epoch": 0.09338066180087477, "grad_norm": 2.75, "learning_rate": 4.899325280520674e-05, "loss": 0.8214, "step": 5268 }, { "epoch": 0.09341611383648635, "grad_norm": 2.859375, "learning_rate": 4.899246808491065e-05, "loss": 0.853, "step": 5270 }, { "epoch": 0.09345156587209792, "grad_norm": 2.796875, "learning_rate": 4.899168306519355e-05, "loss": 0.8129, "step": 5272 }, { "epoch": 0.09348701790770948, "grad_norm": 2.75, "learning_rate": 4.899089774606525e-05, "loss": 0.8524, "step": 5274 }, { "epoch": 0.09352246994332106, "grad_norm": 2.484375, "learning_rate": 4.8990112127535525e-05, "loss": 0.7781, "step": 5276 }, { "epoch": 0.09355792197893263, "grad_norm": 2.65625, "learning_rate": 4.898932620961422e-05, "loss": 0.8256, "step": 5278 }, { "epoch": 0.09359337401454419, "grad_norm": 2.609375, "learning_rate": 4.89885399923111e-05, "loss": 0.8313, "step": 5280 }, { "epoch": 0.09362882605015577, "grad_norm": 2.6875, "learning_rate": 4.8987753475636014e-05, "loss": 0.8584, "step": 5282 }, { "epoch": 0.09366427808576734, "grad_norm": 3.046875, "learning_rate": 4.8986966659598756e-05, "loss": 0.8305, "step": 5284 }, { "epoch": 0.0936997301213789, "grad_norm": 2.59375, "learning_rate": 4.8986179544209146e-05, "loss": 0.7863, "step": 5286 }, { "epoch": 0.09373518215699048, "grad_norm": 2.46875, "learning_rate": 4.8985392129477014e-05, "loss": 0.8056, "step": 5288 }, { "epoch": 0.09377063419260205, "grad_norm": 2.546875, "learning_rate": 4.898460441541218e-05, "loss": 0.8324, "step": 5290 }, { "epoch": 0.09380608622821361, "grad_norm": 2.875, "learning_rate": 4.8983816402024484e-05, "loss": 0.8534, "step": 5292 }, { "epoch": 0.09384153826382519, "grad_norm": 2.921875, "learning_rate": 4.8983028089323756e-05, "loss": 0.8487, "step": 5294 }, { "epoch": 0.09387699029943676, "grad_norm": 2.703125, "learning_rate": 4.8982239477319824e-05, "loss": 0.8318, "step": 5296 }, { "epoch": 0.09391244233504832, "grad_norm": 2.890625, "learning_rate": 4.8981450566022545e-05, "loss": 0.8097, "step": 5298 }, { "epoch": 0.0939478943706599, "grad_norm": 3.171875, "learning_rate": 4.898066135544176e-05, "loss": 0.8471, "step": 5300 }, { "epoch": 0.09398334640627147, "grad_norm": 2.703125, "learning_rate": 4.897987184558731e-05, "loss": 0.8206, "step": 5302 }, { "epoch": 0.09401879844188303, "grad_norm": 2.96875, "learning_rate": 4.897908203646906e-05, "loss": 0.8759, "step": 5304 }, { "epoch": 0.0940542504774946, "grad_norm": 2.28125, "learning_rate": 4.897829192809686e-05, "loss": 0.7891, "step": 5306 }, { "epoch": 0.09408970251310618, "grad_norm": 2.765625, "learning_rate": 4.897750152048057e-05, "loss": 0.8524, "step": 5308 }, { "epoch": 0.09412515454871774, "grad_norm": 2.734375, "learning_rate": 4.8976710813630056e-05, "loss": 0.8392, "step": 5310 }, { "epoch": 0.09416060658432931, "grad_norm": 2.828125, "learning_rate": 4.897591980755518e-05, "loss": 0.84, "step": 5312 }, { "epoch": 0.09419605861994089, "grad_norm": 2.53125, "learning_rate": 4.897512850226582e-05, "loss": 0.7987, "step": 5314 }, { "epoch": 0.09423151065555245, "grad_norm": 2.875, "learning_rate": 4.8974336897771855e-05, "loss": 0.853, "step": 5316 }, { "epoch": 0.09426696269116402, "grad_norm": 2.765625, "learning_rate": 4.8973544994083154e-05, "loss": 0.8278, "step": 5318 }, { "epoch": 0.0943024147267756, "grad_norm": 2.8125, "learning_rate": 4.897275279120961e-05, "loss": 0.8601, "step": 5320 }, { "epoch": 0.09433786676238716, "grad_norm": 2.65625, "learning_rate": 4.89719602891611e-05, "loss": 0.8513, "step": 5322 }, { "epoch": 0.09437331879799873, "grad_norm": 2.625, "learning_rate": 4.8971167487947525e-05, "loss": 0.8074, "step": 5324 }, { "epoch": 0.09440877083361031, "grad_norm": 2.96875, "learning_rate": 4.897037438757876e-05, "loss": 0.8192, "step": 5326 }, { "epoch": 0.09444422286922187, "grad_norm": 3.140625, "learning_rate": 4.8969580988064724e-05, "loss": 0.8364, "step": 5328 }, { "epoch": 0.09447967490483344, "grad_norm": 2.9375, "learning_rate": 4.896878728941531e-05, "loss": 0.832, "step": 5330 }, { "epoch": 0.09451512694044502, "grad_norm": 2.515625, "learning_rate": 4.896799329164043e-05, "loss": 0.8042, "step": 5332 }, { "epoch": 0.09455057897605658, "grad_norm": 2.546875, "learning_rate": 4.8967198994749966e-05, "loss": 0.8357, "step": 5334 }, { "epoch": 0.09458603101166815, "grad_norm": 2.96875, "learning_rate": 4.896640439875386e-05, "loss": 0.8761, "step": 5336 }, { "epoch": 0.09462148304727971, "grad_norm": 2.65625, "learning_rate": 4.896560950366202e-05, "loss": 0.8303, "step": 5338 }, { "epoch": 0.09465693508289129, "grad_norm": 2.84375, "learning_rate": 4.896481430948437e-05, "loss": 0.8626, "step": 5340 }, { "epoch": 0.09469238711850286, "grad_norm": 2.65625, "learning_rate": 4.896401881623083e-05, "loss": 0.8361, "step": 5342 }, { "epoch": 0.09472783915411442, "grad_norm": 3.03125, "learning_rate": 4.8963223023911315e-05, "loss": 0.8183, "step": 5344 }, { "epoch": 0.094763291189726, "grad_norm": 3.0, "learning_rate": 4.8962426932535775e-05, "loss": 0.8441, "step": 5346 }, { "epoch": 0.09479874322533757, "grad_norm": 2.8125, "learning_rate": 4.8961630542114135e-05, "loss": 0.8247, "step": 5348 }, { "epoch": 0.09483419526094913, "grad_norm": 2.6875, "learning_rate": 4.896083385265634e-05, "loss": 0.8313, "step": 5350 }, { "epoch": 0.09486964729656071, "grad_norm": 2.75, "learning_rate": 4.896003686417233e-05, "loss": 0.8515, "step": 5352 }, { "epoch": 0.09490509933217228, "grad_norm": 2.71875, "learning_rate": 4.895923957667204e-05, "loss": 0.8214, "step": 5354 }, { "epoch": 0.09494055136778384, "grad_norm": 2.875, "learning_rate": 4.895844199016544e-05, "loss": 0.8345, "step": 5356 }, { "epoch": 0.09497600340339542, "grad_norm": 2.875, "learning_rate": 4.895764410466248e-05, "loss": 0.8197, "step": 5358 }, { "epoch": 0.09501145543900699, "grad_norm": 2.84375, "learning_rate": 4.89568459201731e-05, "loss": 0.86, "step": 5360 }, { "epoch": 0.09504690747461855, "grad_norm": 3.046875, "learning_rate": 4.8956047436707276e-05, "loss": 0.8541, "step": 5362 }, { "epoch": 0.09508235951023013, "grad_norm": 2.765625, "learning_rate": 4.8955248654274974e-05, "loss": 0.8416, "step": 5364 }, { "epoch": 0.0951178115458417, "grad_norm": 2.953125, "learning_rate": 4.8954449572886154e-05, "loss": 0.8919, "step": 5366 }, { "epoch": 0.09515326358145326, "grad_norm": 2.96875, "learning_rate": 4.895365019255079e-05, "loss": 0.8213, "step": 5368 }, { "epoch": 0.09518871561706484, "grad_norm": 2.671875, "learning_rate": 4.895285051327887e-05, "loss": 0.816, "step": 5370 }, { "epoch": 0.09522416765267641, "grad_norm": 2.796875, "learning_rate": 4.895205053508036e-05, "loss": 0.8613, "step": 5372 }, { "epoch": 0.09525961968828797, "grad_norm": 2.75, "learning_rate": 4.895125025796525e-05, "loss": 0.8261, "step": 5374 }, { "epoch": 0.09529507172389955, "grad_norm": 2.5, "learning_rate": 4.895044968194352e-05, "loss": 0.81, "step": 5376 }, { "epoch": 0.09533052375951112, "grad_norm": 2.5625, "learning_rate": 4.894964880702517e-05, "loss": 0.8326, "step": 5378 }, { "epoch": 0.09536597579512268, "grad_norm": 3.28125, "learning_rate": 4.894884763322019e-05, "loss": 0.8157, "step": 5380 }, { "epoch": 0.09540142783073426, "grad_norm": 2.71875, "learning_rate": 4.894804616053858e-05, "loss": 0.8442, "step": 5382 }, { "epoch": 0.09543687986634583, "grad_norm": 2.765625, "learning_rate": 4.8947244388990345e-05, "loss": 0.8449, "step": 5384 }, { "epoch": 0.09547233190195739, "grad_norm": 2.609375, "learning_rate": 4.894644231858548e-05, "loss": 0.8198, "step": 5386 }, { "epoch": 0.09550778393756897, "grad_norm": 2.8125, "learning_rate": 4.894563994933401e-05, "loss": 0.8566, "step": 5388 }, { "epoch": 0.09554323597318054, "grad_norm": 2.75, "learning_rate": 4.8944837281245934e-05, "loss": 0.8476, "step": 5390 }, { "epoch": 0.0955786880087921, "grad_norm": 2.984375, "learning_rate": 4.894403431433129e-05, "loss": 0.8254, "step": 5392 }, { "epoch": 0.09561414004440368, "grad_norm": 2.703125, "learning_rate": 4.894323104860007e-05, "loss": 0.8202, "step": 5394 }, { "epoch": 0.09564959208001525, "grad_norm": 2.453125, "learning_rate": 4.894242748406232e-05, "loss": 0.8245, "step": 5396 }, { "epoch": 0.09568504411562681, "grad_norm": 3.453125, "learning_rate": 4.894162362072806e-05, "loss": 0.8343, "step": 5398 }, { "epoch": 0.09572049615123839, "grad_norm": 2.546875, "learning_rate": 4.8940819458607323e-05, "loss": 0.8675, "step": 5400 }, { "epoch": 0.09575594818684996, "grad_norm": 2.75, "learning_rate": 4.894001499771015e-05, "loss": 0.8272, "step": 5402 }, { "epoch": 0.09579140022246152, "grad_norm": 2.8125, "learning_rate": 4.8939210238046577e-05, "loss": 0.7998, "step": 5404 }, { "epoch": 0.0958268522580731, "grad_norm": 2.609375, "learning_rate": 4.8938405179626644e-05, "loss": 0.8126, "step": 5406 }, { "epoch": 0.09586230429368467, "grad_norm": 2.65625, "learning_rate": 4.89375998224604e-05, "loss": 0.8555, "step": 5408 }, { "epoch": 0.09589775632929623, "grad_norm": 2.609375, "learning_rate": 4.8936794166557895e-05, "loss": 0.8274, "step": 5410 }, { "epoch": 0.0959332083649078, "grad_norm": 2.828125, "learning_rate": 4.893598821192918e-05, "loss": 0.8555, "step": 5412 }, { "epoch": 0.09596866040051938, "grad_norm": 2.53125, "learning_rate": 4.893518195858433e-05, "loss": 0.776, "step": 5414 }, { "epoch": 0.09600411243613094, "grad_norm": 2.890625, "learning_rate": 4.8934375406533384e-05, "loss": 0.8853, "step": 5416 }, { "epoch": 0.09603956447174251, "grad_norm": 2.890625, "learning_rate": 4.8933568555786416e-05, "loss": 0.8464, "step": 5418 }, { "epoch": 0.09607501650735409, "grad_norm": 2.671875, "learning_rate": 4.8932761406353506e-05, "loss": 0.847, "step": 5420 }, { "epoch": 0.09611046854296565, "grad_norm": 2.625, "learning_rate": 4.893195395824472e-05, "loss": 0.8339, "step": 5422 }, { "epoch": 0.09614592057857722, "grad_norm": 2.703125, "learning_rate": 4.8931146211470126e-05, "loss": 0.8212, "step": 5424 }, { "epoch": 0.09618137261418878, "grad_norm": 2.875, "learning_rate": 4.8930338166039815e-05, "loss": 0.812, "step": 5426 }, { "epoch": 0.09621682464980036, "grad_norm": 2.921875, "learning_rate": 4.892952982196387e-05, "loss": 0.846, "step": 5428 }, { "epoch": 0.09625227668541193, "grad_norm": 2.765625, "learning_rate": 4.892872117925237e-05, "loss": 0.8056, "step": 5430 }, { "epoch": 0.0962877287210235, "grad_norm": 2.65625, "learning_rate": 4.8927912237915416e-05, "loss": 0.8156, "step": 5432 }, { "epoch": 0.09632318075663507, "grad_norm": 2.515625, "learning_rate": 4.8927102997963105e-05, "loss": 0.7889, "step": 5434 }, { "epoch": 0.09635863279224664, "grad_norm": 2.9375, "learning_rate": 4.8926293459405524e-05, "loss": 0.8546, "step": 5436 }, { "epoch": 0.0963940848278582, "grad_norm": 2.609375, "learning_rate": 4.8925483622252796e-05, "loss": 0.8052, "step": 5438 }, { "epoch": 0.09642953686346978, "grad_norm": 2.671875, "learning_rate": 4.892467348651501e-05, "loss": 0.8228, "step": 5440 }, { "epoch": 0.09646498889908135, "grad_norm": 2.5, "learning_rate": 4.892386305220228e-05, "loss": 0.8544, "step": 5442 }, { "epoch": 0.09650044093469291, "grad_norm": 2.9375, "learning_rate": 4.892305231932473e-05, "loss": 0.8331, "step": 5444 }, { "epoch": 0.09653589297030449, "grad_norm": 2.96875, "learning_rate": 4.892224128789246e-05, "loss": 0.7923, "step": 5446 }, { "epoch": 0.09657134500591606, "grad_norm": 2.46875, "learning_rate": 4.8921429957915606e-05, "loss": 0.853, "step": 5448 }, { "epoch": 0.09660679704152762, "grad_norm": 2.6875, "learning_rate": 4.8920618329404286e-05, "loss": 0.8734, "step": 5450 }, { "epoch": 0.0966422490771392, "grad_norm": 2.921875, "learning_rate": 4.891980640236864e-05, "loss": 0.8263, "step": 5452 }, { "epoch": 0.09667770111275077, "grad_norm": 3.109375, "learning_rate": 4.891899417681878e-05, "loss": 0.8675, "step": 5454 }, { "epoch": 0.09671315314836233, "grad_norm": 2.703125, "learning_rate": 4.891818165276486e-05, "loss": 0.7996, "step": 5456 }, { "epoch": 0.09674860518397391, "grad_norm": 2.921875, "learning_rate": 4.891736883021701e-05, "loss": 0.8463, "step": 5458 }, { "epoch": 0.09678405721958548, "grad_norm": 2.828125, "learning_rate": 4.891655570918539e-05, "loss": 0.8306, "step": 5460 }, { "epoch": 0.09681950925519704, "grad_norm": 3.203125, "learning_rate": 4.8915742289680136e-05, "loss": 0.8614, "step": 5462 }, { "epoch": 0.09685496129080862, "grad_norm": 2.703125, "learning_rate": 4.891492857171139e-05, "loss": 0.8186, "step": 5464 }, { "epoch": 0.09689041332642019, "grad_norm": 2.6875, "learning_rate": 4.891411455528932e-05, "loss": 0.7825, "step": 5466 }, { "epoch": 0.09692586536203175, "grad_norm": 2.828125, "learning_rate": 4.891330024042408e-05, "loss": 0.8715, "step": 5468 }, { "epoch": 0.09696131739764333, "grad_norm": 2.5, "learning_rate": 4.8912485627125835e-05, "loss": 0.823, "step": 5470 }, { "epoch": 0.0969967694332549, "grad_norm": 2.875, "learning_rate": 4.891167071540475e-05, "loss": 0.8334, "step": 5472 }, { "epoch": 0.09703222146886646, "grad_norm": 3.0, "learning_rate": 4.8910855505271e-05, "loss": 0.8304, "step": 5474 }, { "epoch": 0.09706767350447804, "grad_norm": 2.921875, "learning_rate": 4.891003999673475e-05, "loss": 0.8579, "step": 5476 }, { "epoch": 0.09710312554008961, "grad_norm": 2.609375, "learning_rate": 4.890922418980617e-05, "loss": 0.8458, "step": 5478 }, { "epoch": 0.09713857757570117, "grad_norm": 2.765625, "learning_rate": 4.890840808449547e-05, "loss": 0.7615, "step": 5480 }, { "epoch": 0.09717402961131275, "grad_norm": 2.703125, "learning_rate": 4.89075916808128e-05, "loss": 0.8277, "step": 5482 }, { "epoch": 0.09720948164692432, "grad_norm": 2.4375, "learning_rate": 4.8906774978768376e-05, "loss": 0.8035, "step": 5484 }, { "epoch": 0.09724493368253588, "grad_norm": 2.6875, "learning_rate": 4.8905957978372377e-05, "loss": 0.8383, "step": 5486 }, { "epoch": 0.09728038571814746, "grad_norm": 2.734375, "learning_rate": 4.890514067963501e-05, "loss": 0.8408, "step": 5488 }, { "epoch": 0.09731583775375903, "grad_norm": 2.828125, "learning_rate": 4.8904323082566456e-05, "loss": 0.8368, "step": 5490 }, { "epoch": 0.09735128978937059, "grad_norm": 2.6875, "learning_rate": 4.890350518717693e-05, "loss": 0.8349, "step": 5492 }, { "epoch": 0.09738674182498216, "grad_norm": 2.671875, "learning_rate": 4.890268699347664e-05, "loss": 0.8517, "step": 5494 }, { "epoch": 0.09742219386059374, "grad_norm": 2.859375, "learning_rate": 4.890186850147579e-05, "loss": 0.8121, "step": 5496 }, { "epoch": 0.0974576458962053, "grad_norm": 2.96875, "learning_rate": 4.890104971118461e-05, "loss": 0.8557, "step": 5498 }, { "epoch": 0.09749309793181687, "grad_norm": 2.640625, "learning_rate": 4.8900230622613294e-05, "loss": 0.8497, "step": 5500 }, { "epoch": 0.09752854996742845, "grad_norm": 2.515625, "learning_rate": 4.889941123577209e-05, "loss": 0.8452, "step": 5502 }, { "epoch": 0.09756400200304001, "grad_norm": 2.953125, "learning_rate": 4.8898591550671205e-05, "loss": 0.825, "step": 5504 }, { "epoch": 0.09759945403865158, "grad_norm": 2.84375, "learning_rate": 4.889777156732088e-05, "loss": 0.8256, "step": 5506 }, { "epoch": 0.09763490607426314, "grad_norm": 2.96875, "learning_rate": 4.889695128573134e-05, "loss": 0.8618, "step": 5508 }, { "epoch": 0.09767035810987472, "grad_norm": 2.765625, "learning_rate": 4.889613070591283e-05, "loss": 0.8529, "step": 5510 }, { "epoch": 0.0977058101454863, "grad_norm": 2.625, "learning_rate": 4.889530982787558e-05, "loss": 0.825, "step": 5512 }, { "epoch": 0.09774126218109785, "grad_norm": 2.84375, "learning_rate": 4.8894488651629844e-05, "loss": 0.8353, "step": 5514 }, { "epoch": 0.09777671421670943, "grad_norm": 2.734375, "learning_rate": 4.889366717718587e-05, "loss": 0.8779, "step": 5516 }, { "epoch": 0.097812166252321, "grad_norm": 2.609375, "learning_rate": 4.8892845404553897e-05, "loss": 0.7985, "step": 5518 }, { "epoch": 0.09784761828793256, "grad_norm": 2.984375, "learning_rate": 4.88920233337442e-05, "loss": 0.8459, "step": 5520 }, { "epoch": 0.09788307032354414, "grad_norm": 2.65625, "learning_rate": 4.8891200964767014e-05, "loss": 0.7836, "step": 5522 }, { "epoch": 0.09791852235915571, "grad_norm": 2.796875, "learning_rate": 4.889037829763262e-05, "loss": 0.8491, "step": 5524 }, { "epoch": 0.09795397439476727, "grad_norm": 2.703125, "learning_rate": 4.888955533235129e-05, "loss": 0.844, "step": 5526 }, { "epoch": 0.09798942643037885, "grad_norm": 2.625, "learning_rate": 4.888873206893328e-05, "loss": 0.8221, "step": 5528 }, { "epoch": 0.09802487846599042, "grad_norm": 2.71875, "learning_rate": 4.888790850738887e-05, "loss": 0.8318, "step": 5530 }, { "epoch": 0.09806033050160198, "grad_norm": 2.734375, "learning_rate": 4.888708464772834e-05, "loss": 0.8489, "step": 5532 }, { "epoch": 0.09809578253721356, "grad_norm": 2.78125, "learning_rate": 4.8886260489961963e-05, "loss": 0.8574, "step": 5534 }, { "epoch": 0.09813123457282513, "grad_norm": 2.765625, "learning_rate": 4.888543603410004e-05, "loss": 0.8249, "step": 5536 }, { "epoch": 0.09816668660843669, "grad_norm": 3.015625, "learning_rate": 4.888461128015283e-05, "loss": 0.8662, "step": 5538 }, { "epoch": 0.09820213864404827, "grad_norm": 2.921875, "learning_rate": 4.888378622813067e-05, "loss": 0.8338, "step": 5540 }, { "epoch": 0.09823759067965984, "grad_norm": 2.6875, "learning_rate": 4.888296087804383e-05, "loss": 0.8295, "step": 5542 }, { "epoch": 0.0982730427152714, "grad_norm": 2.75, "learning_rate": 4.88821352299026e-05, "loss": 0.8539, "step": 5544 }, { "epoch": 0.09830849475088298, "grad_norm": 2.53125, "learning_rate": 4.8881309283717305e-05, "loss": 0.8205, "step": 5546 }, { "epoch": 0.09834394678649455, "grad_norm": 2.375, "learning_rate": 4.888048303949824e-05, "loss": 0.8314, "step": 5548 }, { "epoch": 0.09837939882210611, "grad_norm": 2.84375, "learning_rate": 4.887965649725572e-05, "loss": 0.878, "step": 5550 }, { "epoch": 0.09841485085771769, "grad_norm": 2.859375, "learning_rate": 4.887882965700006e-05, "loss": 0.8663, "step": 5552 }, { "epoch": 0.09845030289332926, "grad_norm": 2.46875, "learning_rate": 4.8878002518741585e-05, "loss": 0.785, "step": 5554 }, { "epoch": 0.09848575492894082, "grad_norm": 2.5625, "learning_rate": 4.8877175082490606e-05, "loss": 0.7979, "step": 5556 }, { "epoch": 0.0985212069645524, "grad_norm": 2.703125, "learning_rate": 4.8876347348257454e-05, "loss": 0.8108, "step": 5558 }, { "epoch": 0.09855665900016397, "grad_norm": 2.59375, "learning_rate": 4.8875519316052464e-05, "loss": 0.8471, "step": 5560 }, { "epoch": 0.09859211103577553, "grad_norm": 3.3125, "learning_rate": 4.8874690985885975e-05, "loss": 0.8127, "step": 5562 }, { "epoch": 0.0986275630713871, "grad_norm": 2.859375, "learning_rate": 4.88738623577683e-05, "loss": 0.8511, "step": 5564 }, { "epoch": 0.09866301510699868, "grad_norm": 2.65625, "learning_rate": 4.8873033431709804e-05, "loss": 0.8387, "step": 5566 }, { "epoch": 0.09869846714261024, "grad_norm": 2.734375, "learning_rate": 4.887220420772082e-05, "loss": 0.8615, "step": 5568 }, { "epoch": 0.09873391917822182, "grad_norm": 2.71875, "learning_rate": 4.88713746858117e-05, "loss": 0.8746, "step": 5570 }, { "epoch": 0.09876937121383339, "grad_norm": 2.703125, "learning_rate": 4.88705448659928e-05, "loss": 0.8258, "step": 5572 }, { "epoch": 0.09880482324944495, "grad_norm": 2.765625, "learning_rate": 4.8869714748274464e-05, "loss": 0.8558, "step": 5574 }, { "epoch": 0.09884027528505653, "grad_norm": 2.59375, "learning_rate": 4.8868884332667066e-05, "loss": 0.8201, "step": 5576 }, { "epoch": 0.0988757273206681, "grad_norm": 3.046875, "learning_rate": 4.886805361918096e-05, "loss": 0.8155, "step": 5578 }, { "epoch": 0.09891117935627966, "grad_norm": 2.828125, "learning_rate": 4.886722260782652e-05, "loss": 0.8609, "step": 5580 }, { "epoch": 0.09894663139189123, "grad_norm": 3.015625, "learning_rate": 4.886639129861411e-05, "loss": 0.833, "step": 5582 }, { "epoch": 0.09898208342750281, "grad_norm": 2.734375, "learning_rate": 4.886555969155411e-05, "loss": 0.8938, "step": 5584 }, { "epoch": 0.09901753546311437, "grad_norm": 2.9375, "learning_rate": 4.8864727786656886e-05, "loss": 0.8474, "step": 5586 }, { "epoch": 0.09905298749872594, "grad_norm": 2.84375, "learning_rate": 4.8863895583932836e-05, "loss": 0.7775, "step": 5588 }, { "epoch": 0.09908843953433752, "grad_norm": 3.015625, "learning_rate": 4.886306308339235e-05, "loss": 0.853, "step": 5590 }, { "epoch": 0.09912389156994908, "grad_norm": 2.421875, "learning_rate": 4.8862230285045794e-05, "loss": 0.8057, "step": 5592 }, { "epoch": 0.09915934360556065, "grad_norm": 2.5625, "learning_rate": 4.886139718890358e-05, "loss": 0.8473, "step": 5594 }, { "epoch": 0.09919479564117221, "grad_norm": 3.140625, "learning_rate": 4.88605637949761e-05, "loss": 0.829, "step": 5596 }, { "epoch": 0.09923024767678379, "grad_norm": 2.703125, "learning_rate": 4.885973010327375e-05, "loss": 0.8445, "step": 5598 }, { "epoch": 0.09926569971239536, "grad_norm": 2.921875, "learning_rate": 4.885889611380694e-05, "loss": 0.8265, "step": 5600 }, { "epoch": 0.09930115174800692, "grad_norm": 3.109375, "learning_rate": 4.885806182658607e-05, "loss": 0.8717, "step": 5602 }, { "epoch": 0.0993366037836185, "grad_norm": 2.859375, "learning_rate": 4.885722724162156e-05, "loss": 0.8219, "step": 5604 }, { "epoch": 0.09937205581923007, "grad_norm": 2.84375, "learning_rate": 4.885639235892383e-05, "loss": 0.8432, "step": 5606 }, { "epoch": 0.09940750785484163, "grad_norm": 2.484375, "learning_rate": 4.8855557178503286e-05, "loss": 0.8082, "step": 5608 }, { "epoch": 0.09944295989045321, "grad_norm": 2.875, "learning_rate": 4.885472170037037e-05, "loss": 0.836, "step": 5610 }, { "epoch": 0.09947841192606478, "grad_norm": 2.671875, "learning_rate": 4.885388592453548e-05, "loss": 0.8404, "step": 5612 }, { "epoch": 0.09951386396167634, "grad_norm": 2.6875, "learning_rate": 4.8853049851009064e-05, "loss": 0.8383, "step": 5614 }, { "epoch": 0.09954931599728792, "grad_norm": 3.078125, "learning_rate": 4.885221347980156e-05, "loss": 0.8697, "step": 5616 }, { "epoch": 0.09958476803289949, "grad_norm": 2.796875, "learning_rate": 4.8851376810923396e-05, "loss": 0.8104, "step": 5618 }, { "epoch": 0.09962022006851105, "grad_norm": 2.90625, "learning_rate": 4.8850539844385017e-05, "loss": 0.8173, "step": 5620 }, { "epoch": 0.09965567210412263, "grad_norm": 2.453125, "learning_rate": 4.8849702580196874e-05, "loss": 0.8496, "step": 5622 }, { "epoch": 0.0996911241397342, "grad_norm": 2.78125, "learning_rate": 4.8848865018369404e-05, "loss": 0.8188, "step": 5624 }, { "epoch": 0.09972657617534576, "grad_norm": 2.875, "learning_rate": 4.884802715891307e-05, "loss": 0.8529, "step": 5626 }, { "epoch": 0.09976202821095734, "grad_norm": 2.59375, "learning_rate": 4.8847189001838324e-05, "loss": 0.8529, "step": 5628 }, { "epoch": 0.09979748024656891, "grad_norm": 2.9375, "learning_rate": 4.884635054715562e-05, "loss": 0.8271, "step": 5630 }, { "epoch": 0.09983293228218047, "grad_norm": 3.109375, "learning_rate": 4.884551179487543e-05, "loss": 0.8768, "step": 5632 }, { "epoch": 0.09986838431779205, "grad_norm": 2.78125, "learning_rate": 4.884467274500822e-05, "loss": 0.8548, "step": 5634 }, { "epoch": 0.09990383635340362, "grad_norm": 2.8125, "learning_rate": 4.884383339756447e-05, "loss": 0.8354, "step": 5636 }, { "epoch": 0.09993928838901518, "grad_norm": 2.8125, "learning_rate": 4.8842993752554635e-05, "loss": 0.8216, "step": 5638 }, { "epoch": 0.09997474042462676, "grad_norm": 2.59375, "learning_rate": 4.884215380998921e-05, "loss": 0.8471, "step": 5640 }, { "epoch": 0.10001019246023833, "grad_norm": 2.96875, "learning_rate": 4.884131356987867e-05, "loss": 0.8059, "step": 5642 }, { "epoch": 0.10004564449584989, "grad_norm": 2.828125, "learning_rate": 4.8840473032233494e-05, "loss": 0.8661, "step": 5644 }, { "epoch": 0.10008109653146147, "grad_norm": 2.75, "learning_rate": 4.883963219706419e-05, "loss": 0.7908, "step": 5646 }, { "epoch": 0.10011654856707304, "grad_norm": 2.90625, "learning_rate": 4.883879106438124e-05, "loss": 0.8687, "step": 5648 }, { "epoch": 0.1001520006026846, "grad_norm": 2.765625, "learning_rate": 4.883794963419514e-05, "loss": 0.8066, "step": 5650 }, { "epoch": 0.10018745263829618, "grad_norm": 2.84375, "learning_rate": 4.88371079065164e-05, "loss": 0.827, "step": 5652 }, { "epoch": 0.10022290467390775, "grad_norm": 2.625, "learning_rate": 4.883626588135551e-05, "loss": 0.8052, "step": 5654 }, { "epoch": 0.10025835670951931, "grad_norm": 2.8125, "learning_rate": 4.883542355872299e-05, "loss": 0.8144, "step": 5656 }, { "epoch": 0.10029380874513089, "grad_norm": 2.71875, "learning_rate": 4.883458093862935e-05, "loss": 0.8118, "step": 5658 }, { "epoch": 0.10032926078074246, "grad_norm": 3.0625, "learning_rate": 4.88337380210851e-05, "loss": 0.8369, "step": 5660 }, { "epoch": 0.10036471281635402, "grad_norm": 3.015625, "learning_rate": 4.883289480610077e-05, "loss": 0.8589, "step": 5662 }, { "epoch": 0.1004001648519656, "grad_norm": 2.59375, "learning_rate": 4.883205129368688e-05, "loss": 0.8265, "step": 5664 }, { "epoch": 0.10043561688757717, "grad_norm": 3.125, "learning_rate": 4.883120748385394e-05, "loss": 0.8489, "step": 5666 }, { "epoch": 0.10047106892318873, "grad_norm": 2.765625, "learning_rate": 4.8830363376612497e-05, "loss": 0.8463, "step": 5668 }, { "epoch": 0.1005065209588003, "grad_norm": 2.78125, "learning_rate": 4.8829518971973085e-05, "loss": 0.8248, "step": 5670 }, { "epoch": 0.10054197299441188, "grad_norm": 2.96875, "learning_rate": 4.8828674269946254e-05, "loss": 0.8391, "step": 5672 }, { "epoch": 0.10057742503002344, "grad_norm": 2.703125, "learning_rate": 4.882782927054251e-05, "loss": 0.8326, "step": 5674 }, { "epoch": 0.10061287706563501, "grad_norm": 2.78125, "learning_rate": 4.882698397377243e-05, "loss": 0.8218, "step": 5676 }, { "epoch": 0.10064832910124658, "grad_norm": 2.609375, "learning_rate": 4.882613837964655e-05, "loss": 0.8307, "step": 5678 }, { "epoch": 0.10068378113685815, "grad_norm": 2.796875, "learning_rate": 4.882529248817543e-05, "loss": 0.8427, "step": 5680 }, { "epoch": 0.10071923317246972, "grad_norm": 2.875, "learning_rate": 4.882444629936962e-05, "loss": 0.8637, "step": 5682 }, { "epoch": 0.10075468520808128, "grad_norm": 2.84375, "learning_rate": 4.882359981323968e-05, "loss": 0.8579, "step": 5684 }, { "epoch": 0.10079013724369286, "grad_norm": 2.734375, "learning_rate": 4.8822753029796174e-05, "loss": 0.8361, "step": 5686 }, { "epoch": 0.10082558927930443, "grad_norm": 2.46875, "learning_rate": 4.8821905949049685e-05, "loss": 0.8359, "step": 5688 }, { "epoch": 0.100861041314916, "grad_norm": 2.953125, "learning_rate": 4.882105857101076e-05, "loss": 0.8288, "step": 5690 }, { "epoch": 0.10089649335052757, "grad_norm": 2.6875, "learning_rate": 4.882021089568999e-05, "loss": 0.8332, "step": 5692 }, { "epoch": 0.10093194538613914, "grad_norm": 2.640625, "learning_rate": 4.881936292309795e-05, "loss": 0.8297, "step": 5694 }, { "epoch": 0.1009673974217507, "grad_norm": 2.765625, "learning_rate": 4.881851465324522e-05, "loss": 0.8713, "step": 5696 }, { "epoch": 0.10100284945736228, "grad_norm": 2.53125, "learning_rate": 4.881766608614238e-05, "loss": 0.8066, "step": 5698 }, { "epoch": 0.10103830149297385, "grad_norm": 3.03125, "learning_rate": 4.881681722180004e-05, "loss": 0.8149, "step": 5700 }, { "epoch": 0.10107375352858541, "grad_norm": 2.828125, "learning_rate": 4.881596806022878e-05, "loss": 0.7993, "step": 5702 }, { "epoch": 0.10110920556419699, "grad_norm": 2.796875, "learning_rate": 4.881511860143919e-05, "loss": 0.7886, "step": 5704 }, { "epoch": 0.10114465759980856, "grad_norm": 2.65625, "learning_rate": 4.881426884544189e-05, "loss": 0.8257, "step": 5706 }, { "epoch": 0.10118010963542012, "grad_norm": 2.734375, "learning_rate": 4.881341879224747e-05, "loss": 0.8037, "step": 5708 }, { "epoch": 0.1012155616710317, "grad_norm": 2.484375, "learning_rate": 4.881256844186655e-05, "loss": 0.8028, "step": 5710 }, { "epoch": 0.10125101370664327, "grad_norm": 3.25, "learning_rate": 4.881171779430973e-05, "loss": 0.8512, "step": 5712 }, { "epoch": 0.10128646574225483, "grad_norm": 2.578125, "learning_rate": 4.881086684958763e-05, "loss": 0.7972, "step": 5714 }, { "epoch": 0.10132191777786641, "grad_norm": 3.25, "learning_rate": 4.881001560771087e-05, "loss": 0.8728, "step": 5716 }, { "epoch": 0.10135736981347798, "grad_norm": 2.90625, "learning_rate": 4.8809164068690084e-05, "loss": 0.8728, "step": 5718 }, { "epoch": 0.10139282184908954, "grad_norm": 3.03125, "learning_rate": 4.880831223253588e-05, "loss": 0.8824, "step": 5720 }, { "epoch": 0.10142827388470112, "grad_norm": 2.71875, "learning_rate": 4.8807460099258906e-05, "loss": 0.8078, "step": 5722 }, { "epoch": 0.10146372592031269, "grad_norm": 2.828125, "learning_rate": 4.8806607668869786e-05, "loss": 0.8533, "step": 5724 }, { "epoch": 0.10149917795592425, "grad_norm": 2.765625, "learning_rate": 4.880575494137916e-05, "loss": 0.8264, "step": 5726 }, { "epoch": 0.10153462999153583, "grad_norm": 2.515625, "learning_rate": 4.880490191679767e-05, "loss": 0.8368, "step": 5728 }, { "epoch": 0.1015700820271474, "grad_norm": 2.578125, "learning_rate": 4.880404859513596e-05, "loss": 0.8025, "step": 5730 }, { "epoch": 0.10160553406275896, "grad_norm": 2.46875, "learning_rate": 4.8803194976404685e-05, "loss": 0.7964, "step": 5732 }, { "epoch": 0.10164098609837054, "grad_norm": 2.65625, "learning_rate": 4.8802341060614495e-05, "loss": 0.8358, "step": 5734 }, { "epoch": 0.10167643813398211, "grad_norm": 2.921875, "learning_rate": 4.8801486847776044e-05, "loss": 0.8685, "step": 5736 }, { "epoch": 0.10171189016959367, "grad_norm": 2.671875, "learning_rate": 4.880063233789999e-05, "loss": 0.8414, "step": 5738 }, { "epoch": 0.10174734220520525, "grad_norm": 2.484375, "learning_rate": 4.8799777530997017e-05, "loss": 0.8109, "step": 5740 }, { "epoch": 0.10178279424081682, "grad_norm": 2.6875, "learning_rate": 4.8798922427077764e-05, "loss": 0.8377, "step": 5742 }, { "epoch": 0.10181824627642838, "grad_norm": 2.65625, "learning_rate": 4.8798067026152914e-05, "loss": 0.8486, "step": 5744 }, { "epoch": 0.10185369831203996, "grad_norm": 2.78125, "learning_rate": 4.879721132823315e-05, "loss": 0.8192, "step": 5746 }, { "epoch": 0.10188915034765153, "grad_norm": 2.890625, "learning_rate": 4.8796355333329145e-05, "loss": 0.8028, "step": 5748 }, { "epoch": 0.10192460238326309, "grad_norm": 2.734375, "learning_rate": 4.8795499041451585e-05, "loss": 0.8343, "step": 5750 }, { "epoch": 0.10196005441887467, "grad_norm": 2.875, "learning_rate": 4.879464245261115e-05, "loss": 0.8285, "step": 5752 }, { "epoch": 0.10199550645448624, "grad_norm": 2.578125, "learning_rate": 4.879378556681854e-05, "loss": 0.8591, "step": 5754 }, { "epoch": 0.1020309584900978, "grad_norm": 2.65625, "learning_rate": 4.8792928384084435e-05, "loss": 0.8476, "step": 5756 }, { "epoch": 0.10206641052570938, "grad_norm": 2.59375, "learning_rate": 4.879207090441954e-05, "loss": 0.8165, "step": 5758 }, { "epoch": 0.10210186256132095, "grad_norm": 2.921875, "learning_rate": 4.8791213127834555e-05, "loss": 0.8621, "step": 5760 }, { "epoch": 0.10213731459693251, "grad_norm": 2.921875, "learning_rate": 4.879035505434019e-05, "loss": 0.8944, "step": 5762 }, { "epoch": 0.10217276663254408, "grad_norm": 3.109375, "learning_rate": 4.878949668394714e-05, "loss": 0.815, "step": 5764 }, { "epoch": 0.10220821866815565, "grad_norm": 3.140625, "learning_rate": 4.878863801666613e-05, "loss": 0.8699, "step": 5766 }, { "epoch": 0.10224367070376722, "grad_norm": 2.625, "learning_rate": 4.8787779052507874e-05, "loss": 0.8312, "step": 5768 }, { "epoch": 0.1022791227393788, "grad_norm": 2.515625, "learning_rate": 4.8786919791483094e-05, "loss": 0.8142, "step": 5770 }, { "epoch": 0.10231457477499036, "grad_norm": 2.828125, "learning_rate": 4.8786060233602506e-05, "loss": 0.8463, "step": 5772 }, { "epoch": 0.10235002681060193, "grad_norm": 2.734375, "learning_rate": 4.8785200378876836e-05, "loss": 0.8769, "step": 5774 }, { "epoch": 0.1023854788462135, "grad_norm": 2.875, "learning_rate": 4.8784340227316825e-05, "loss": 0.8068, "step": 5776 }, { "epoch": 0.10242093088182506, "grad_norm": 2.515625, "learning_rate": 4.8783479778933207e-05, "loss": 0.8446, "step": 5778 }, { "epoch": 0.10245638291743664, "grad_norm": 2.90625, "learning_rate": 4.878261903373671e-05, "loss": 0.8235, "step": 5780 }, { "epoch": 0.10249183495304821, "grad_norm": 2.75, "learning_rate": 4.8781757991738074e-05, "loss": 0.8487, "step": 5782 }, { "epoch": 0.10252728698865977, "grad_norm": 2.875, "learning_rate": 4.8780896652948054e-05, "loss": 0.8151, "step": 5784 }, { "epoch": 0.10256273902427135, "grad_norm": 2.96875, "learning_rate": 4.87800350173774e-05, "loss": 0.8187, "step": 5786 }, { "epoch": 0.10259819105988292, "grad_norm": 2.765625, "learning_rate": 4.8779173085036865e-05, "loss": 0.8441, "step": 5788 }, { "epoch": 0.10263364309549448, "grad_norm": 2.6875, "learning_rate": 4.87783108559372e-05, "loss": 0.8383, "step": 5790 }, { "epoch": 0.10266909513110606, "grad_norm": 2.953125, "learning_rate": 4.877744833008917e-05, "loss": 0.8315, "step": 5792 }, { "epoch": 0.10270454716671763, "grad_norm": 2.859375, "learning_rate": 4.877658550750353e-05, "loss": 0.8346, "step": 5794 }, { "epoch": 0.1027399992023292, "grad_norm": 2.609375, "learning_rate": 4.877572238819106e-05, "loss": 0.8366, "step": 5796 }, { "epoch": 0.10277545123794077, "grad_norm": 2.9375, "learning_rate": 4.877485897216253e-05, "loss": 0.8602, "step": 5798 }, { "epoch": 0.10281090327355234, "grad_norm": 2.515625, "learning_rate": 4.877399525942871e-05, "loss": 0.8087, "step": 5800 }, { "epoch": 0.1028463553091639, "grad_norm": 2.515625, "learning_rate": 4.877313125000038e-05, "loss": 0.8278, "step": 5802 }, { "epoch": 0.10288180734477548, "grad_norm": 3.03125, "learning_rate": 4.877226694388832e-05, "loss": 0.8435, "step": 5804 }, { "epoch": 0.10291725938038705, "grad_norm": 3.0, "learning_rate": 4.877140234110333e-05, "loss": 0.8759, "step": 5806 }, { "epoch": 0.10295271141599861, "grad_norm": 2.671875, "learning_rate": 4.8770537441656184e-05, "loss": 0.8405, "step": 5808 }, { "epoch": 0.10298816345161019, "grad_norm": 2.6875, "learning_rate": 4.876967224555768e-05, "loss": 0.8133, "step": 5810 }, { "epoch": 0.10302361548722176, "grad_norm": 2.734375, "learning_rate": 4.876880675281862e-05, "loss": 0.8224, "step": 5812 }, { "epoch": 0.10305906752283332, "grad_norm": 2.625, "learning_rate": 4.87679409634498e-05, "loss": 0.8069, "step": 5814 }, { "epoch": 0.1030945195584449, "grad_norm": 2.578125, "learning_rate": 4.876707487746203e-05, "loss": 0.8132, "step": 5816 }, { "epoch": 0.10312997159405647, "grad_norm": 2.828125, "learning_rate": 4.8766208494866114e-05, "loss": 0.8216, "step": 5818 }, { "epoch": 0.10316542362966803, "grad_norm": 3.109375, "learning_rate": 4.8765341815672865e-05, "loss": 0.8498, "step": 5820 }, { "epoch": 0.1032008756652796, "grad_norm": 3.15625, "learning_rate": 4.87644748398931e-05, "loss": 0.8971, "step": 5822 }, { "epoch": 0.10323632770089118, "grad_norm": 2.6875, "learning_rate": 4.876360756753764e-05, "loss": 0.8124, "step": 5824 }, { "epoch": 0.10327177973650274, "grad_norm": 2.96875, "learning_rate": 4.876273999861731e-05, "loss": 0.8198, "step": 5826 }, { "epoch": 0.10330723177211432, "grad_norm": 2.671875, "learning_rate": 4.8761872133142935e-05, "loss": 0.8254, "step": 5828 }, { "epoch": 0.10334268380772589, "grad_norm": 2.703125, "learning_rate": 4.876100397112534e-05, "loss": 0.8737, "step": 5830 }, { "epoch": 0.10337813584333745, "grad_norm": 2.59375, "learning_rate": 4.8760135512575364e-05, "loss": 0.8547, "step": 5832 }, { "epoch": 0.10341358787894903, "grad_norm": 2.59375, "learning_rate": 4.8759266757503855e-05, "loss": 0.8565, "step": 5834 }, { "epoch": 0.1034490399145606, "grad_norm": 3.078125, "learning_rate": 4.875839770592163e-05, "loss": 0.8471, "step": 5836 }, { "epoch": 0.10348449195017216, "grad_norm": 2.6875, "learning_rate": 4.8757528357839564e-05, "loss": 0.8266, "step": 5838 }, { "epoch": 0.10351994398578374, "grad_norm": 2.734375, "learning_rate": 4.8756658713268486e-05, "loss": 0.8161, "step": 5840 }, { "epoch": 0.10355539602139531, "grad_norm": 2.5, "learning_rate": 4.8755788772219256e-05, "loss": 0.8541, "step": 5842 }, { "epoch": 0.10359084805700687, "grad_norm": 3.015625, "learning_rate": 4.8754918534702733e-05, "loss": 0.8992, "step": 5844 }, { "epoch": 0.10362630009261845, "grad_norm": 2.78125, "learning_rate": 4.875404800072977e-05, "loss": 0.8513, "step": 5846 }, { "epoch": 0.10366175212823002, "grad_norm": 2.734375, "learning_rate": 4.875317717031124e-05, "loss": 0.8752, "step": 5848 }, { "epoch": 0.10369720416384158, "grad_norm": 2.765625, "learning_rate": 4.8752306043458e-05, "loss": 0.8375, "step": 5850 }, { "epoch": 0.10373265619945315, "grad_norm": 2.796875, "learning_rate": 4.875143462018094e-05, "loss": 0.8408, "step": 5852 }, { "epoch": 0.10376810823506472, "grad_norm": 2.703125, "learning_rate": 4.875056290049091e-05, "loss": 0.8534, "step": 5854 }, { "epoch": 0.10380356027067629, "grad_norm": 2.6875, "learning_rate": 4.87496908843988e-05, "loss": 0.7775, "step": 5856 }, { "epoch": 0.10383901230628786, "grad_norm": 2.90625, "learning_rate": 4.874881857191551e-05, "loss": 0.8615, "step": 5858 }, { "epoch": 0.10387446434189943, "grad_norm": 2.71875, "learning_rate": 4.87479459630519e-05, "loss": 0.8127, "step": 5860 }, { "epoch": 0.103909916377511, "grad_norm": 2.53125, "learning_rate": 4.874707305781887e-05, "loss": 0.8257, "step": 5862 }, { "epoch": 0.10394536841312257, "grad_norm": 2.578125, "learning_rate": 4.8746199856227315e-05, "loss": 0.772, "step": 5864 }, { "epoch": 0.10398082044873413, "grad_norm": 2.71875, "learning_rate": 4.8745326358288133e-05, "loss": 0.8003, "step": 5866 }, { "epoch": 0.10401627248434571, "grad_norm": 2.734375, "learning_rate": 4.874445256401223e-05, "loss": 0.8276, "step": 5868 }, { "epoch": 0.10405172451995728, "grad_norm": 2.96875, "learning_rate": 4.87435784734105e-05, "loss": 0.8251, "step": 5870 }, { "epoch": 0.10408717655556884, "grad_norm": 2.515625, "learning_rate": 4.874270408649385e-05, "loss": 0.7982, "step": 5872 }, { "epoch": 0.10412262859118042, "grad_norm": 3.015625, "learning_rate": 4.874182940327321e-05, "loss": 0.8599, "step": 5874 }, { "epoch": 0.104158080626792, "grad_norm": 2.75, "learning_rate": 4.874095442375948e-05, "loss": 0.8527, "step": 5876 }, { "epoch": 0.10419353266240355, "grad_norm": 2.703125, "learning_rate": 4.874007914796358e-05, "loss": 0.8557, "step": 5878 }, { "epoch": 0.10422898469801513, "grad_norm": 2.515625, "learning_rate": 4.873920357589644e-05, "loss": 0.8352, "step": 5880 }, { "epoch": 0.1042644367336267, "grad_norm": 2.953125, "learning_rate": 4.8738327707568974e-05, "loss": 0.8541, "step": 5882 }, { "epoch": 0.10429988876923826, "grad_norm": 2.78125, "learning_rate": 4.8737451542992136e-05, "loss": 0.8092, "step": 5884 }, { "epoch": 0.10433534080484984, "grad_norm": 2.59375, "learning_rate": 4.873657508217684e-05, "loss": 0.8368, "step": 5886 }, { "epoch": 0.10437079284046141, "grad_norm": 2.625, "learning_rate": 4.873569832513403e-05, "loss": 0.8305, "step": 5888 }, { "epoch": 0.10440624487607297, "grad_norm": 2.9375, "learning_rate": 4.8734821271874656e-05, "loss": 0.833, "step": 5890 }, { "epoch": 0.10444169691168455, "grad_norm": 2.875, "learning_rate": 4.873394392240965e-05, "loss": 0.8522, "step": 5892 }, { "epoch": 0.10447714894729612, "grad_norm": 2.796875, "learning_rate": 4.873306627674997e-05, "loss": 0.8828, "step": 5894 }, { "epoch": 0.10451260098290768, "grad_norm": 2.6875, "learning_rate": 4.873218833490656e-05, "loss": 0.817, "step": 5896 }, { "epoch": 0.10454805301851926, "grad_norm": 2.734375, "learning_rate": 4.873131009689039e-05, "loss": 0.82, "step": 5898 }, { "epoch": 0.10458350505413083, "grad_norm": 2.5, "learning_rate": 4.873043156271241e-05, "loss": 0.8608, "step": 5900 }, { "epoch": 0.10461895708974239, "grad_norm": 2.546875, "learning_rate": 4.8729552732383586e-05, "loss": 0.8599, "step": 5902 }, { "epoch": 0.10465440912535397, "grad_norm": 2.703125, "learning_rate": 4.872867360591489e-05, "loss": 0.8522, "step": 5904 }, { "epoch": 0.10468986116096554, "grad_norm": 2.78125, "learning_rate": 4.872779418331729e-05, "loss": 0.8401, "step": 5906 }, { "epoch": 0.1047253131965771, "grad_norm": 2.9375, "learning_rate": 4.872691446460176e-05, "loss": 0.8433, "step": 5908 }, { "epoch": 0.10476076523218868, "grad_norm": 3.0, "learning_rate": 4.872603444977927e-05, "loss": 0.7944, "step": 5910 }, { "epoch": 0.10479621726780025, "grad_norm": 2.765625, "learning_rate": 4.8725154138860826e-05, "loss": 0.839, "step": 5912 }, { "epoch": 0.10483166930341181, "grad_norm": 2.5625, "learning_rate": 4.872427353185739e-05, "loss": 0.8284, "step": 5914 }, { "epoch": 0.10486712133902339, "grad_norm": 2.953125, "learning_rate": 4.872339262877996e-05, "loss": 0.8338, "step": 5916 }, { "epoch": 0.10490257337463496, "grad_norm": 2.765625, "learning_rate": 4.8722511429639536e-05, "loss": 0.826, "step": 5918 }, { "epoch": 0.10493802541024652, "grad_norm": 2.796875, "learning_rate": 4.872162993444712e-05, "loss": 0.8543, "step": 5920 }, { "epoch": 0.1049734774458581, "grad_norm": 2.6875, "learning_rate": 4.872074814321369e-05, "loss": 0.8229, "step": 5922 }, { "epoch": 0.10500892948146967, "grad_norm": 2.765625, "learning_rate": 4.871986605595027e-05, "loss": 0.7872, "step": 5924 }, { "epoch": 0.10504438151708123, "grad_norm": 2.5, "learning_rate": 4.871898367266785e-05, "loss": 0.7904, "step": 5926 }, { "epoch": 0.1050798335526928, "grad_norm": 2.78125, "learning_rate": 4.871810099337747e-05, "loss": 0.8301, "step": 5928 }, { "epoch": 0.10511528558830438, "grad_norm": 2.703125, "learning_rate": 4.871721801809013e-05, "loss": 0.8046, "step": 5930 }, { "epoch": 0.10515073762391594, "grad_norm": 2.609375, "learning_rate": 4.871633474681684e-05, "loss": 0.8217, "step": 5932 }, { "epoch": 0.10518618965952752, "grad_norm": 2.609375, "learning_rate": 4.871545117956863e-05, "loss": 0.8079, "step": 5934 }, { "epoch": 0.10522164169513908, "grad_norm": 3.078125, "learning_rate": 4.871456731635653e-05, "loss": 0.8503, "step": 5936 }, { "epoch": 0.10525709373075065, "grad_norm": 2.71875, "learning_rate": 4.871368315719158e-05, "loss": 0.8112, "step": 5938 }, { "epoch": 0.10529254576636222, "grad_norm": 2.609375, "learning_rate": 4.871279870208479e-05, "loss": 0.8458, "step": 5940 }, { "epoch": 0.10532799780197379, "grad_norm": 2.765625, "learning_rate": 4.8711913951047224e-05, "loss": 0.8309, "step": 5942 }, { "epoch": 0.10536344983758536, "grad_norm": 2.78125, "learning_rate": 4.8711028904089905e-05, "loss": 0.775, "step": 5944 }, { "epoch": 0.10539890187319693, "grad_norm": 2.53125, "learning_rate": 4.871014356122388e-05, "loss": 0.8295, "step": 5946 }, { "epoch": 0.1054343539088085, "grad_norm": 2.65625, "learning_rate": 4.870925792246021e-05, "loss": 0.8225, "step": 5948 }, { "epoch": 0.10546980594442007, "grad_norm": 2.8125, "learning_rate": 4.870837198780993e-05, "loss": 0.8658, "step": 5950 }, { "epoch": 0.10550525798003164, "grad_norm": 2.578125, "learning_rate": 4.870748575728411e-05, "loss": 0.8338, "step": 5952 }, { "epoch": 0.1055407100156432, "grad_norm": 2.84375, "learning_rate": 4.8706599230893805e-05, "loss": 0.8351, "step": 5954 }, { "epoch": 0.10557616205125478, "grad_norm": 2.703125, "learning_rate": 4.8705712408650086e-05, "loss": 0.8041, "step": 5956 }, { "epoch": 0.10561161408686635, "grad_norm": 2.765625, "learning_rate": 4.8704825290564004e-05, "loss": 0.8343, "step": 5958 }, { "epoch": 0.10564706612247791, "grad_norm": 2.59375, "learning_rate": 4.870393787664664e-05, "loss": 0.8447, "step": 5960 }, { "epoch": 0.10568251815808949, "grad_norm": 2.625, "learning_rate": 4.870305016690908e-05, "loss": 0.852, "step": 5962 }, { "epoch": 0.10571797019370106, "grad_norm": 2.875, "learning_rate": 4.870216216136238e-05, "loss": 0.8384, "step": 5964 }, { "epoch": 0.10575342222931262, "grad_norm": 2.875, "learning_rate": 4.870127386001764e-05, "loss": 0.8305, "step": 5966 }, { "epoch": 0.1057888742649242, "grad_norm": 2.625, "learning_rate": 4.870038526288593e-05, "loss": 0.8172, "step": 5968 }, { "epoch": 0.10582432630053577, "grad_norm": 2.609375, "learning_rate": 4.8699496369978346e-05, "loss": 0.8577, "step": 5970 }, { "epoch": 0.10585977833614733, "grad_norm": 2.90625, "learning_rate": 4.8698607181306e-05, "loss": 0.8757, "step": 5972 }, { "epoch": 0.10589523037175891, "grad_norm": 2.75, "learning_rate": 4.8697717696879965e-05, "loss": 0.8196, "step": 5974 }, { "epoch": 0.10593068240737048, "grad_norm": 2.796875, "learning_rate": 4.869682791671134e-05, "loss": 0.84, "step": 5976 }, { "epoch": 0.10596613444298204, "grad_norm": 2.8125, "learning_rate": 4.869593784081124e-05, "loss": 0.82, "step": 5978 }, { "epoch": 0.10600158647859362, "grad_norm": 2.703125, "learning_rate": 4.869504746919078e-05, "loss": 0.8291, "step": 5980 }, { "epoch": 0.10603703851420519, "grad_norm": 2.9375, "learning_rate": 4.8694156801861065e-05, "loss": 0.7903, "step": 5982 }, { "epoch": 0.10607249054981675, "grad_norm": 2.59375, "learning_rate": 4.86932658388332e-05, "loss": 0.7952, "step": 5984 }, { "epoch": 0.10610794258542833, "grad_norm": 3.0625, "learning_rate": 4.8692374580118314e-05, "loss": 0.8418, "step": 5986 }, { "epoch": 0.1061433946210399, "grad_norm": 2.578125, "learning_rate": 4.869148302572753e-05, "loss": 0.8257, "step": 5988 }, { "epoch": 0.10617884665665146, "grad_norm": 2.84375, "learning_rate": 4.869059117567198e-05, "loss": 0.8086, "step": 5990 }, { "epoch": 0.10621429869226304, "grad_norm": 2.921875, "learning_rate": 4.8689699029962774e-05, "loss": 0.8294, "step": 5992 }, { "epoch": 0.10624975072787461, "grad_norm": 2.953125, "learning_rate": 4.868880658861106e-05, "loss": 0.8258, "step": 5994 }, { "epoch": 0.10628520276348617, "grad_norm": 2.671875, "learning_rate": 4.868791385162797e-05, "loss": 0.8312, "step": 5996 }, { "epoch": 0.10632065479909775, "grad_norm": 2.703125, "learning_rate": 4.868702081902466e-05, "loss": 0.8579, "step": 5998 }, { "epoch": 0.10635610683470932, "grad_norm": 2.953125, "learning_rate": 4.868612749081226e-05, "loss": 0.8733, "step": 6000 }, { "epoch": 0.10639155887032088, "grad_norm": 2.65625, "learning_rate": 4.868523386700192e-05, "loss": 0.8503, "step": 6002 }, { "epoch": 0.10642701090593246, "grad_norm": 2.5, "learning_rate": 4.86843399476048e-05, "loss": 0.7982, "step": 6004 }, { "epoch": 0.10646246294154403, "grad_norm": 3.0, "learning_rate": 4.8683445732632046e-05, "loss": 0.8144, "step": 6006 }, { "epoch": 0.10649791497715559, "grad_norm": 2.5625, "learning_rate": 4.868255122209482e-05, "loss": 0.8306, "step": 6008 }, { "epoch": 0.10653336701276717, "grad_norm": 2.53125, "learning_rate": 4.868165641600429e-05, "loss": 0.7931, "step": 6010 }, { "epoch": 0.10656881904837874, "grad_norm": 2.53125, "learning_rate": 4.868076131437162e-05, "loss": 0.8033, "step": 6012 }, { "epoch": 0.1066042710839903, "grad_norm": 2.84375, "learning_rate": 4.8679865917207986e-05, "loss": 0.805, "step": 6014 }, { "epoch": 0.10663972311960188, "grad_norm": 2.625, "learning_rate": 4.867897022452455e-05, "loss": 0.8566, "step": 6016 }, { "epoch": 0.10667517515521345, "grad_norm": 2.640625, "learning_rate": 4.8678074236332505e-05, "loss": 0.8544, "step": 6018 }, { "epoch": 0.10671062719082501, "grad_norm": 3.046875, "learning_rate": 4.867717795264301e-05, "loss": 0.7666, "step": 6020 }, { "epoch": 0.10674607922643659, "grad_norm": 2.734375, "learning_rate": 4.8676281373467284e-05, "loss": 0.8344, "step": 6022 }, { "epoch": 0.10678153126204815, "grad_norm": 3.0, "learning_rate": 4.867538449881649e-05, "loss": 0.8397, "step": 6024 }, { "epoch": 0.10681698329765972, "grad_norm": 2.84375, "learning_rate": 4.8674487328701836e-05, "loss": 0.8351, "step": 6026 }, { "epoch": 0.1068524353332713, "grad_norm": 2.90625, "learning_rate": 4.86735898631345e-05, "loss": 0.8393, "step": 6028 }, { "epoch": 0.10688788736888286, "grad_norm": 2.671875, "learning_rate": 4.8672692102125696e-05, "loss": 0.8348, "step": 6030 }, { "epoch": 0.10692333940449443, "grad_norm": 2.828125, "learning_rate": 4.867179404568663e-05, "loss": 0.8175, "step": 6032 }, { "epoch": 0.106958791440106, "grad_norm": 2.5, "learning_rate": 4.86708956938285e-05, "loss": 0.8695, "step": 6034 }, { "epoch": 0.10699424347571757, "grad_norm": 2.65625, "learning_rate": 4.8669997046562524e-05, "loss": 0.8068, "step": 6036 }, { "epoch": 0.10702969551132914, "grad_norm": 2.8125, "learning_rate": 4.866909810389991e-05, "loss": 0.8816, "step": 6038 }, { "epoch": 0.10706514754694071, "grad_norm": 2.765625, "learning_rate": 4.866819886585189e-05, "loss": 0.8582, "step": 6040 }, { "epoch": 0.10710059958255227, "grad_norm": 2.578125, "learning_rate": 4.866729933242968e-05, "loss": 0.7931, "step": 6042 }, { "epoch": 0.10713605161816385, "grad_norm": 3.0, "learning_rate": 4.866639950364449e-05, "loss": 0.8141, "step": 6044 }, { "epoch": 0.10717150365377542, "grad_norm": 2.828125, "learning_rate": 4.866549937950757e-05, "loss": 0.8202, "step": 6046 }, { "epoch": 0.10720695568938698, "grad_norm": 2.65625, "learning_rate": 4.8664598960030154e-05, "loss": 0.8198, "step": 6048 }, { "epoch": 0.10724240772499856, "grad_norm": 2.828125, "learning_rate": 4.8663698245223466e-05, "loss": 0.7826, "step": 6050 }, { "epoch": 0.10727785976061013, "grad_norm": 2.59375, "learning_rate": 4.8662797235098754e-05, "loss": 0.8455, "step": 6052 }, { "epoch": 0.1073133117962217, "grad_norm": 2.734375, "learning_rate": 4.866189592966726e-05, "loss": 0.8529, "step": 6054 }, { "epoch": 0.10734876383183327, "grad_norm": 2.578125, "learning_rate": 4.8660994328940235e-05, "loss": 0.809, "step": 6056 }, { "epoch": 0.10738421586744484, "grad_norm": 2.625, "learning_rate": 4.866009243292893e-05, "loss": 0.8287, "step": 6058 }, { "epoch": 0.1074196679030564, "grad_norm": 2.546875, "learning_rate": 4.86591902416446e-05, "loss": 0.8295, "step": 6060 }, { "epoch": 0.10745511993866798, "grad_norm": 3.03125, "learning_rate": 4.86582877550985e-05, "loss": 0.8268, "step": 6062 }, { "epoch": 0.10749057197427955, "grad_norm": 2.296875, "learning_rate": 4.865738497330189e-05, "loss": 0.8194, "step": 6064 }, { "epoch": 0.10752602400989111, "grad_norm": 2.71875, "learning_rate": 4.865648189626605e-05, "loss": 0.8585, "step": 6066 }, { "epoch": 0.10756147604550269, "grad_norm": 2.75, "learning_rate": 4.865557852400225e-05, "loss": 0.8076, "step": 6068 }, { "epoch": 0.10759692808111426, "grad_norm": 3.171875, "learning_rate": 4.8654674856521745e-05, "loss": 0.7988, "step": 6070 }, { "epoch": 0.10763238011672582, "grad_norm": 2.921875, "learning_rate": 4.865377089383584e-05, "loss": 0.8241, "step": 6072 }, { "epoch": 0.1076678321523374, "grad_norm": 2.71875, "learning_rate": 4.865286663595578e-05, "loss": 0.8282, "step": 6074 }, { "epoch": 0.10770328418794897, "grad_norm": 2.375, "learning_rate": 4.865196208289289e-05, "loss": 0.8366, "step": 6076 }, { "epoch": 0.10773873622356053, "grad_norm": 2.8125, "learning_rate": 4.865105723465843e-05, "loss": 0.8635, "step": 6078 }, { "epoch": 0.10777418825917211, "grad_norm": 2.59375, "learning_rate": 4.86501520912637e-05, "loss": 0.7817, "step": 6080 }, { "epoch": 0.10780964029478368, "grad_norm": 2.640625, "learning_rate": 4.8649246652720005e-05, "loss": 0.8349, "step": 6082 }, { "epoch": 0.10784509233039524, "grad_norm": 3.046875, "learning_rate": 4.864834091903864e-05, "loss": 0.8655, "step": 6084 }, { "epoch": 0.10788054436600682, "grad_norm": 2.671875, "learning_rate": 4.864743489023089e-05, "loss": 0.7774, "step": 6086 }, { "epoch": 0.10791599640161839, "grad_norm": 2.828125, "learning_rate": 4.8646528566308094e-05, "loss": 0.8279, "step": 6088 }, { "epoch": 0.10795144843722995, "grad_norm": 2.578125, "learning_rate": 4.864562194728154e-05, "loss": 0.8073, "step": 6090 }, { "epoch": 0.10798690047284153, "grad_norm": 2.6875, "learning_rate": 4.8644715033162546e-05, "loss": 0.8185, "step": 6092 }, { "epoch": 0.1080223525084531, "grad_norm": 2.75, "learning_rate": 4.864380782396244e-05, "loss": 0.8483, "step": 6094 }, { "epoch": 0.10805780454406466, "grad_norm": 2.703125, "learning_rate": 4.8642900319692536e-05, "loss": 0.8436, "step": 6096 }, { "epoch": 0.10809325657967624, "grad_norm": 2.984375, "learning_rate": 4.864199252036415e-05, "loss": 0.8808, "step": 6098 }, { "epoch": 0.10812870861528781, "grad_norm": 2.84375, "learning_rate": 4.864108442598864e-05, "loss": 0.8295, "step": 6100 }, { "epoch": 0.10816416065089937, "grad_norm": 3.0625, "learning_rate": 4.864017603657731e-05, "loss": 0.839, "step": 6102 }, { "epoch": 0.10819961268651095, "grad_norm": 2.671875, "learning_rate": 4.86392673521415e-05, "loss": 0.8202, "step": 6104 }, { "epoch": 0.1082350647221225, "grad_norm": 3.015625, "learning_rate": 4.863835837269257e-05, "loss": 0.8195, "step": 6106 }, { "epoch": 0.10827051675773408, "grad_norm": 2.828125, "learning_rate": 4.863744909824185e-05, "loss": 0.7983, "step": 6108 }, { "epoch": 0.10830596879334566, "grad_norm": 3.03125, "learning_rate": 4.8636539528800685e-05, "loss": 0.8431, "step": 6110 }, { "epoch": 0.10834142082895722, "grad_norm": 2.625, "learning_rate": 4.863562966438042e-05, "loss": 0.8398, "step": 6112 }, { "epoch": 0.10837687286456879, "grad_norm": 2.59375, "learning_rate": 4.863471950499243e-05, "loss": 0.8235, "step": 6114 }, { "epoch": 0.10841232490018037, "grad_norm": 3.09375, "learning_rate": 4.8633809050648064e-05, "loss": 0.8191, "step": 6116 }, { "epoch": 0.10844777693579193, "grad_norm": 2.734375, "learning_rate": 4.8632898301358684e-05, "loss": 0.81, "step": 6118 }, { "epoch": 0.1084832289714035, "grad_norm": 2.828125, "learning_rate": 4.863198725713565e-05, "loss": 0.7965, "step": 6120 }, { "epoch": 0.10851868100701507, "grad_norm": 2.78125, "learning_rate": 4.863107591799034e-05, "loss": 0.805, "step": 6122 }, { "epoch": 0.10855413304262664, "grad_norm": 2.671875, "learning_rate": 4.863016428393413e-05, "loss": 0.8054, "step": 6124 }, { "epoch": 0.10858958507823821, "grad_norm": 2.765625, "learning_rate": 4.862925235497839e-05, "loss": 0.7906, "step": 6126 }, { "epoch": 0.10862503711384978, "grad_norm": 2.90625, "learning_rate": 4.8628340131134496e-05, "loss": 0.8641, "step": 6128 }, { "epoch": 0.10866048914946135, "grad_norm": 2.703125, "learning_rate": 4.862742761241384e-05, "loss": 0.8086, "step": 6130 }, { "epoch": 0.10869594118507292, "grad_norm": 2.796875, "learning_rate": 4.862651479882782e-05, "loss": 0.8464, "step": 6132 }, { "epoch": 0.1087313932206845, "grad_norm": 2.734375, "learning_rate": 4.8625601690387804e-05, "loss": 0.8145, "step": 6134 }, { "epoch": 0.10876684525629605, "grad_norm": 2.671875, "learning_rate": 4.8624688287105195e-05, "loss": 0.8007, "step": 6136 }, { "epoch": 0.10880229729190763, "grad_norm": 2.78125, "learning_rate": 4.86237745889914e-05, "loss": 0.8283, "step": 6138 }, { "epoch": 0.1088377493275192, "grad_norm": 2.5625, "learning_rate": 4.8622860596057826e-05, "loss": 0.7944, "step": 6140 }, { "epoch": 0.10887320136313076, "grad_norm": 2.765625, "learning_rate": 4.862194630831587e-05, "loss": 0.8282, "step": 6142 }, { "epoch": 0.10890865339874234, "grad_norm": 2.640625, "learning_rate": 4.862103172577695e-05, "loss": 0.8247, "step": 6144 }, { "epoch": 0.10894410543435391, "grad_norm": 2.8125, "learning_rate": 4.862011684845246e-05, "loss": 0.8667, "step": 6146 }, { "epoch": 0.10897955746996547, "grad_norm": 2.53125, "learning_rate": 4.861920167635384e-05, "loss": 0.8452, "step": 6148 }, { "epoch": 0.10901500950557705, "grad_norm": 2.921875, "learning_rate": 4.86182862094925e-05, "loss": 0.7993, "step": 6150 }, { "epoch": 0.10905046154118862, "grad_norm": 2.84375, "learning_rate": 4.861737044787987e-05, "loss": 0.8357, "step": 6152 }, { "epoch": 0.10908591357680018, "grad_norm": 2.5625, "learning_rate": 4.861645439152738e-05, "loss": 0.8498, "step": 6154 }, { "epoch": 0.10912136561241176, "grad_norm": 2.78125, "learning_rate": 4.8615538040446446e-05, "loss": 0.8273, "step": 6156 }, { "epoch": 0.10915681764802333, "grad_norm": 2.71875, "learning_rate": 4.8614621394648525e-05, "loss": 0.8296, "step": 6158 }, { "epoch": 0.1091922696836349, "grad_norm": 2.71875, "learning_rate": 4.8613704454145045e-05, "loss": 0.8243, "step": 6160 }, { "epoch": 0.10922772171924647, "grad_norm": 3.015625, "learning_rate": 4.8612787218947454e-05, "loss": 0.8467, "step": 6162 }, { "epoch": 0.10926317375485804, "grad_norm": 2.625, "learning_rate": 4.8611869689067194e-05, "loss": 0.8519, "step": 6164 }, { "epoch": 0.1092986257904696, "grad_norm": 2.6875, "learning_rate": 4.861095186451572e-05, "loss": 0.8219, "step": 6166 }, { "epoch": 0.10933407782608118, "grad_norm": 2.3125, "learning_rate": 4.861003374530448e-05, "loss": 0.8044, "step": 6168 }, { "epoch": 0.10936952986169275, "grad_norm": 2.65625, "learning_rate": 4.860911533144494e-05, "loss": 0.7915, "step": 6170 }, { "epoch": 0.10940498189730431, "grad_norm": 3.109375, "learning_rate": 4.8608196622948554e-05, "loss": 0.8269, "step": 6172 }, { "epoch": 0.10944043393291589, "grad_norm": 2.859375, "learning_rate": 4.8607277619826796e-05, "loss": 0.838, "step": 6174 }, { "epoch": 0.10947588596852746, "grad_norm": 2.78125, "learning_rate": 4.860635832209113e-05, "loss": 0.803, "step": 6176 }, { "epoch": 0.10951133800413902, "grad_norm": 2.859375, "learning_rate": 4.860543872975303e-05, "loss": 0.8131, "step": 6178 }, { "epoch": 0.1095467900397506, "grad_norm": 2.890625, "learning_rate": 4.8604518842823974e-05, "loss": 0.8283, "step": 6180 }, { "epoch": 0.10958224207536217, "grad_norm": 2.515625, "learning_rate": 4.860359866131543e-05, "loss": 0.8426, "step": 6182 }, { "epoch": 0.10961769411097373, "grad_norm": 2.578125, "learning_rate": 4.86026781852389e-05, "loss": 0.8239, "step": 6184 }, { "epoch": 0.1096531461465853, "grad_norm": 3.28125, "learning_rate": 4.860175741460585e-05, "loss": 0.8929, "step": 6186 }, { "epoch": 0.10968859818219688, "grad_norm": 2.765625, "learning_rate": 4.8600836349427805e-05, "loss": 0.8083, "step": 6188 }, { "epoch": 0.10972405021780844, "grad_norm": 2.765625, "learning_rate": 4.8599914989716223e-05, "loss": 0.8372, "step": 6190 }, { "epoch": 0.10975950225342002, "grad_norm": 2.9375, "learning_rate": 4.859899333548261e-05, "loss": 0.8588, "step": 6192 }, { "epoch": 0.10979495428903158, "grad_norm": 2.6875, "learning_rate": 4.8598071386738485e-05, "loss": 0.7989, "step": 6194 }, { "epoch": 0.10983040632464315, "grad_norm": 2.453125, "learning_rate": 4.859714914349535e-05, "loss": 0.8067, "step": 6196 }, { "epoch": 0.10986585836025473, "grad_norm": 2.9375, "learning_rate": 4.8596226605764704e-05, "loss": 0.7671, "step": 6198 }, { "epoch": 0.10990131039586629, "grad_norm": 2.859375, "learning_rate": 4.8595303773558064e-05, "loss": 0.8118, "step": 6200 }, { "epoch": 0.10993676243147786, "grad_norm": 2.71875, "learning_rate": 4.8594380646886945e-05, "loss": 0.8167, "step": 6202 }, { "epoch": 0.10997221446708944, "grad_norm": 2.59375, "learning_rate": 4.8593457225762873e-05, "loss": 0.8439, "step": 6204 }, { "epoch": 0.110007666502701, "grad_norm": 2.984375, "learning_rate": 4.859253351019737e-05, "loss": 0.8135, "step": 6206 }, { "epoch": 0.11004311853831257, "grad_norm": 2.71875, "learning_rate": 4.859160950020196e-05, "loss": 0.8585, "step": 6208 }, { "epoch": 0.11007857057392414, "grad_norm": 2.578125, "learning_rate": 4.859068519578818e-05, "loss": 0.8055, "step": 6210 }, { "epoch": 0.1101140226095357, "grad_norm": 3.15625, "learning_rate": 4.8589760596967555e-05, "loss": 0.8373, "step": 6212 }, { "epoch": 0.11014947464514728, "grad_norm": 2.703125, "learning_rate": 4.858883570375163e-05, "loss": 0.805, "step": 6214 }, { "epoch": 0.11018492668075885, "grad_norm": 3.078125, "learning_rate": 4.858791051615196e-05, "loss": 0.8253, "step": 6216 }, { "epoch": 0.11022037871637042, "grad_norm": 2.84375, "learning_rate": 4.8586985034180076e-05, "loss": 0.8373, "step": 6218 }, { "epoch": 0.11025583075198199, "grad_norm": 2.640625, "learning_rate": 4.858605925784753e-05, "loss": 0.833, "step": 6220 }, { "epoch": 0.11029128278759356, "grad_norm": 2.46875, "learning_rate": 4.8585133187165876e-05, "loss": 0.8138, "step": 6222 }, { "epoch": 0.11032673482320512, "grad_norm": 2.609375, "learning_rate": 4.858420682214667e-05, "loss": 0.8439, "step": 6224 }, { "epoch": 0.1103621868588167, "grad_norm": 3.078125, "learning_rate": 4.858328016280148e-05, "loss": 0.8632, "step": 6226 }, { "epoch": 0.11039763889442827, "grad_norm": 2.671875, "learning_rate": 4.858235320914187e-05, "loss": 0.7924, "step": 6228 }, { "epoch": 0.11043309093003983, "grad_norm": 2.6875, "learning_rate": 4.8581425961179396e-05, "loss": 0.7989, "step": 6230 }, { "epoch": 0.11046854296565141, "grad_norm": 2.6875, "learning_rate": 4.858049841892564e-05, "loss": 0.796, "step": 6232 }, { "epoch": 0.11050399500126298, "grad_norm": 2.546875, "learning_rate": 4.8579570582392176e-05, "loss": 0.7959, "step": 6234 }, { "epoch": 0.11053944703687454, "grad_norm": 3.21875, "learning_rate": 4.8578642451590585e-05, "loss": 0.7785, "step": 6236 }, { "epoch": 0.11057489907248612, "grad_norm": 2.796875, "learning_rate": 4.857771402653244e-05, "loss": 0.824, "step": 6238 }, { "epoch": 0.1106103511080977, "grad_norm": 2.515625, "learning_rate": 4.857678530722933e-05, "loss": 0.7858, "step": 6240 }, { "epoch": 0.11064580314370925, "grad_norm": 2.984375, "learning_rate": 4.857585629369287e-05, "loss": 0.8481, "step": 6242 }, { "epoch": 0.11068125517932083, "grad_norm": 2.625, "learning_rate": 4.857492698593462e-05, "loss": 0.8059, "step": 6244 }, { "epoch": 0.1107167072149324, "grad_norm": 2.40625, "learning_rate": 4.85739973839662e-05, "loss": 0.7678, "step": 6246 }, { "epoch": 0.11075215925054396, "grad_norm": 2.765625, "learning_rate": 4.857306748779919e-05, "loss": 0.814, "step": 6248 }, { "epoch": 0.11078761128615554, "grad_norm": 2.578125, "learning_rate": 4.857213729744521e-05, "loss": 0.8241, "step": 6250 }, { "epoch": 0.11082306332176711, "grad_norm": 2.984375, "learning_rate": 4.857120681291587e-05, "loss": 0.8479, "step": 6252 }, { "epoch": 0.11085851535737867, "grad_norm": 2.828125, "learning_rate": 4.8570276034222787e-05, "loss": 0.7965, "step": 6254 }, { "epoch": 0.11089396739299025, "grad_norm": 2.71875, "learning_rate": 4.856934496137756e-05, "loss": 0.8589, "step": 6256 }, { "epoch": 0.11092941942860182, "grad_norm": 2.84375, "learning_rate": 4.8568413594391814e-05, "loss": 0.7966, "step": 6258 }, { "epoch": 0.11096487146421338, "grad_norm": 2.84375, "learning_rate": 4.856748193327718e-05, "loss": 0.8463, "step": 6260 }, { "epoch": 0.11100032349982496, "grad_norm": 2.734375, "learning_rate": 4.856654997804528e-05, "loss": 0.8121, "step": 6262 }, { "epoch": 0.11103577553543653, "grad_norm": 2.859375, "learning_rate": 4.8565617728707745e-05, "loss": 0.838, "step": 6264 }, { "epoch": 0.11107122757104809, "grad_norm": 2.78125, "learning_rate": 4.8564685185276204e-05, "loss": 0.8355, "step": 6266 }, { "epoch": 0.11110667960665967, "grad_norm": 2.578125, "learning_rate": 4.8563752347762305e-05, "loss": 0.8608, "step": 6268 }, { "epoch": 0.11114213164227124, "grad_norm": 2.859375, "learning_rate": 4.856281921617768e-05, "loss": 0.7789, "step": 6270 }, { "epoch": 0.1111775836778828, "grad_norm": 2.96875, "learning_rate": 4.856188579053399e-05, "loss": 0.8424, "step": 6272 }, { "epoch": 0.11121303571349438, "grad_norm": 2.984375, "learning_rate": 4.856095207084286e-05, "loss": 0.8807, "step": 6274 }, { "epoch": 0.11124848774910594, "grad_norm": 2.953125, "learning_rate": 4.8560018057115965e-05, "loss": 0.83, "step": 6276 }, { "epoch": 0.11128393978471751, "grad_norm": 2.640625, "learning_rate": 4.855908374936495e-05, "loss": 0.806, "step": 6278 }, { "epoch": 0.11131939182032909, "grad_norm": 2.6875, "learning_rate": 4.8558149147601474e-05, "loss": 0.855, "step": 6280 }, { "epoch": 0.11135484385594065, "grad_norm": 2.453125, "learning_rate": 4.85572142518372e-05, "loss": 0.7635, "step": 6282 }, { "epoch": 0.11139029589155222, "grad_norm": 2.5625, "learning_rate": 4.855627906208381e-05, "loss": 0.7634, "step": 6284 }, { "epoch": 0.1114257479271638, "grad_norm": 2.703125, "learning_rate": 4.8555343578352955e-05, "loss": 0.8574, "step": 6286 }, { "epoch": 0.11146119996277536, "grad_norm": 3.109375, "learning_rate": 4.855440780065632e-05, "loss": 0.8141, "step": 6288 }, { "epoch": 0.11149665199838693, "grad_norm": 2.859375, "learning_rate": 4.8553471729005577e-05, "loss": 0.8272, "step": 6290 }, { "epoch": 0.1115321040339985, "grad_norm": 2.71875, "learning_rate": 4.855253536341242e-05, "loss": 0.8545, "step": 6292 }, { "epoch": 0.11156755606961007, "grad_norm": 2.90625, "learning_rate": 4.8551598703888525e-05, "loss": 0.8552, "step": 6294 }, { "epoch": 0.11160300810522164, "grad_norm": 2.8125, "learning_rate": 4.855066175044558e-05, "loss": 0.8481, "step": 6296 }, { "epoch": 0.11163846014083322, "grad_norm": 2.609375, "learning_rate": 4.8549724503095286e-05, "loss": 0.8124, "step": 6298 }, { "epoch": 0.11167391217644478, "grad_norm": 2.828125, "learning_rate": 4.8548786961849334e-05, "loss": 0.8208, "step": 6300 }, { "epoch": 0.11170936421205635, "grad_norm": 2.75, "learning_rate": 4.8547849126719426e-05, "loss": 0.8274, "step": 6302 }, { "epoch": 0.11174481624766792, "grad_norm": 2.53125, "learning_rate": 4.8546910997717264e-05, "loss": 0.8817, "step": 6304 }, { "epoch": 0.11178026828327949, "grad_norm": 3.078125, "learning_rate": 4.854597257485456e-05, "loss": 0.8826, "step": 6306 }, { "epoch": 0.11181572031889106, "grad_norm": 2.90625, "learning_rate": 4.8545033858143025e-05, "loss": 0.8136, "step": 6308 }, { "epoch": 0.11185117235450263, "grad_norm": 2.875, "learning_rate": 4.8544094847594366e-05, "loss": 0.8205, "step": 6310 }, { "epoch": 0.1118866243901142, "grad_norm": 2.65625, "learning_rate": 4.854315554322031e-05, "loss": 0.8596, "step": 6312 }, { "epoch": 0.11192207642572577, "grad_norm": 2.890625, "learning_rate": 4.854221594503258e-05, "loss": 0.8556, "step": 6314 }, { "epoch": 0.11195752846133734, "grad_norm": 2.65625, "learning_rate": 4.854127605304289e-05, "loss": 0.8152, "step": 6316 }, { "epoch": 0.1119929804969489, "grad_norm": 2.8125, "learning_rate": 4.8540335867262976e-05, "loss": 0.8278, "step": 6318 }, { "epoch": 0.11202843253256048, "grad_norm": 2.6875, "learning_rate": 4.853939538770458e-05, "loss": 0.8356, "step": 6320 }, { "epoch": 0.11206388456817205, "grad_norm": 2.4375, "learning_rate": 4.8538454614379435e-05, "loss": 0.809, "step": 6322 }, { "epoch": 0.11209933660378361, "grad_norm": 2.609375, "learning_rate": 4.8537513547299276e-05, "loss": 0.8496, "step": 6324 }, { "epoch": 0.11213478863939519, "grad_norm": 2.734375, "learning_rate": 4.853657218647585e-05, "loss": 0.8314, "step": 6326 }, { "epoch": 0.11217024067500676, "grad_norm": 2.890625, "learning_rate": 4.853563053192091e-05, "loss": 0.8568, "step": 6328 }, { "epoch": 0.11220569271061832, "grad_norm": 2.453125, "learning_rate": 4.853468858364619e-05, "loss": 0.8321, "step": 6330 }, { "epoch": 0.1122411447462299, "grad_norm": 2.609375, "learning_rate": 4.853374634166347e-05, "loss": 0.854, "step": 6332 }, { "epoch": 0.11227659678184147, "grad_norm": 2.6875, "learning_rate": 4.8532803805984496e-05, "loss": 0.8576, "step": 6334 }, { "epoch": 0.11231204881745303, "grad_norm": 3.078125, "learning_rate": 4.853186097662103e-05, "loss": 0.8006, "step": 6336 }, { "epoch": 0.11234750085306461, "grad_norm": 2.796875, "learning_rate": 4.853091785358485e-05, "loss": 0.8828, "step": 6338 }, { "epoch": 0.11238295288867618, "grad_norm": 2.71875, "learning_rate": 4.85299744368877e-05, "loss": 0.8404, "step": 6340 }, { "epoch": 0.11241840492428774, "grad_norm": 2.65625, "learning_rate": 4.8529030726541383e-05, "loss": 0.8326, "step": 6342 }, { "epoch": 0.11245385695989932, "grad_norm": 3.015625, "learning_rate": 4.8528086722557656e-05, "loss": 0.8589, "step": 6344 }, { "epoch": 0.11248930899551089, "grad_norm": 2.640625, "learning_rate": 4.8527142424948305e-05, "loss": 0.8304, "step": 6346 }, { "epoch": 0.11252476103112245, "grad_norm": 2.609375, "learning_rate": 4.8526197833725126e-05, "loss": 0.8005, "step": 6348 }, { "epoch": 0.11256021306673403, "grad_norm": 2.859375, "learning_rate": 4.852525294889989e-05, "loss": 0.8631, "step": 6350 }, { "epoch": 0.1125956651023456, "grad_norm": 2.859375, "learning_rate": 4.85243077704844e-05, "loss": 0.8739, "step": 6352 }, { "epoch": 0.11263111713795716, "grad_norm": 2.484375, "learning_rate": 4.8523362298490446e-05, "loss": 0.8413, "step": 6354 }, { "epoch": 0.11266656917356874, "grad_norm": 2.671875, "learning_rate": 4.852241653292984e-05, "loss": 0.8242, "step": 6356 }, { "epoch": 0.11270202120918031, "grad_norm": 2.828125, "learning_rate": 4.8521470473814365e-05, "loss": 0.8035, "step": 6358 }, { "epoch": 0.11273747324479187, "grad_norm": 2.75, "learning_rate": 4.852052412115584e-05, "loss": 0.7923, "step": 6360 }, { "epoch": 0.11277292528040345, "grad_norm": 2.875, "learning_rate": 4.8519577474966074e-05, "loss": 0.8228, "step": 6362 }, { "epoch": 0.11280837731601501, "grad_norm": 2.515625, "learning_rate": 4.851863053525688e-05, "loss": 0.7987, "step": 6364 }, { "epoch": 0.11284382935162658, "grad_norm": 2.75, "learning_rate": 4.8517683302040075e-05, "loss": 0.8717, "step": 6366 }, { "epoch": 0.11287928138723816, "grad_norm": 2.859375, "learning_rate": 4.851673577532748e-05, "loss": 0.8595, "step": 6368 }, { "epoch": 0.11291473342284972, "grad_norm": 2.859375, "learning_rate": 4.851578795513092e-05, "loss": 0.8714, "step": 6370 }, { "epoch": 0.11295018545846129, "grad_norm": 3.015625, "learning_rate": 4.851483984146223e-05, "loss": 0.8443, "step": 6372 }, { "epoch": 0.11298563749407287, "grad_norm": 2.84375, "learning_rate": 4.851389143433323e-05, "loss": 0.8583, "step": 6374 }, { "epoch": 0.11302108952968443, "grad_norm": 2.84375, "learning_rate": 4.8512942733755764e-05, "loss": 0.8792, "step": 6376 }, { "epoch": 0.113056541565296, "grad_norm": 2.875, "learning_rate": 4.8511993739741676e-05, "loss": 0.8085, "step": 6378 }, { "epoch": 0.11309199360090758, "grad_norm": 2.78125, "learning_rate": 4.85110444523028e-05, "loss": 0.8323, "step": 6380 }, { "epoch": 0.11312744563651914, "grad_norm": 2.6875, "learning_rate": 4.851009487145098e-05, "loss": 0.8491, "step": 6382 }, { "epoch": 0.11316289767213071, "grad_norm": 2.875, "learning_rate": 4.850914499719807e-05, "loss": 0.842, "step": 6384 }, { "epoch": 0.11319834970774229, "grad_norm": 2.78125, "learning_rate": 4.850819482955594e-05, "loss": 0.8243, "step": 6386 }, { "epoch": 0.11323380174335385, "grad_norm": 2.75, "learning_rate": 4.850724436853643e-05, "loss": 0.8169, "step": 6388 }, { "epoch": 0.11326925377896542, "grad_norm": 2.96875, "learning_rate": 4.8506293614151404e-05, "loss": 0.83, "step": 6390 }, { "epoch": 0.113304705814577, "grad_norm": 3.046875, "learning_rate": 4.8505342566412734e-05, "loss": 0.8608, "step": 6392 }, { "epoch": 0.11334015785018856, "grad_norm": 3.046875, "learning_rate": 4.8504391225332277e-05, "loss": 0.8466, "step": 6394 }, { "epoch": 0.11337560988580013, "grad_norm": 2.875, "learning_rate": 4.8503439590921925e-05, "loss": 0.8541, "step": 6396 }, { "epoch": 0.1134110619214117, "grad_norm": 2.859375, "learning_rate": 4.850248766319353e-05, "loss": 0.8495, "step": 6398 }, { "epoch": 0.11344651395702327, "grad_norm": 2.8125, "learning_rate": 4.8501535442159e-05, "loss": 0.8642, "step": 6400 }, { "epoch": 0.11348196599263484, "grad_norm": 2.8125, "learning_rate": 4.8500582927830185e-05, "loss": 0.8203, "step": 6402 }, { "epoch": 0.11351741802824641, "grad_norm": 2.953125, "learning_rate": 4.8499630120218994e-05, "loss": 0.8405, "step": 6404 }, { "epoch": 0.11355287006385797, "grad_norm": 3.109375, "learning_rate": 4.849867701933732e-05, "loss": 0.8076, "step": 6406 }, { "epoch": 0.11358832209946955, "grad_norm": 2.828125, "learning_rate": 4.849772362519704e-05, "loss": 0.8343, "step": 6408 }, { "epoch": 0.11362377413508112, "grad_norm": 2.5625, "learning_rate": 4.849676993781008e-05, "loss": 0.8003, "step": 6410 }, { "epoch": 0.11365922617069268, "grad_norm": 3.09375, "learning_rate": 4.849581595718832e-05, "loss": 0.8195, "step": 6412 }, { "epoch": 0.11369467820630426, "grad_norm": 2.796875, "learning_rate": 4.849486168334366e-05, "loss": 0.8063, "step": 6414 }, { "epoch": 0.11373013024191583, "grad_norm": 2.8125, "learning_rate": 4.849390711628803e-05, "loss": 0.8344, "step": 6416 }, { "epoch": 0.1137655822775274, "grad_norm": 2.90625, "learning_rate": 4.8492952256033333e-05, "loss": 0.852, "step": 6418 }, { "epoch": 0.11380103431313897, "grad_norm": 2.84375, "learning_rate": 4.849199710259148e-05, "loss": 0.8153, "step": 6420 }, { "epoch": 0.11383648634875054, "grad_norm": 2.609375, "learning_rate": 4.84910416559744e-05, "loss": 0.8212, "step": 6422 }, { "epoch": 0.1138719383843621, "grad_norm": 2.75, "learning_rate": 4.8490085916194005e-05, "loss": 0.8511, "step": 6424 }, { "epoch": 0.11390739041997368, "grad_norm": 2.8125, "learning_rate": 4.848912988326224e-05, "loss": 0.8078, "step": 6426 }, { "epoch": 0.11394284245558525, "grad_norm": 2.71875, "learning_rate": 4.8488173557191026e-05, "loss": 0.8286, "step": 6428 }, { "epoch": 0.11397829449119681, "grad_norm": 2.59375, "learning_rate": 4.8487216937992294e-05, "loss": 0.863, "step": 6430 }, { "epoch": 0.11401374652680839, "grad_norm": 2.65625, "learning_rate": 4.848626002567799e-05, "loss": 0.8603, "step": 6432 }, { "epoch": 0.11404919856241996, "grad_norm": 2.640625, "learning_rate": 4.8485302820260045e-05, "loss": 0.847, "step": 6434 }, { "epoch": 0.11408465059803152, "grad_norm": 2.640625, "learning_rate": 4.848434532175042e-05, "loss": 0.8007, "step": 6436 }, { "epoch": 0.1141201026336431, "grad_norm": 2.828125, "learning_rate": 4.8483387530161054e-05, "loss": 0.8277, "step": 6438 }, { "epoch": 0.11415555466925467, "grad_norm": 2.609375, "learning_rate": 4.8482429445503905e-05, "loss": 0.7848, "step": 6440 }, { "epoch": 0.11419100670486623, "grad_norm": 2.765625, "learning_rate": 4.848147106779093e-05, "loss": 0.804, "step": 6442 }, { "epoch": 0.11422645874047781, "grad_norm": 3.0, "learning_rate": 4.848051239703408e-05, "loss": 0.8579, "step": 6444 }, { "epoch": 0.11426191077608938, "grad_norm": 2.859375, "learning_rate": 4.8479553433245325e-05, "loss": 0.8028, "step": 6446 }, { "epoch": 0.11429736281170094, "grad_norm": 2.75, "learning_rate": 4.847859417643664e-05, "loss": 0.8015, "step": 6448 }, { "epoch": 0.11433281484731252, "grad_norm": 2.6875, "learning_rate": 4.847763462661999e-05, "loss": 0.8323, "step": 6450 }, { "epoch": 0.11436826688292408, "grad_norm": 2.578125, "learning_rate": 4.8476674783807344e-05, "loss": 0.8275, "step": 6452 }, { "epoch": 0.11440371891853565, "grad_norm": 2.625, "learning_rate": 4.84757146480107e-05, "loss": 0.9091, "step": 6454 }, { "epoch": 0.11443917095414723, "grad_norm": 2.890625, "learning_rate": 4.8474754219242016e-05, "loss": 0.8291, "step": 6456 }, { "epoch": 0.11447462298975879, "grad_norm": 2.640625, "learning_rate": 4.847379349751329e-05, "loss": 0.8048, "step": 6458 }, { "epoch": 0.11451007502537036, "grad_norm": 2.734375, "learning_rate": 4.8472832482836504e-05, "loss": 0.8122, "step": 6460 }, { "epoch": 0.11454552706098194, "grad_norm": 2.9375, "learning_rate": 4.847187117522366e-05, "loss": 0.8276, "step": 6462 }, { "epoch": 0.1145809790965935, "grad_norm": 2.765625, "learning_rate": 4.8470909574686764e-05, "loss": 0.8586, "step": 6464 }, { "epoch": 0.11461643113220507, "grad_norm": 2.75, "learning_rate": 4.846994768123779e-05, "loss": 0.8073, "step": 6466 }, { "epoch": 0.11465188316781665, "grad_norm": 2.703125, "learning_rate": 4.846898549488877e-05, "loss": 0.8771, "step": 6468 }, { "epoch": 0.1146873352034282, "grad_norm": 2.890625, "learning_rate": 4.846802301565169e-05, "loss": 0.8059, "step": 6470 }, { "epoch": 0.11472278723903978, "grad_norm": 2.625, "learning_rate": 4.8467060243538574e-05, "loss": 0.8012, "step": 6472 }, { "epoch": 0.11475823927465136, "grad_norm": 3.15625, "learning_rate": 4.8466097178561435e-05, "loss": 0.865, "step": 6474 }, { "epoch": 0.11479369131026292, "grad_norm": 3.078125, "learning_rate": 4.84651338207323e-05, "loss": 0.8429, "step": 6476 }, { "epoch": 0.11482914334587449, "grad_norm": 2.546875, "learning_rate": 4.846417017006317e-05, "loss": 0.8308, "step": 6478 }, { "epoch": 0.11486459538148606, "grad_norm": 2.796875, "learning_rate": 4.8463206226566084e-05, "loss": 0.8076, "step": 6480 }, { "epoch": 0.11490004741709763, "grad_norm": 2.625, "learning_rate": 4.8462241990253077e-05, "loss": 0.8155, "step": 6482 }, { "epoch": 0.1149354994527092, "grad_norm": 2.90625, "learning_rate": 4.846127746113617e-05, "loss": 0.8628, "step": 6484 }, { "epoch": 0.11497095148832077, "grad_norm": 2.6875, "learning_rate": 4.8460312639227414e-05, "loss": 0.8624, "step": 6486 }, { "epoch": 0.11500640352393234, "grad_norm": 2.515625, "learning_rate": 4.8459347524538834e-05, "loss": 0.8101, "step": 6488 }, { "epoch": 0.11504185555954391, "grad_norm": 2.765625, "learning_rate": 4.845838211708249e-05, "loss": 0.8185, "step": 6490 }, { "epoch": 0.11507730759515548, "grad_norm": 2.5, "learning_rate": 4.845741641687042e-05, "loss": 0.8343, "step": 6492 }, { "epoch": 0.11511275963076704, "grad_norm": 2.8125, "learning_rate": 4.8456450423914677e-05, "loss": 0.8335, "step": 6494 }, { "epoch": 0.11514821166637862, "grad_norm": 2.484375, "learning_rate": 4.845548413822733e-05, "loss": 0.8088, "step": 6496 }, { "epoch": 0.1151836637019902, "grad_norm": 3.0, "learning_rate": 4.8454517559820414e-05, "loss": 0.8173, "step": 6498 }, { "epoch": 0.11521911573760175, "grad_norm": 2.65625, "learning_rate": 4.8453550688706006e-05, "loss": 0.8532, "step": 6500 }, { "epoch": 0.11525456777321333, "grad_norm": 2.609375, "learning_rate": 4.845258352489618e-05, "loss": 0.7932, "step": 6502 }, { "epoch": 0.1152900198088249, "grad_norm": 2.796875, "learning_rate": 4.8451616068402985e-05, "loss": 0.8306, "step": 6504 }, { "epoch": 0.11532547184443646, "grad_norm": 3.015625, "learning_rate": 4.845064831923851e-05, "loss": 0.8666, "step": 6506 }, { "epoch": 0.11536092388004804, "grad_norm": 2.96875, "learning_rate": 4.8449680277414834e-05, "loss": 0.8321, "step": 6508 }, { "epoch": 0.11539637591565961, "grad_norm": 2.921875, "learning_rate": 4.8448711942944025e-05, "loss": 0.8143, "step": 6510 }, { "epoch": 0.11543182795127117, "grad_norm": 2.625, "learning_rate": 4.844774331583818e-05, "loss": 0.8275, "step": 6512 }, { "epoch": 0.11546727998688275, "grad_norm": 2.5, "learning_rate": 4.8446774396109375e-05, "loss": 0.8232, "step": 6514 }, { "epoch": 0.11550273202249432, "grad_norm": 2.53125, "learning_rate": 4.844580518376971e-05, "loss": 0.8541, "step": 6516 }, { "epoch": 0.11553818405810588, "grad_norm": 2.875, "learning_rate": 4.844483567883128e-05, "loss": 0.857, "step": 6518 }, { "epoch": 0.11557363609371746, "grad_norm": 2.96875, "learning_rate": 4.8443865881306194e-05, "loss": 0.8258, "step": 6520 }, { "epoch": 0.11560908812932903, "grad_norm": 2.703125, "learning_rate": 4.844289579120653e-05, "loss": 0.8415, "step": 6522 }, { "epoch": 0.11564454016494059, "grad_norm": 2.96875, "learning_rate": 4.844192540854442e-05, "loss": 0.8205, "step": 6524 }, { "epoch": 0.11567999220055217, "grad_norm": 2.59375, "learning_rate": 4.844095473333197e-05, "loss": 0.7767, "step": 6526 }, { "epoch": 0.11571544423616374, "grad_norm": 2.8125, "learning_rate": 4.8439983765581274e-05, "loss": 0.7606, "step": 6528 }, { "epoch": 0.1157508962717753, "grad_norm": 2.421875, "learning_rate": 4.8439012505304465e-05, "loss": 0.7859, "step": 6530 }, { "epoch": 0.11578634830738688, "grad_norm": 2.859375, "learning_rate": 4.8438040952513664e-05, "loss": 0.7816, "step": 6532 }, { "epoch": 0.11582180034299844, "grad_norm": 2.625, "learning_rate": 4.8437069107220994e-05, "loss": 0.8353, "step": 6534 }, { "epoch": 0.11585725237861001, "grad_norm": 3.21875, "learning_rate": 4.8436096969438584e-05, "loss": 0.8546, "step": 6536 }, { "epoch": 0.11589270441422159, "grad_norm": 2.625, "learning_rate": 4.8435124539178564e-05, "loss": 0.7771, "step": 6538 }, { "epoch": 0.11592815644983315, "grad_norm": 2.875, "learning_rate": 4.843415181645306e-05, "loss": 0.8452, "step": 6540 }, { "epoch": 0.11596360848544472, "grad_norm": 2.90625, "learning_rate": 4.8433178801274244e-05, "loss": 0.8423, "step": 6542 }, { "epoch": 0.1159990605210563, "grad_norm": 2.6875, "learning_rate": 4.8432205493654224e-05, "loss": 0.8103, "step": 6544 }, { "epoch": 0.11603451255666786, "grad_norm": 2.59375, "learning_rate": 4.843123189360516e-05, "loss": 0.8496, "step": 6546 }, { "epoch": 0.11606996459227943, "grad_norm": 3.0, "learning_rate": 4.8430258001139206e-05, "loss": 0.857, "step": 6548 }, { "epoch": 0.116105416627891, "grad_norm": 2.71875, "learning_rate": 4.842928381626851e-05, "loss": 0.8203, "step": 6550 }, { "epoch": 0.11614086866350257, "grad_norm": 2.84375, "learning_rate": 4.8428309339005235e-05, "loss": 0.8282, "step": 6552 }, { "epoch": 0.11617632069911414, "grad_norm": 2.953125, "learning_rate": 4.8427334569361537e-05, "loss": 0.8114, "step": 6554 }, { "epoch": 0.11621177273472572, "grad_norm": 2.8125, "learning_rate": 4.842635950734958e-05, "loss": 0.8155, "step": 6556 }, { "epoch": 0.11624722477033728, "grad_norm": 2.890625, "learning_rate": 4.8425384152981545e-05, "loss": 0.8523, "step": 6558 }, { "epoch": 0.11628267680594885, "grad_norm": 2.625, "learning_rate": 4.8424408506269585e-05, "loss": 0.8089, "step": 6560 }, { "epoch": 0.11631812884156043, "grad_norm": 3.578125, "learning_rate": 4.842343256722589e-05, "loss": 0.8448, "step": 6562 }, { "epoch": 0.11635358087717199, "grad_norm": 2.734375, "learning_rate": 4.842245633586264e-05, "loss": 0.8025, "step": 6564 }, { "epoch": 0.11638903291278356, "grad_norm": 2.578125, "learning_rate": 4.842147981219201e-05, "loss": 0.8363, "step": 6566 }, { "epoch": 0.11642448494839513, "grad_norm": 2.78125, "learning_rate": 4.842050299622618e-05, "loss": 0.8789, "step": 6568 }, { "epoch": 0.1164599369840067, "grad_norm": 2.5625, "learning_rate": 4.841952588797736e-05, "loss": 0.7869, "step": 6570 }, { "epoch": 0.11649538901961827, "grad_norm": 2.6875, "learning_rate": 4.841854848745774e-05, "loss": 0.8549, "step": 6572 }, { "epoch": 0.11653084105522984, "grad_norm": 2.625, "learning_rate": 4.84175707946795e-05, "loss": 0.829, "step": 6574 }, { "epoch": 0.1165662930908414, "grad_norm": 2.59375, "learning_rate": 4.8416592809654865e-05, "loss": 0.8299, "step": 6576 }, { "epoch": 0.11660174512645298, "grad_norm": 2.515625, "learning_rate": 4.841561453239602e-05, "loss": 0.8258, "step": 6578 }, { "epoch": 0.11663719716206455, "grad_norm": 2.640625, "learning_rate": 4.84146359629152e-05, "loss": 0.7927, "step": 6580 }, { "epoch": 0.11667264919767611, "grad_norm": 2.796875, "learning_rate": 4.841365710122458e-05, "loss": 0.8182, "step": 6582 }, { "epoch": 0.11670810123328769, "grad_norm": 2.6875, "learning_rate": 4.84126779473364e-05, "loss": 0.8024, "step": 6584 }, { "epoch": 0.11674355326889926, "grad_norm": 2.671875, "learning_rate": 4.841169850126288e-05, "loss": 0.8211, "step": 6586 }, { "epoch": 0.11677900530451082, "grad_norm": 2.78125, "learning_rate": 4.841071876301625e-05, "loss": 0.8367, "step": 6588 }, { "epoch": 0.1168144573401224, "grad_norm": 2.75, "learning_rate": 4.840973873260871e-05, "loss": 0.8627, "step": 6590 }, { "epoch": 0.11684990937573397, "grad_norm": 2.734375, "learning_rate": 4.8408758410052514e-05, "loss": 0.8322, "step": 6592 }, { "epoch": 0.11688536141134553, "grad_norm": 2.71875, "learning_rate": 4.840777779535988e-05, "loss": 0.8435, "step": 6594 }, { "epoch": 0.11692081344695711, "grad_norm": 3.203125, "learning_rate": 4.840679688854306e-05, "loss": 0.8276, "step": 6596 }, { "epoch": 0.11695626548256868, "grad_norm": 2.71875, "learning_rate": 4.840581568961429e-05, "loss": 0.8271, "step": 6598 }, { "epoch": 0.11699171751818024, "grad_norm": 2.609375, "learning_rate": 4.840483419858582e-05, "loss": 0.8112, "step": 6600 }, { "epoch": 0.11702716955379182, "grad_norm": 2.828125, "learning_rate": 4.8403852415469885e-05, "loss": 0.8333, "step": 6602 }, { "epoch": 0.11706262158940339, "grad_norm": 2.765625, "learning_rate": 4.840287034027876e-05, "loss": 0.8411, "step": 6604 }, { "epoch": 0.11709807362501495, "grad_norm": 2.375, "learning_rate": 4.840188797302467e-05, "loss": 0.824, "step": 6606 }, { "epoch": 0.11713352566062653, "grad_norm": 2.640625, "learning_rate": 4.84009053137199e-05, "loss": 0.8263, "step": 6608 }, { "epoch": 0.1171689776962381, "grad_norm": 2.796875, "learning_rate": 4.839992236237672e-05, "loss": 0.8372, "step": 6610 }, { "epoch": 0.11720442973184966, "grad_norm": 2.84375, "learning_rate": 4.8398939119007365e-05, "loss": 0.8426, "step": 6612 }, { "epoch": 0.11723988176746124, "grad_norm": 2.734375, "learning_rate": 4.839795558362413e-05, "loss": 0.812, "step": 6614 }, { "epoch": 0.11727533380307281, "grad_norm": 2.75, "learning_rate": 4.839697175623928e-05, "loss": 0.8206, "step": 6616 }, { "epoch": 0.11731078583868437, "grad_norm": 2.8125, "learning_rate": 4.839598763686509e-05, "loss": 0.8273, "step": 6618 }, { "epoch": 0.11734623787429595, "grad_norm": 2.796875, "learning_rate": 4.839500322551386e-05, "loss": 0.8424, "step": 6620 }, { "epoch": 0.11738168990990751, "grad_norm": 2.4375, "learning_rate": 4.839401852219786e-05, "loss": 0.8293, "step": 6622 }, { "epoch": 0.11741714194551908, "grad_norm": 2.90625, "learning_rate": 4.839303352692938e-05, "loss": 0.8094, "step": 6624 }, { "epoch": 0.11745259398113066, "grad_norm": 2.796875, "learning_rate": 4.8392048239720703e-05, "loss": 0.7789, "step": 6626 }, { "epoch": 0.11748804601674222, "grad_norm": 2.9375, "learning_rate": 4.839106266058415e-05, "loss": 0.8413, "step": 6628 }, { "epoch": 0.11752349805235379, "grad_norm": 2.546875, "learning_rate": 4.8390076789532004e-05, "loss": 0.7869, "step": 6630 }, { "epoch": 0.11755895008796537, "grad_norm": 2.734375, "learning_rate": 4.838909062657657e-05, "loss": 0.833, "step": 6632 }, { "epoch": 0.11759440212357693, "grad_norm": 2.765625, "learning_rate": 4.838810417173015e-05, "loss": 0.8326, "step": 6634 }, { "epoch": 0.1176298541591885, "grad_norm": 2.921875, "learning_rate": 4.8387117425005066e-05, "loss": 0.8743, "step": 6636 }, { "epoch": 0.11766530619480008, "grad_norm": 2.609375, "learning_rate": 4.8386130386413635e-05, "loss": 0.8445, "step": 6638 }, { "epoch": 0.11770075823041164, "grad_norm": 2.796875, "learning_rate": 4.8385143055968166e-05, "loss": 0.8199, "step": 6640 }, { "epoch": 0.11773621026602321, "grad_norm": 2.453125, "learning_rate": 4.838415543368098e-05, "loss": 0.8384, "step": 6642 }, { "epoch": 0.11777166230163479, "grad_norm": 2.6875, "learning_rate": 4.83831675195644e-05, "loss": 0.8122, "step": 6644 }, { "epoch": 0.11780711433724635, "grad_norm": 2.65625, "learning_rate": 4.838217931363076e-05, "loss": 0.8082, "step": 6646 }, { "epoch": 0.11784256637285792, "grad_norm": 2.703125, "learning_rate": 4.8381190815892394e-05, "loss": 0.829, "step": 6648 }, { "epoch": 0.1178780184084695, "grad_norm": 2.59375, "learning_rate": 4.8380202026361644e-05, "loss": 0.8042, "step": 6650 }, { "epoch": 0.11791347044408106, "grad_norm": 2.734375, "learning_rate": 4.837921294505083e-05, "loss": 0.8213, "step": 6652 }, { "epoch": 0.11794892247969263, "grad_norm": 2.625, "learning_rate": 4.837822357197232e-05, "loss": 0.7967, "step": 6654 }, { "epoch": 0.1179843745153042, "grad_norm": 2.8125, "learning_rate": 4.8377233907138444e-05, "loss": 0.8582, "step": 6656 }, { "epoch": 0.11801982655091577, "grad_norm": 2.65625, "learning_rate": 4.837624395056155e-05, "loss": 0.8131, "step": 6658 }, { "epoch": 0.11805527858652734, "grad_norm": 2.921875, "learning_rate": 4.837525370225401e-05, "loss": 0.8288, "step": 6660 }, { "epoch": 0.11809073062213891, "grad_norm": 2.703125, "learning_rate": 4.8374263162228176e-05, "loss": 0.8583, "step": 6662 }, { "epoch": 0.11812618265775048, "grad_norm": 2.765625, "learning_rate": 4.83732723304964e-05, "loss": 0.8188, "step": 6664 }, { "epoch": 0.11816163469336205, "grad_norm": 3.0, "learning_rate": 4.837228120707106e-05, "loss": 0.8512, "step": 6666 }, { "epoch": 0.11819708672897362, "grad_norm": 2.484375, "learning_rate": 4.837128979196451e-05, "loss": 0.815, "step": 6668 }, { "epoch": 0.11823253876458518, "grad_norm": 3.0625, "learning_rate": 4.8370298085189134e-05, "loss": 0.8627, "step": 6670 }, { "epoch": 0.11826799080019676, "grad_norm": 2.703125, "learning_rate": 4.83693060867573e-05, "loss": 0.8659, "step": 6672 }, { "epoch": 0.11830344283580833, "grad_norm": 2.90625, "learning_rate": 4.8368313796681404e-05, "loss": 0.8722, "step": 6674 }, { "epoch": 0.1183388948714199, "grad_norm": 2.53125, "learning_rate": 4.8367321214973815e-05, "loss": 0.8038, "step": 6676 }, { "epoch": 0.11837434690703147, "grad_norm": 2.859375, "learning_rate": 4.836632834164692e-05, "loss": 0.8188, "step": 6678 }, { "epoch": 0.11840979894264304, "grad_norm": 2.671875, "learning_rate": 4.836533517671312e-05, "loss": 0.8267, "step": 6680 }, { "epoch": 0.1184452509782546, "grad_norm": 2.609375, "learning_rate": 4.83643417201848e-05, "loss": 0.8017, "step": 6682 }, { "epoch": 0.11848070301386618, "grad_norm": 2.609375, "learning_rate": 4.836334797207437e-05, "loss": 0.8188, "step": 6684 }, { "epoch": 0.11851615504947775, "grad_norm": 2.71875, "learning_rate": 4.836235393239421e-05, "loss": 0.7921, "step": 6686 }, { "epoch": 0.11855160708508931, "grad_norm": 2.6875, "learning_rate": 4.836135960115675e-05, "loss": 0.8295, "step": 6688 }, { "epoch": 0.11858705912070089, "grad_norm": 3.03125, "learning_rate": 4.8360364978374384e-05, "loss": 0.8566, "step": 6690 }, { "epoch": 0.11862251115631246, "grad_norm": 2.6875, "learning_rate": 4.835937006405953e-05, "loss": 0.8308, "step": 6692 }, { "epoch": 0.11865796319192402, "grad_norm": 3.03125, "learning_rate": 4.83583748582246e-05, "loss": 0.8197, "step": 6694 }, { "epoch": 0.1186934152275356, "grad_norm": 2.59375, "learning_rate": 4.8357379360882014e-05, "loss": 0.8059, "step": 6696 }, { "epoch": 0.11872886726314717, "grad_norm": 2.953125, "learning_rate": 4.8356383572044206e-05, "loss": 0.829, "step": 6698 }, { "epoch": 0.11876431929875873, "grad_norm": 2.609375, "learning_rate": 4.835538749172359e-05, "loss": 0.8027, "step": 6700 }, { "epoch": 0.11879977133437031, "grad_norm": 2.796875, "learning_rate": 4.835439111993261e-05, "loss": 0.8291, "step": 6702 }, { "epoch": 0.11883522336998187, "grad_norm": 2.765625, "learning_rate": 4.835339445668369e-05, "loss": 0.8159, "step": 6704 }, { "epoch": 0.11887067540559344, "grad_norm": 2.734375, "learning_rate": 4.8352397501989265e-05, "loss": 0.8232, "step": 6706 }, { "epoch": 0.11890612744120502, "grad_norm": 2.78125, "learning_rate": 4.835140025586179e-05, "loss": 0.8433, "step": 6708 }, { "epoch": 0.11894157947681658, "grad_norm": 2.59375, "learning_rate": 4.83504027183137e-05, "loss": 0.811, "step": 6710 }, { "epoch": 0.11897703151242815, "grad_norm": 2.640625, "learning_rate": 4.8349404889357455e-05, "loss": 0.8117, "step": 6712 }, { "epoch": 0.11901248354803973, "grad_norm": 2.484375, "learning_rate": 4.8348406769005494e-05, "loss": 0.8082, "step": 6714 }, { "epoch": 0.11904793558365129, "grad_norm": 2.953125, "learning_rate": 4.834740835727028e-05, "loss": 0.8391, "step": 6716 }, { "epoch": 0.11908338761926286, "grad_norm": 2.71875, "learning_rate": 4.834640965416427e-05, "loss": 0.7962, "step": 6718 }, { "epoch": 0.11911883965487444, "grad_norm": 2.828125, "learning_rate": 4.834541065969993e-05, "loss": 0.8261, "step": 6720 }, { "epoch": 0.119154291690486, "grad_norm": 2.78125, "learning_rate": 4.834441137388973e-05, "loss": 0.7957, "step": 6722 }, { "epoch": 0.11918974372609757, "grad_norm": 2.75, "learning_rate": 4.834341179674614e-05, "loss": 0.8004, "step": 6724 }, { "epoch": 0.11922519576170915, "grad_norm": 2.796875, "learning_rate": 4.834241192828164e-05, "loss": 0.8264, "step": 6726 }, { "epoch": 0.1192606477973207, "grad_norm": 2.703125, "learning_rate": 4.8341411768508684e-05, "loss": 0.7818, "step": 6728 }, { "epoch": 0.11929609983293228, "grad_norm": 2.75, "learning_rate": 4.8340411317439785e-05, "loss": 0.8018, "step": 6730 }, { "epoch": 0.11933155186854386, "grad_norm": 2.859375, "learning_rate": 4.833941057508741e-05, "loss": 0.8283, "step": 6732 }, { "epoch": 0.11936700390415542, "grad_norm": 2.484375, "learning_rate": 4.8338409541464045e-05, "loss": 0.8298, "step": 6734 }, { "epoch": 0.11940245593976699, "grad_norm": 2.734375, "learning_rate": 4.83374082165822e-05, "loss": 0.8132, "step": 6736 }, { "epoch": 0.11943790797537857, "grad_norm": 2.828125, "learning_rate": 4.833640660045436e-05, "loss": 0.8407, "step": 6738 }, { "epoch": 0.11947336001099013, "grad_norm": 2.609375, "learning_rate": 4.8335404693093026e-05, "loss": 0.8115, "step": 6740 }, { "epoch": 0.1195088120466017, "grad_norm": 2.84375, "learning_rate": 4.83344024945107e-05, "loss": 0.8323, "step": 6742 }, { "epoch": 0.11954426408221328, "grad_norm": 2.953125, "learning_rate": 4.8333400004719885e-05, "loss": 0.8535, "step": 6744 }, { "epoch": 0.11957971611782484, "grad_norm": 2.625, "learning_rate": 4.8332397223733104e-05, "loss": 0.8649, "step": 6746 }, { "epoch": 0.11961516815343641, "grad_norm": 2.703125, "learning_rate": 4.8331394151562864e-05, "loss": 0.8297, "step": 6748 }, { "epoch": 0.11965062018904798, "grad_norm": 2.765625, "learning_rate": 4.833039078822169e-05, "loss": 0.8305, "step": 6750 }, { "epoch": 0.11968607222465955, "grad_norm": 2.578125, "learning_rate": 4.832938713372209e-05, "loss": 0.8449, "step": 6752 }, { "epoch": 0.11972152426027112, "grad_norm": 3.0625, "learning_rate": 4.8328383188076595e-05, "loss": 0.8273, "step": 6754 }, { "epoch": 0.1197569762958827, "grad_norm": 2.828125, "learning_rate": 4.832737895129775e-05, "loss": 0.8507, "step": 6756 }, { "epoch": 0.11979242833149426, "grad_norm": 2.609375, "learning_rate": 4.8326374423398066e-05, "loss": 0.7767, "step": 6758 }, { "epoch": 0.11982788036710583, "grad_norm": 2.875, "learning_rate": 4.832536960439009e-05, "loss": 0.8543, "step": 6760 }, { "epoch": 0.1198633324027174, "grad_norm": 2.75, "learning_rate": 4.8324364494286364e-05, "loss": 0.8477, "step": 6762 }, { "epoch": 0.11989878443832896, "grad_norm": 2.75, "learning_rate": 4.832335909309942e-05, "loss": 0.7939, "step": 6764 }, { "epoch": 0.11993423647394054, "grad_norm": 2.640625, "learning_rate": 4.8322353400841816e-05, "loss": 0.7968, "step": 6766 }, { "epoch": 0.11996968850955211, "grad_norm": 2.71875, "learning_rate": 4.83213474175261e-05, "loss": 0.8366, "step": 6768 }, { "epoch": 0.12000514054516367, "grad_norm": 2.984375, "learning_rate": 4.8320341143164815e-05, "loss": 0.8365, "step": 6770 }, { "epoch": 0.12004059258077525, "grad_norm": 2.671875, "learning_rate": 4.831933457777055e-05, "loss": 0.8163, "step": 6772 }, { "epoch": 0.12007604461638682, "grad_norm": 2.921875, "learning_rate": 4.8318327721355825e-05, "loss": 0.8055, "step": 6774 }, { "epoch": 0.12011149665199838, "grad_norm": 2.75, "learning_rate": 4.831732057393324e-05, "loss": 0.7799, "step": 6776 }, { "epoch": 0.12014694868760996, "grad_norm": 2.765625, "learning_rate": 4.8316313135515343e-05, "loss": 0.8324, "step": 6778 }, { "epoch": 0.12018240072322153, "grad_norm": 3.03125, "learning_rate": 4.8315305406114726e-05, "loss": 0.8153, "step": 6780 }, { "epoch": 0.1202178527588331, "grad_norm": 2.65625, "learning_rate": 4.8314297385743945e-05, "loss": 0.8353, "step": 6782 }, { "epoch": 0.12025330479444467, "grad_norm": 3.125, "learning_rate": 4.831328907441559e-05, "loss": 0.8471, "step": 6784 }, { "epoch": 0.12028875683005624, "grad_norm": 2.796875, "learning_rate": 4.831228047214224e-05, "loss": 0.8277, "step": 6786 }, { "epoch": 0.1203242088656678, "grad_norm": 2.890625, "learning_rate": 4.8311271578936496e-05, "loss": 0.8371, "step": 6788 }, { "epoch": 0.12035966090127938, "grad_norm": 2.71875, "learning_rate": 4.8310262394810934e-05, "loss": 0.7628, "step": 6790 }, { "epoch": 0.12039511293689094, "grad_norm": 2.59375, "learning_rate": 4.8309252919778146e-05, "loss": 0.8369, "step": 6792 }, { "epoch": 0.12043056497250251, "grad_norm": 2.875, "learning_rate": 4.830824315385074e-05, "loss": 0.8366, "step": 6794 }, { "epoch": 0.12046601700811409, "grad_norm": 2.875, "learning_rate": 4.830723309704131e-05, "loss": 0.8113, "step": 6796 }, { "epoch": 0.12050146904372565, "grad_norm": 2.75, "learning_rate": 4.8306222749362475e-05, "loss": 0.8502, "step": 6798 }, { "epoch": 0.12053692107933722, "grad_norm": 2.890625, "learning_rate": 4.8305212110826833e-05, "loss": 0.8319, "step": 6800 }, { "epoch": 0.1205723731149488, "grad_norm": 2.890625, "learning_rate": 4.8304201181446994e-05, "loss": 0.8532, "step": 6802 }, { "epoch": 0.12060782515056036, "grad_norm": 2.96875, "learning_rate": 4.830318996123557e-05, "loss": 0.8519, "step": 6804 }, { "epoch": 0.12064327718617193, "grad_norm": 2.578125, "learning_rate": 4.830217845020521e-05, "loss": 0.8027, "step": 6806 }, { "epoch": 0.1206787292217835, "grad_norm": 2.84375, "learning_rate": 4.83011666483685e-05, "loss": 0.8347, "step": 6808 }, { "epoch": 0.12071418125739507, "grad_norm": 2.375, "learning_rate": 4.830015455573809e-05, "loss": 0.8236, "step": 6810 }, { "epoch": 0.12074963329300664, "grad_norm": 2.8125, "learning_rate": 4.82991421723266e-05, "loss": 0.8405, "step": 6812 }, { "epoch": 0.12078508532861822, "grad_norm": 2.828125, "learning_rate": 4.829812949814667e-05, "loss": 0.8778, "step": 6814 }, { "epoch": 0.12082053736422978, "grad_norm": 2.765625, "learning_rate": 4.829711653321093e-05, "loss": 0.8311, "step": 6816 }, { "epoch": 0.12085598939984135, "grad_norm": 2.609375, "learning_rate": 4.829610327753204e-05, "loss": 0.8165, "step": 6818 }, { "epoch": 0.12089144143545293, "grad_norm": 2.75, "learning_rate": 4.829508973112263e-05, "loss": 0.8344, "step": 6820 }, { "epoch": 0.12092689347106449, "grad_norm": 3.03125, "learning_rate": 4.829407589399535e-05, "loss": 0.7916, "step": 6822 }, { "epoch": 0.12096234550667606, "grad_norm": 2.734375, "learning_rate": 4.829306176616285e-05, "loss": 0.7913, "step": 6824 }, { "epoch": 0.12099779754228764, "grad_norm": 2.734375, "learning_rate": 4.82920473476378e-05, "loss": 0.7872, "step": 6826 }, { "epoch": 0.1210332495778992, "grad_norm": 2.859375, "learning_rate": 4.8291032638432846e-05, "loss": 0.8254, "step": 6828 }, { "epoch": 0.12106870161351077, "grad_norm": 2.859375, "learning_rate": 4.8290017638560656e-05, "loss": 0.8217, "step": 6830 }, { "epoch": 0.12110415364912235, "grad_norm": 2.53125, "learning_rate": 4.8289002348033895e-05, "loss": 0.7936, "step": 6832 }, { "epoch": 0.1211396056847339, "grad_norm": 3.09375, "learning_rate": 4.828798676686524e-05, "loss": 0.8685, "step": 6834 }, { "epoch": 0.12117505772034548, "grad_norm": 2.59375, "learning_rate": 4.828697089506736e-05, "loss": 0.8507, "step": 6836 }, { "epoch": 0.12121050975595705, "grad_norm": 2.875, "learning_rate": 4.8285954732652924e-05, "loss": 0.826, "step": 6838 }, { "epoch": 0.12124596179156862, "grad_norm": 2.578125, "learning_rate": 4.828493827963464e-05, "loss": 0.8032, "step": 6840 }, { "epoch": 0.12128141382718019, "grad_norm": 2.65625, "learning_rate": 4.828392153602516e-05, "loss": 0.8642, "step": 6842 }, { "epoch": 0.12131686586279176, "grad_norm": 2.59375, "learning_rate": 4.82829045018372e-05, "loss": 0.8316, "step": 6844 }, { "epoch": 0.12135231789840333, "grad_norm": 2.703125, "learning_rate": 4.828188717708343e-05, "loss": 0.81, "step": 6846 }, { "epoch": 0.1213877699340149, "grad_norm": 2.96875, "learning_rate": 4.828086956177657e-05, "loss": 0.8534, "step": 6848 }, { "epoch": 0.12142322196962647, "grad_norm": 2.546875, "learning_rate": 4.8279851655929295e-05, "loss": 0.8427, "step": 6850 }, { "epoch": 0.12145867400523803, "grad_norm": 2.71875, "learning_rate": 4.827883345955433e-05, "loss": 0.8379, "step": 6852 }, { "epoch": 0.12149412604084961, "grad_norm": 2.53125, "learning_rate": 4.827781497266437e-05, "loss": 0.7829, "step": 6854 }, { "epoch": 0.12152957807646118, "grad_norm": 2.546875, "learning_rate": 4.827679619527213e-05, "loss": 0.7964, "step": 6856 }, { "epoch": 0.12156503011207274, "grad_norm": 2.703125, "learning_rate": 4.827577712739031e-05, "loss": 0.8052, "step": 6858 }, { "epoch": 0.12160048214768432, "grad_norm": 2.796875, "learning_rate": 4.827475776903165e-05, "loss": 0.8155, "step": 6860 }, { "epoch": 0.1216359341832959, "grad_norm": 2.734375, "learning_rate": 4.827373812020886e-05, "loss": 0.7722, "step": 6862 }, { "epoch": 0.12167138621890745, "grad_norm": 2.890625, "learning_rate": 4.827271818093466e-05, "loss": 0.8165, "step": 6864 }, { "epoch": 0.12170683825451903, "grad_norm": 2.671875, "learning_rate": 4.8271697951221794e-05, "loss": 0.7605, "step": 6866 }, { "epoch": 0.1217422902901306, "grad_norm": 2.578125, "learning_rate": 4.827067743108298e-05, "loss": 0.8532, "step": 6868 }, { "epoch": 0.12177774232574216, "grad_norm": 2.703125, "learning_rate": 4.826965662053096e-05, "loss": 0.7734, "step": 6870 }, { "epoch": 0.12181319436135374, "grad_norm": 2.5, "learning_rate": 4.826863551957846e-05, "loss": 0.8054, "step": 6872 }, { "epoch": 0.1218486463969653, "grad_norm": 2.765625, "learning_rate": 4.826761412823825e-05, "loss": 0.8259, "step": 6874 }, { "epoch": 0.12188409843257687, "grad_norm": 2.921875, "learning_rate": 4.8266592446523055e-05, "loss": 0.8487, "step": 6876 }, { "epoch": 0.12191955046818845, "grad_norm": 3.046875, "learning_rate": 4.8265570474445636e-05, "loss": 0.7975, "step": 6878 }, { "epoch": 0.12195500250380001, "grad_norm": 3.015625, "learning_rate": 4.826454821201875e-05, "loss": 0.7988, "step": 6880 }, { "epoch": 0.12199045453941158, "grad_norm": 2.578125, "learning_rate": 4.826352565925513e-05, "loss": 0.8201, "step": 6882 }, { "epoch": 0.12202590657502316, "grad_norm": 2.84375, "learning_rate": 4.826250281616757e-05, "loss": 0.8149, "step": 6884 }, { "epoch": 0.12206135861063472, "grad_norm": 2.84375, "learning_rate": 4.826147968276881e-05, "loss": 0.7627, "step": 6886 }, { "epoch": 0.12209681064624629, "grad_norm": 2.875, "learning_rate": 4.826045625907164e-05, "loss": 0.8573, "step": 6888 }, { "epoch": 0.12213226268185787, "grad_norm": 3.0625, "learning_rate": 4.825943254508881e-05, "loss": 0.8613, "step": 6890 }, { "epoch": 0.12216771471746943, "grad_norm": 2.5625, "learning_rate": 4.825840854083311e-05, "loss": 0.8319, "step": 6892 }, { "epoch": 0.122203166753081, "grad_norm": 2.8125, "learning_rate": 4.8257384246317316e-05, "loss": 0.827, "step": 6894 }, { "epoch": 0.12223861878869258, "grad_norm": 2.265625, "learning_rate": 4.8256359661554215e-05, "loss": 0.8295, "step": 6896 }, { "epoch": 0.12227407082430414, "grad_norm": 2.796875, "learning_rate": 4.825533478655658e-05, "loss": 0.8233, "step": 6898 }, { "epoch": 0.12230952285991571, "grad_norm": 2.640625, "learning_rate": 4.825430962133722e-05, "loss": 0.8086, "step": 6900 }, { "epoch": 0.12234497489552729, "grad_norm": 2.765625, "learning_rate": 4.825328416590891e-05, "loss": 0.7928, "step": 6902 }, { "epoch": 0.12238042693113885, "grad_norm": 2.640625, "learning_rate": 4.825225842028447e-05, "loss": 0.7984, "step": 6904 }, { "epoch": 0.12241587896675042, "grad_norm": 2.65625, "learning_rate": 4.8251232384476675e-05, "loss": 0.8349, "step": 6906 }, { "epoch": 0.122451331002362, "grad_norm": 2.8125, "learning_rate": 4.825020605849835e-05, "loss": 0.8235, "step": 6908 }, { "epoch": 0.12248678303797356, "grad_norm": 2.90625, "learning_rate": 4.8249179442362294e-05, "loss": 0.8255, "step": 6910 }, { "epoch": 0.12252223507358513, "grad_norm": 2.828125, "learning_rate": 4.824815253608132e-05, "loss": 0.842, "step": 6912 }, { "epoch": 0.1225576871091967, "grad_norm": 2.921875, "learning_rate": 4.8247125339668244e-05, "loss": 0.8267, "step": 6914 }, { "epoch": 0.12259313914480827, "grad_norm": 2.609375, "learning_rate": 4.824609785313589e-05, "loss": 0.8199, "step": 6916 }, { "epoch": 0.12262859118041984, "grad_norm": 2.875, "learning_rate": 4.824507007649708e-05, "loss": 0.8283, "step": 6918 }, { "epoch": 0.12266404321603142, "grad_norm": 2.703125, "learning_rate": 4.8244042009764625e-05, "loss": 0.8408, "step": 6920 }, { "epoch": 0.12269949525164298, "grad_norm": 2.859375, "learning_rate": 4.8243013652951374e-05, "loss": 0.8198, "step": 6922 }, { "epoch": 0.12273494728725455, "grad_norm": 2.71875, "learning_rate": 4.824198500607016e-05, "loss": 0.8309, "step": 6924 }, { "epoch": 0.12277039932286613, "grad_norm": 2.78125, "learning_rate": 4.82409560691338e-05, "loss": 0.7655, "step": 6926 }, { "epoch": 0.12280585135847769, "grad_norm": 3.0, "learning_rate": 4.823992684215516e-05, "loss": 0.8159, "step": 6928 }, { "epoch": 0.12284130339408926, "grad_norm": 3.15625, "learning_rate": 4.823889732514707e-05, "loss": 0.8199, "step": 6930 }, { "epoch": 0.12287675542970083, "grad_norm": 2.84375, "learning_rate": 4.823786751812238e-05, "loss": 0.8167, "step": 6932 }, { "epoch": 0.1229122074653124, "grad_norm": 3.046875, "learning_rate": 4.8236837421093946e-05, "loss": 0.7913, "step": 6934 }, { "epoch": 0.12294765950092397, "grad_norm": 2.625, "learning_rate": 4.823580703407462e-05, "loss": 0.8211, "step": 6936 }, { "epoch": 0.12298311153653554, "grad_norm": 2.609375, "learning_rate": 4.823477635707726e-05, "loss": 0.8176, "step": 6938 }, { "epoch": 0.1230185635721471, "grad_norm": 2.78125, "learning_rate": 4.8233745390114734e-05, "loss": 0.7938, "step": 6940 }, { "epoch": 0.12305401560775868, "grad_norm": 2.9375, "learning_rate": 4.82327141331999e-05, "loss": 0.8201, "step": 6942 }, { "epoch": 0.12308946764337025, "grad_norm": 2.765625, "learning_rate": 4.823168258634564e-05, "loss": 0.8104, "step": 6944 }, { "epoch": 0.12312491967898181, "grad_norm": 2.765625, "learning_rate": 4.823065074956481e-05, "loss": 0.7937, "step": 6946 }, { "epoch": 0.12316037171459339, "grad_norm": 2.625, "learning_rate": 4.822961862287031e-05, "loss": 0.8213, "step": 6948 }, { "epoch": 0.12319582375020496, "grad_norm": 2.640625, "learning_rate": 4.8228586206274996e-05, "loss": 0.8671, "step": 6950 }, { "epoch": 0.12323127578581652, "grad_norm": 2.65625, "learning_rate": 4.8227553499791774e-05, "loss": 0.8332, "step": 6952 }, { "epoch": 0.1232667278214281, "grad_norm": 2.703125, "learning_rate": 4.8226520503433515e-05, "loss": 0.7827, "step": 6954 }, { "epoch": 0.12330217985703967, "grad_norm": 2.796875, "learning_rate": 4.8225487217213114e-05, "loss": 0.8011, "step": 6956 }, { "epoch": 0.12333763189265123, "grad_norm": 2.71875, "learning_rate": 4.822445364114349e-05, "loss": 0.816, "step": 6958 }, { "epoch": 0.12337308392826281, "grad_norm": 2.546875, "learning_rate": 4.82234197752375e-05, "loss": 0.8316, "step": 6960 }, { "epoch": 0.12340853596387437, "grad_norm": 2.6875, "learning_rate": 4.822238561950808e-05, "loss": 0.8169, "step": 6962 }, { "epoch": 0.12344398799948594, "grad_norm": 2.875, "learning_rate": 4.8221351173968124e-05, "loss": 0.8094, "step": 6964 }, { "epoch": 0.12347944003509752, "grad_norm": 2.78125, "learning_rate": 4.822031643863053e-05, "loss": 0.8237, "step": 6966 }, { "epoch": 0.12351489207070908, "grad_norm": 2.640625, "learning_rate": 4.821928141350823e-05, "loss": 0.8092, "step": 6968 }, { "epoch": 0.12355034410632065, "grad_norm": 2.359375, "learning_rate": 4.821824609861414e-05, "loss": 0.8337, "step": 6970 }, { "epoch": 0.12358579614193223, "grad_norm": 2.75, "learning_rate": 4.821721049396117e-05, "loss": 0.8329, "step": 6972 }, { "epoch": 0.12362124817754379, "grad_norm": 2.65625, "learning_rate": 4.821617459956225e-05, "loss": 0.823, "step": 6974 }, { "epoch": 0.12365670021315536, "grad_norm": 2.703125, "learning_rate": 4.8215138415430313e-05, "loss": 0.8266, "step": 6976 }, { "epoch": 0.12369215224876694, "grad_norm": 3.015625, "learning_rate": 4.821410194157827e-05, "loss": 0.8009, "step": 6978 }, { "epoch": 0.1237276042843785, "grad_norm": 2.71875, "learning_rate": 4.821306517801908e-05, "loss": 0.8233, "step": 6980 }, { "epoch": 0.12376305631999007, "grad_norm": 2.765625, "learning_rate": 4.821202812476567e-05, "loss": 0.8174, "step": 6982 }, { "epoch": 0.12379850835560165, "grad_norm": 2.859375, "learning_rate": 4.821099078183098e-05, "loss": 0.8217, "step": 6984 }, { "epoch": 0.12383396039121321, "grad_norm": 2.609375, "learning_rate": 4.8209953149227966e-05, "loss": 0.7908, "step": 6986 }, { "epoch": 0.12386941242682478, "grad_norm": 2.640625, "learning_rate": 4.8208915226969566e-05, "loss": 0.8364, "step": 6988 }, { "epoch": 0.12390486446243636, "grad_norm": 2.953125, "learning_rate": 4.820787701506874e-05, "loss": 0.8028, "step": 6990 }, { "epoch": 0.12394031649804792, "grad_norm": 3.0, "learning_rate": 4.820683851353844e-05, "loss": 0.8089, "step": 6992 }, { "epoch": 0.12397576853365949, "grad_norm": 2.640625, "learning_rate": 4.820579972239163e-05, "loss": 0.8408, "step": 6994 }, { "epoch": 0.12401122056927107, "grad_norm": 2.96875, "learning_rate": 4.8204760641641275e-05, "loss": 0.8123, "step": 6996 }, { "epoch": 0.12404667260488263, "grad_norm": 2.671875, "learning_rate": 4.820372127130034e-05, "loss": 0.7928, "step": 6998 }, { "epoch": 0.1240821246404942, "grad_norm": 2.96875, "learning_rate": 4.8202681611381795e-05, "loss": 0.8467, "step": 7000 }, { "epoch": 0.12411757667610578, "grad_norm": 2.890625, "learning_rate": 4.8201641661898625e-05, "loss": 0.8613, "step": 7002 }, { "epoch": 0.12415302871171734, "grad_norm": 2.765625, "learning_rate": 4.820060142286379e-05, "loss": 0.8444, "step": 7004 }, { "epoch": 0.12418848074732891, "grad_norm": 2.59375, "learning_rate": 4.819956089429028e-05, "loss": 0.8008, "step": 7006 }, { "epoch": 0.12422393278294049, "grad_norm": 3.015625, "learning_rate": 4.8198520076191085e-05, "loss": 0.8038, "step": 7008 }, { "epoch": 0.12425938481855205, "grad_norm": 2.703125, "learning_rate": 4.819747896857919e-05, "loss": 0.8439, "step": 7010 }, { "epoch": 0.12429483685416362, "grad_norm": 2.671875, "learning_rate": 4.819643757146759e-05, "loss": 0.8163, "step": 7012 }, { "epoch": 0.1243302888897752, "grad_norm": 2.796875, "learning_rate": 4.819539588486929e-05, "loss": 0.8019, "step": 7014 }, { "epoch": 0.12436574092538676, "grad_norm": 2.484375, "learning_rate": 4.819435390879726e-05, "loss": 0.8193, "step": 7016 }, { "epoch": 0.12440119296099833, "grad_norm": 2.671875, "learning_rate": 4.8193311643264543e-05, "loss": 0.8196, "step": 7018 }, { "epoch": 0.1244366449966099, "grad_norm": 2.734375, "learning_rate": 4.819226908828412e-05, "loss": 0.8168, "step": 7020 }, { "epoch": 0.12447209703222147, "grad_norm": 2.703125, "learning_rate": 4.819122624386902e-05, "loss": 0.8314, "step": 7022 }, { "epoch": 0.12450754906783304, "grad_norm": 2.765625, "learning_rate": 4.819018311003223e-05, "loss": 0.7862, "step": 7024 }, { "epoch": 0.12454300110344461, "grad_norm": 3.046875, "learning_rate": 4.818913968678679e-05, "loss": 0.8643, "step": 7026 }, { "epoch": 0.12457845313905618, "grad_norm": 2.578125, "learning_rate": 4.818809597414572e-05, "loss": 0.8531, "step": 7028 }, { "epoch": 0.12461390517466775, "grad_norm": 2.5625, "learning_rate": 4.818705197212204e-05, "loss": 0.8179, "step": 7030 }, { "epoch": 0.12464935721027932, "grad_norm": 2.65625, "learning_rate": 4.818600768072878e-05, "loss": 0.786, "step": 7032 }, { "epoch": 0.12468480924589088, "grad_norm": 2.765625, "learning_rate": 4.818496309997898e-05, "loss": 0.7907, "step": 7034 }, { "epoch": 0.12472026128150246, "grad_norm": 2.671875, "learning_rate": 4.8183918229885664e-05, "loss": 0.7994, "step": 7036 }, { "epoch": 0.12475571331711403, "grad_norm": 2.78125, "learning_rate": 4.8182873070461874e-05, "loss": 0.8645, "step": 7038 }, { "epoch": 0.1247911653527256, "grad_norm": 2.84375, "learning_rate": 4.818182762172066e-05, "loss": 0.8562, "step": 7040 }, { "epoch": 0.12482661738833717, "grad_norm": 2.9375, "learning_rate": 4.818078188367506e-05, "loss": 0.8527, "step": 7042 }, { "epoch": 0.12486206942394873, "grad_norm": 3.46875, "learning_rate": 4.8179735856338144e-05, "loss": 0.8573, "step": 7044 }, { "epoch": 0.1248975214595603, "grad_norm": 3.046875, "learning_rate": 4.8178689539722946e-05, "loss": 0.8259, "step": 7046 }, { "epoch": 0.12493297349517188, "grad_norm": 2.421875, "learning_rate": 4.817764293384253e-05, "loss": 0.8099, "step": 7048 }, { "epoch": 0.12496842553078344, "grad_norm": 2.90625, "learning_rate": 4.817659603870995e-05, "loss": 0.8493, "step": 7050 }, { "epoch": 0.12500387756639503, "grad_norm": 3.203125, "learning_rate": 4.817554885433829e-05, "loss": 0.8322, "step": 7052 }, { "epoch": 0.1250393296020066, "grad_norm": 2.75, "learning_rate": 4.8174501380740605e-05, "loss": 0.8546, "step": 7054 }, { "epoch": 0.12507478163761815, "grad_norm": 2.78125, "learning_rate": 4.817345361792996e-05, "loss": 0.864, "step": 7056 }, { "epoch": 0.12511023367322974, "grad_norm": 2.671875, "learning_rate": 4.8172405565919456e-05, "loss": 0.7856, "step": 7058 }, { "epoch": 0.1251456857088413, "grad_norm": 2.8125, "learning_rate": 4.8171357224722144e-05, "loss": 0.8015, "step": 7060 }, { "epoch": 0.12518113774445286, "grad_norm": 2.65625, "learning_rate": 4.817030859435113e-05, "loss": 0.8243, "step": 7062 }, { "epoch": 0.12521658978006445, "grad_norm": 2.546875, "learning_rate": 4.816925967481949e-05, "loss": 0.796, "step": 7064 }, { "epoch": 0.125252041815676, "grad_norm": 2.609375, "learning_rate": 4.816821046614031e-05, "loss": 0.8522, "step": 7066 }, { "epoch": 0.12528749385128757, "grad_norm": 2.875, "learning_rate": 4.816716096832669e-05, "loss": 0.7957, "step": 7068 }, { "epoch": 0.12532294588689916, "grad_norm": 3.375, "learning_rate": 4.816611118139173e-05, "loss": 0.8612, "step": 7070 }, { "epoch": 0.12535839792251072, "grad_norm": 2.609375, "learning_rate": 4.816506110534852e-05, "loss": 0.8112, "step": 7072 }, { "epoch": 0.12539384995812228, "grad_norm": 2.828125, "learning_rate": 4.8164010740210176e-05, "loss": 0.7945, "step": 7074 }, { "epoch": 0.12542930199373387, "grad_norm": 2.5625, "learning_rate": 4.8162960085989806e-05, "loss": 0.8379, "step": 7076 }, { "epoch": 0.12546475402934543, "grad_norm": 2.796875, "learning_rate": 4.816190914270051e-05, "loss": 0.7771, "step": 7078 }, { "epoch": 0.125500206064957, "grad_norm": 2.625, "learning_rate": 4.816085791035543e-05, "loss": 0.7982, "step": 7080 }, { "epoch": 0.12553565810056858, "grad_norm": 3.046875, "learning_rate": 4.815980638896765e-05, "loss": 0.8187, "step": 7082 }, { "epoch": 0.12557111013618014, "grad_norm": 3.0, "learning_rate": 4.8158754578550315e-05, "loss": 0.8395, "step": 7084 }, { "epoch": 0.1256065621717917, "grad_norm": 2.609375, "learning_rate": 4.815770247911655e-05, "loss": 0.8151, "step": 7086 }, { "epoch": 0.12564201420740326, "grad_norm": 2.921875, "learning_rate": 4.815665009067948e-05, "loss": 0.838, "step": 7088 }, { "epoch": 0.12567746624301485, "grad_norm": 2.828125, "learning_rate": 4.815559741325223e-05, "loss": 0.8211, "step": 7090 }, { "epoch": 0.1257129182786264, "grad_norm": 2.796875, "learning_rate": 4.815454444684796e-05, "loss": 0.8336, "step": 7092 }, { "epoch": 0.12574837031423797, "grad_norm": 2.921875, "learning_rate": 4.8153491191479795e-05, "loss": 0.8184, "step": 7094 }, { "epoch": 0.12578382234984956, "grad_norm": 2.484375, "learning_rate": 4.8152437647160884e-05, "loss": 0.8088, "step": 7096 }, { "epoch": 0.12581927438546112, "grad_norm": 2.875, "learning_rate": 4.815138381390437e-05, "loss": 0.8312, "step": 7098 }, { "epoch": 0.12585472642107268, "grad_norm": 2.390625, "learning_rate": 4.81503296917234e-05, "loss": 0.7815, "step": 7100 }, { "epoch": 0.12589017845668427, "grad_norm": 2.71875, "learning_rate": 4.814927528063116e-05, "loss": 0.8262, "step": 7102 }, { "epoch": 0.12592563049229583, "grad_norm": 2.609375, "learning_rate": 4.814822058064077e-05, "loss": 0.8283, "step": 7104 }, { "epoch": 0.1259610825279074, "grad_norm": 2.703125, "learning_rate": 4.814716559176541e-05, "loss": 0.8609, "step": 7106 }, { "epoch": 0.12599653456351897, "grad_norm": 2.84375, "learning_rate": 4.8146110314018245e-05, "loss": 0.8504, "step": 7108 }, { "epoch": 0.12603198659913054, "grad_norm": 2.796875, "learning_rate": 4.814505474741244e-05, "loss": 0.8211, "step": 7110 }, { "epoch": 0.1260674386347421, "grad_norm": 2.828125, "learning_rate": 4.814399889196119e-05, "loss": 0.8408, "step": 7112 }, { "epoch": 0.12610289067035368, "grad_norm": 2.515625, "learning_rate": 4.8142942747677634e-05, "loss": 0.8322, "step": 7114 }, { "epoch": 0.12613834270596525, "grad_norm": 2.75, "learning_rate": 4.814188631457498e-05, "loss": 0.79, "step": 7116 }, { "epoch": 0.1261737947415768, "grad_norm": 2.96875, "learning_rate": 4.81408295926664e-05, "loss": 0.8469, "step": 7118 }, { "epoch": 0.1262092467771884, "grad_norm": 2.9375, "learning_rate": 4.81397725819651e-05, "loss": 0.8656, "step": 7120 }, { "epoch": 0.12624469881279995, "grad_norm": 2.5625, "learning_rate": 4.813871528248425e-05, "loss": 0.8063, "step": 7122 }, { "epoch": 0.12628015084841152, "grad_norm": 2.765625, "learning_rate": 4.813765769423705e-05, "loss": 0.7747, "step": 7124 }, { "epoch": 0.1263156028840231, "grad_norm": 2.84375, "learning_rate": 4.8136599817236706e-05, "loss": 0.8538, "step": 7126 }, { "epoch": 0.12635105491963466, "grad_norm": 2.421875, "learning_rate": 4.8135541651496414e-05, "loss": 0.7959, "step": 7128 }, { "epoch": 0.12638650695524623, "grad_norm": 2.84375, "learning_rate": 4.8134483197029376e-05, "loss": 0.7977, "step": 7130 }, { "epoch": 0.1264219589908578, "grad_norm": 2.65625, "learning_rate": 4.813342445384881e-05, "loss": 0.8197, "step": 7132 }, { "epoch": 0.12645741102646937, "grad_norm": 2.765625, "learning_rate": 4.8132365421967926e-05, "loss": 0.8116, "step": 7134 }, { "epoch": 0.12649286306208093, "grad_norm": 2.671875, "learning_rate": 4.813130610139994e-05, "loss": 0.8418, "step": 7136 }, { "epoch": 0.12652831509769252, "grad_norm": 2.6875, "learning_rate": 4.813024649215807e-05, "loss": 0.8424, "step": 7138 }, { "epoch": 0.12656376713330408, "grad_norm": 2.734375, "learning_rate": 4.812918659425555e-05, "loss": 0.7927, "step": 7140 }, { "epoch": 0.12659921916891564, "grad_norm": 2.796875, "learning_rate": 4.812812640770559e-05, "loss": 0.8142, "step": 7142 }, { "epoch": 0.12663467120452723, "grad_norm": 2.546875, "learning_rate": 4.8127065932521434e-05, "loss": 0.8158, "step": 7144 }, { "epoch": 0.1266701232401388, "grad_norm": 2.765625, "learning_rate": 4.8126005168716305e-05, "loss": 0.853, "step": 7146 }, { "epoch": 0.12670557527575035, "grad_norm": 2.75, "learning_rate": 4.812494411630345e-05, "loss": 0.844, "step": 7148 }, { "epoch": 0.12674102731136194, "grad_norm": 2.765625, "learning_rate": 4.8123882775296113e-05, "loss": 0.84, "step": 7150 }, { "epoch": 0.1267764793469735, "grad_norm": 2.59375, "learning_rate": 4.812282114570753e-05, "loss": 0.8119, "step": 7152 }, { "epoch": 0.12681193138258506, "grad_norm": 2.96875, "learning_rate": 4.812175922755096e-05, "loss": 0.8149, "step": 7154 }, { "epoch": 0.12684738341819665, "grad_norm": 2.9375, "learning_rate": 4.812069702083965e-05, "loss": 0.8097, "step": 7156 }, { "epoch": 0.1268828354538082, "grad_norm": 2.625, "learning_rate": 4.8119634525586856e-05, "loss": 0.8152, "step": 7158 }, { "epoch": 0.12691828748941977, "grad_norm": 3.015625, "learning_rate": 4.811857174180584e-05, "loss": 0.8176, "step": 7160 }, { "epoch": 0.12695373952503136, "grad_norm": 2.546875, "learning_rate": 4.811750866950986e-05, "loss": 0.804, "step": 7162 }, { "epoch": 0.12698919156064292, "grad_norm": 2.640625, "learning_rate": 4.81164453087122e-05, "loss": 0.8052, "step": 7164 }, { "epoch": 0.12702464359625448, "grad_norm": 2.796875, "learning_rate": 4.8115381659426105e-05, "loss": 0.8712, "step": 7166 }, { "epoch": 0.12706009563186607, "grad_norm": 2.421875, "learning_rate": 4.811431772166486e-05, "loss": 0.8033, "step": 7168 }, { "epoch": 0.12709554766747763, "grad_norm": 3.109375, "learning_rate": 4.8113253495441745e-05, "loss": 0.8239, "step": 7170 }, { "epoch": 0.1271309997030892, "grad_norm": 2.875, "learning_rate": 4.811218898077005e-05, "loss": 0.8461, "step": 7172 }, { "epoch": 0.12716645173870078, "grad_norm": 2.640625, "learning_rate": 4.811112417766304e-05, "loss": 0.8235, "step": 7174 }, { "epoch": 0.12720190377431234, "grad_norm": 2.828125, "learning_rate": 4.811005908613402e-05, "loss": 0.8135, "step": 7176 }, { "epoch": 0.1272373558099239, "grad_norm": 2.640625, "learning_rate": 4.810899370619627e-05, "loss": 0.8182, "step": 7178 }, { "epoch": 0.1272728078455355, "grad_norm": 2.84375, "learning_rate": 4.81079280378631e-05, "loss": 0.812, "step": 7180 }, { "epoch": 0.12730825988114705, "grad_norm": 2.875, "learning_rate": 4.81068620811478e-05, "loss": 0.8564, "step": 7182 }, { "epoch": 0.1273437119167586, "grad_norm": 2.953125, "learning_rate": 4.810579583606367e-05, "loss": 0.8287, "step": 7184 }, { "epoch": 0.1273791639523702, "grad_norm": 2.828125, "learning_rate": 4.810472930262402e-05, "loss": 0.8113, "step": 7186 }, { "epoch": 0.12741461598798176, "grad_norm": 2.90625, "learning_rate": 4.810366248084216e-05, "loss": 0.8531, "step": 7188 }, { "epoch": 0.12745006802359332, "grad_norm": 2.5625, "learning_rate": 4.810259537073141e-05, "loss": 0.7819, "step": 7190 }, { "epoch": 0.1274855200592049, "grad_norm": 3.015625, "learning_rate": 4.8101527972305075e-05, "loss": 0.8039, "step": 7192 }, { "epoch": 0.12752097209481647, "grad_norm": 2.984375, "learning_rate": 4.810046028557649e-05, "loss": 0.8555, "step": 7194 }, { "epoch": 0.12755642413042803, "grad_norm": 2.890625, "learning_rate": 4.8099392310558966e-05, "loss": 0.8321, "step": 7196 }, { "epoch": 0.12759187616603962, "grad_norm": 2.828125, "learning_rate": 4.809832404726584e-05, "loss": 0.8268, "step": 7198 }, { "epoch": 0.12762732820165118, "grad_norm": 2.671875, "learning_rate": 4.8097255495710435e-05, "loss": 0.845, "step": 7200 }, { "epoch": 0.12766278023726274, "grad_norm": 2.8125, "learning_rate": 4.80961866559061e-05, "loss": 0.8416, "step": 7202 }, { "epoch": 0.12769823227287433, "grad_norm": 3.015625, "learning_rate": 4.809511752786616e-05, "loss": 0.8385, "step": 7204 }, { "epoch": 0.1277336843084859, "grad_norm": 2.359375, "learning_rate": 4.809404811160397e-05, "loss": 0.7694, "step": 7206 }, { "epoch": 0.12776913634409745, "grad_norm": 2.734375, "learning_rate": 4.809297840713287e-05, "loss": 0.8489, "step": 7208 }, { "epoch": 0.12780458837970904, "grad_norm": 2.546875, "learning_rate": 4.8091908414466206e-05, "loss": 0.88, "step": 7210 }, { "epoch": 0.1278400404153206, "grad_norm": 2.71875, "learning_rate": 4.8090838133617334e-05, "loss": 0.8352, "step": 7212 }, { "epoch": 0.12787549245093216, "grad_norm": 2.765625, "learning_rate": 4.808976756459961e-05, "loss": 0.8255, "step": 7214 }, { "epoch": 0.12791094448654375, "grad_norm": 2.546875, "learning_rate": 4.80886967074264e-05, "loss": 0.8162, "step": 7216 }, { "epoch": 0.1279463965221553, "grad_norm": 2.625, "learning_rate": 4.808762556211106e-05, "loss": 0.823, "step": 7218 }, { "epoch": 0.12798184855776687, "grad_norm": 2.625, "learning_rate": 4.808655412866697e-05, "loss": 0.7868, "step": 7220 }, { "epoch": 0.12801730059337846, "grad_norm": 2.734375, "learning_rate": 4.8085482407107483e-05, "loss": 0.8302, "step": 7222 }, { "epoch": 0.12805275262899002, "grad_norm": 2.6875, "learning_rate": 4.808441039744599e-05, "loss": 0.7836, "step": 7224 }, { "epoch": 0.12808820466460158, "grad_norm": 3.015625, "learning_rate": 4.8083338099695864e-05, "loss": 0.8161, "step": 7226 }, { "epoch": 0.12812365670021317, "grad_norm": 2.703125, "learning_rate": 4.8082265513870484e-05, "loss": 0.7852, "step": 7228 }, { "epoch": 0.12815910873582473, "grad_norm": 2.78125, "learning_rate": 4.808119263998324e-05, "loss": 0.8264, "step": 7230 }, { "epoch": 0.1281945607714363, "grad_norm": 2.546875, "learning_rate": 4.808011947804751e-05, "loss": 0.7741, "step": 7232 }, { "epoch": 0.12823001280704788, "grad_norm": 3.140625, "learning_rate": 4.807904602807671e-05, "loss": 0.8024, "step": 7234 }, { "epoch": 0.12826546484265944, "grad_norm": 2.671875, "learning_rate": 4.807797229008422e-05, "loss": 0.8464, "step": 7236 }, { "epoch": 0.128300916878271, "grad_norm": 2.765625, "learning_rate": 4.8076898264083435e-05, "loss": 0.8072, "step": 7238 }, { "epoch": 0.1283363689138826, "grad_norm": 2.875, "learning_rate": 4.8075823950087774e-05, "loss": 0.784, "step": 7240 }, { "epoch": 0.12837182094949415, "grad_norm": 3.078125, "learning_rate": 4.807474934811063e-05, "loss": 0.8259, "step": 7242 }, { "epoch": 0.1284072729851057, "grad_norm": 2.828125, "learning_rate": 4.8073674458165416e-05, "loss": 0.7873, "step": 7244 }, { "epoch": 0.1284427250207173, "grad_norm": 2.734375, "learning_rate": 4.8072599280265565e-05, "loss": 0.8289, "step": 7246 }, { "epoch": 0.12847817705632886, "grad_norm": 2.765625, "learning_rate": 4.807152381442447e-05, "loss": 0.8145, "step": 7248 }, { "epoch": 0.12851362909194042, "grad_norm": 2.84375, "learning_rate": 4.807044806065557e-05, "loss": 0.8007, "step": 7250 }, { "epoch": 0.128549081127552, "grad_norm": 2.71875, "learning_rate": 4.806937201897228e-05, "loss": 0.8343, "step": 7252 }, { "epoch": 0.12858453316316357, "grad_norm": 2.875, "learning_rate": 4.8068295689388035e-05, "loss": 0.8283, "step": 7254 }, { "epoch": 0.12861998519877513, "grad_norm": 2.828125, "learning_rate": 4.806721907191626e-05, "loss": 0.8135, "step": 7256 }, { "epoch": 0.1286554372343867, "grad_norm": 2.625, "learning_rate": 4.8066142166570397e-05, "loss": 0.8934, "step": 7258 }, { "epoch": 0.12869088926999828, "grad_norm": 2.546875, "learning_rate": 4.806506497336388e-05, "loss": 0.8221, "step": 7260 }, { "epoch": 0.12872634130560984, "grad_norm": 2.953125, "learning_rate": 4.8063987492310156e-05, "loss": 0.8494, "step": 7262 }, { "epoch": 0.1287617933412214, "grad_norm": 2.578125, "learning_rate": 4.806290972342268e-05, "loss": 0.7975, "step": 7264 }, { "epoch": 0.128797245376833, "grad_norm": 2.578125, "learning_rate": 4.806183166671489e-05, "loss": 0.801, "step": 7266 }, { "epoch": 0.12883269741244455, "grad_norm": 2.6875, "learning_rate": 4.8060753322200244e-05, "loss": 0.8517, "step": 7268 }, { "epoch": 0.1288681494480561, "grad_norm": 2.828125, "learning_rate": 4.80596746898922e-05, "loss": 0.8417, "step": 7270 }, { "epoch": 0.1289036014836677, "grad_norm": 2.671875, "learning_rate": 4.8058595769804224e-05, "loss": 0.8524, "step": 7272 }, { "epoch": 0.12893905351927926, "grad_norm": 2.671875, "learning_rate": 4.805751656194977e-05, "loss": 0.817, "step": 7274 }, { "epoch": 0.12897450555489082, "grad_norm": 2.640625, "learning_rate": 4.8056437066342315e-05, "loss": 0.8042, "step": 7276 }, { "epoch": 0.1290099575905024, "grad_norm": 2.625, "learning_rate": 4.805535728299533e-05, "loss": 0.8078, "step": 7278 }, { "epoch": 0.12904540962611397, "grad_norm": 2.765625, "learning_rate": 4.805427721192228e-05, "loss": 0.8814, "step": 7280 }, { "epoch": 0.12908086166172553, "grad_norm": 2.765625, "learning_rate": 4.805319685313666e-05, "loss": 0.8336, "step": 7282 }, { "epoch": 0.12911631369733712, "grad_norm": 2.828125, "learning_rate": 4.805211620665194e-05, "loss": 0.8131, "step": 7284 }, { "epoch": 0.12915176573294868, "grad_norm": 2.828125, "learning_rate": 4.805103527248161e-05, "loss": 0.8335, "step": 7286 }, { "epoch": 0.12918721776856024, "grad_norm": 2.703125, "learning_rate": 4.804995405063916e-05, "loss": 0.8351, "step": 7288 }, { "epoch": 0.12922266980417182, "grad_norm": 2.859375, "learning_rate": 4.804887254113809e-05, "loss": 0.8125, "step": 7290 }, { "epoch": 0.12925812183978339, "grad_norm": 2.75, "learning_rate": 4.804779074399189e-05, "loss": 0.8221, "step": 7292 }, { "epoch": 0.12929357387539495, "grad_norm": 3.0, "learning_rate": 4.8046708659214054e-05, "loss": 0.8297, "step": 7294 }, { "epoch": 0.12932902591100653, "grad_norm": 2.6875, "learning_rate": 4.8045626286818106e-05, "loss": 0.8106, "step": 7296 }, { "epoch": 0.1293644779466181, "grad_norm": 2.484375, "learning_rate": 4.804454362681754e-05, "loss": 0.8042, "step": 7298 }, { "epoch": 0.12939992998222966, "grad_norm": 2.9375, "learning_rate": 4.804346067922587e-05, "loss": 0.821, "step": 7300 }, { "epoch": 0.12943538201784124, "grad_norm": 3.046875, "learning_rate": 4.80423774440566e-05, "loss": 0.7792, "step": 7302 }, { "epoch": 0.1294708340534528, "grad_norm": 2.671875, "learning_rate": 4.804129392132327e-05, "loss": 0.8335, "step": 7304 }, { "epoch": 0.12950628608906437, "grad_norm": 2.6875, "learning_rate": 4.804021011103939e-05, "loss": 0.8099, "step": 7306 }, { "epoch": 0.12954173812467595, "grad_norm": 2.734375, "learning_rate": 4.8039126013218474e-05, "loss": 0.8458, "step": 7308 }, { "epoch": 0.12957719016028751, "grad_norm": 2.671875, "learning_rate": 4.803804162787408e-05, "loss": 0.8252, "step": 7310 }, { "epoch": 0.12961264219589907, "grad_norm": 2.953125, "learning_rate": 4.803695695501972e-05, "loss": 0.8265, "step": 7312 }, { "epoch": 0.12964809423151066, "grad_norm": 2.734375, "learning_rate": 4.803587199466893e-05, "loss": 0.8698, "step": 7314 }, { "epoch": 0.12968354626712222, "grad_norm": 2.71875, "learning_rate": 4.8034786746835256e-05, "loss": 0.8338, "step": 7316 }, { "epoch": 0.12971899830273378, "grad_norm": 2.453125, "learning_rate": 4.803370121153225e-05, "loss": 0.861, "step": 7318 }, { "epoch": 0.12975445033834537, "grad_norm": 2.734375, "learning_rate": 4.8032615388773445e-05, "loss": 0.8264, "step": 7320 }, { "epoch": 0.12978990237395693, "grad_norm": 2.5625, "learning_rate": 4.8031529278572394e-05, "loss": 0.839, "step": 7322 }, { "epoch": 0.1298253544095685, "grad_norm": 2.828125, "learning_rate": 4.803044288094266e-05, "loss": 0.8518, "step": 7324 }, { "epoch": 0.12986080644518008, "grad_norm": 2.90625, "learning_rate": 4.802935619589779e-05, "loss": 0.8324, "step": 7326 }, { "epoch": 0.12989625848079164, "grad_norm": 2.859375, "learning_rate": 4.802826922345136e-05, "loss": 0.7853, "step": 7328 }, { "epoch": 0.1299317105164032, "grad_norm": 2.921875, "learning_rate": 4.802718196361692e-05, "loss": 0.8865, "step": 7330 }, { "epoch": 0.1299671625520148, "grad_norm": 2.796875, "learning_rate": 4.802609441640805e-05, "loss": 0.8392, "step": 7332 }, { "epoch": 0.13000261458762635, "grad_norm": 3.0, "learning_rate": 4.8025006581838314e-05, "loss": 0.8358, "step": 7334 }, { "epoch": 0.1300380666232379, "grad_norm": 2.78125, "learning_rate": 4.802391845992129e-05, "loss": 0.8317, "step": 7336 }, { "epoch": 0.1300735186588495, "grad_norm": 2.671875, "learning_rate": 4.802283005067057e-05, "loss": 0.8186, "step": 7338 }, { "epoch": 0.13010897069446106, "grad_norm": 2.75, "learning_rate": 4.802174135409971e-05, "loss": 0.8265, "step": 7340 }, { "epoch": 0.13014442273007262, "grad_norm": 2.421875, "learning_rate": 4.802065237022233e-05, "loss": 0.822, "step": 7342 }, { "epoch": 0.1301798747656842, "grad_norm": 2.765625, "learning_rate": 4.801956309905199e-05, "loss": 0.8397, "step": 7344 }, { "epoch": 0.13021532680129577, "grad_norm": 2.765625, "learning_rate": 4.80184735406023e-05, "loss": 0.7997, "step": 7346 }, { "epoch": 0.13025077883690733, "grad_norm": 2.71875, "learning_rate": 4.8017383694886855e-05, "loss": 0.7548, "step": 7348 }, { "epoch": 0.13028623087251892, "grad_norm": 3.078125, "learning_rate": 4.8016293561919256e-05, "loss": 0.85, "step": 7350 }, { "epoch": 0.13032168290813048, "grad_norm": 2.90625, "learning_rate": 4.8015203141713114e-05, "loss": 0.8169, "step": 7352 }, { "epoch": 0.13035713494374204, "grad_norm": 2.640625, "learning_rate": 4.801411243428202e-05, "loss": 0.7821, "step": 7354 }, { "epoch": 0.13039258697935363, "grad_norm": 2.6875, "learning_rate": 4.80130214396396e-05, "loss": 0.819, "step": 7356 }, { "epoch": 0.1304280390149652, "grad_norm": 2.890625, "learning_rate": 4.801193015779947e-05, "loss": 0.8274, "step": 7358 }, { "epoch": 0.13046349105057675, "grad_norm": 2.734375, "learning_rate": 4.801083858877524e-05, "loss": 0.8147, "step": 7360 }, { "epoch": 0.13049894308618834, "grad_norm": 2.765625, "learning_rate": 4.800974673258054e-05, "loss": 0.8461, "step": 7362 }, { "epoch": 0.1305343951217999, "grad_norm": 2.78125, "learning_rate": 4.8008654589228984e-05, "loss": 0.8115, "step": 7364 }, { "epoch": 0.13056984715741146, "grad_norm": 2.640625, "learning_rate": 4.800756215873422e-05, "loss": 0.7553, "step": 7366 }, { "epoch": 0.13060529919302305, "grad_norm": 2.640625, "learning_rate": 4.8006469441109874e-05, "loss": 0.7717, "step": 7368 }, { "epoch": 0.1306407512286346, "grad_norm": 2.609375, "learning_rate": 4.8005376436369576e-05, "loss": 0.7747, "step": 7370 }, { "epoch": 0.13067620326424617, "grad_norm": 2.9375, "learning_rate": 4.800428314452697e-05, "loss": 0.7978, "step": 7372 }, { "epoch": 0.13071165529985776, "grad_norm": 3.15625, "learning_rate": 4.800318956559571e-05, "loss": 0.8016, "step": 7374 }, { "epoch": 0.13074710733546932, "grad_norm": 2.703125, "learning_rate": 4.800209569958943e-05, "loss": 0.8473, "step": 7376 }, { "epoch": 0.13078255937108088, "grad_norm": 2.921875, "learning_rate": 4.80010015465218e-05, "loss": 0.8325, "step": 7378 }, { "epoch": 0.13081801140669247, "grad_norm": 2.671875, "learning_rate": 4.799990710640645e-05, "loss": 0.8087, "step": 7380 }, { "epoch": 0.13085346344230403, "grad_norm": 2.53125, "learning_rate": 4.799881237925704e-05, "loss": 0.7808, "step": 7382 }, { "epoch": 0.1308889154779156, "grad_norm": 2.734375, "learning_rate": 4.799771736508725e-05, "loss": 0.8534, "step": 7384 }, { "epoch": 0.13092436751352718, "grad_norm": 2.6875, "learning_rate": 4.7996622063910744e-05, "loss": 0.7919, "step": 7386 }, { "epoch": 0.13095981954913874, "grad_norm": 3.25, "learning_rate": 4.7995526475741174e-05, "loss": 0.8613, "step": 7388 }, { "epoch": 0.1309952715847503, "grad_norm": 2.703125, "learning_rate": 4.799443060059223e-05, "loss": 0.8076, "step": 7390 }, { "epoch": 0.1310307236203619, "grad_norm": 2.734375, "learning_rate": 4.7993334438477576e-05, "loss": 0.8378, "step": 7392 }, { "epoch": 0.13106617565597345, "grad_norm": 2.625, "learning_rate": 4.7992237989410904e-05, "loss": 0.7977, "step": 7394 }, { "epoch": 0.131101627691585, "grad_norm": 2.65625, "learning_rate": 4.799114125340589e-05, "loss": 0.8135, "step": 7396 }, { "epoch": 0.1311370797271966, "grad_norm": 3.015625, "learning_rate": 4.7990044230476215e-05, "loss": 0.8572, "step": 7398 }, { "epoch": 0.13117253176280816, "grad_norm": 2.796875, "learning_rate": 4.798894692063559e-05, "loss": 0.8426, "step": 7400 }, { "epoch": 0.13120798379841972, "grad_norm": 2.71875, "learning_rate": 4.798784932389768e-05, "loss": 0.8142, "step": 7402 }, { "epoch": 0.1312434358340313, "grad_norm": 2.765625, "learning_rate": 4.798675144027621e-05, "loss": 0.845, "step": 7404 }, { "epoch": 0.13127888786964287, "grad_norm": 2.78125, "learning_rate": 4.798565326978486e-05, "loss": 0.7994, "step": 7406 }, { "epoch": 0.13131433990525443, "grad_norm": 2.734375, "learning_rate": 4.798455481243735e-05, "loss": 0.8078, "step": 7408 }, { "epoch": 0.13134979194086602, "grad_norm": 2.25, "learning_rate": 4.798345606824739e-05, "loss": 0.7955, "step": 7410 }, { "epoch": 0.13138524397647758, "grad_norm": 2.78125, "learning_rate": 4.7982357037228676e-05, "loss": 0.7976, "step": 7412 }, { "epoch": 0.13142069601208914, "grad_norm": 2.609375, "learning_rate": 4.798125771939493e-05, "loss": 0.8133, "step": 7414 }, { "epoch": 0.13145614804770073, "grad_norm": 2.8125, "learning_rate": 4.798015811475989e-05, "loss": 0.7703, "step": 7416 }, { "epoch": 0.1314916000833123, "grad_norm": 2.96875, "learning_rate": 4.7979058223337246e-05, "loss": 0.8613, "step": 7418 }, { "epoch": 0.13152705211892385, "grad_norm": 2.75, "learning_rate": 4.797795804514075e-05, "loss": 0.81, "step": 7420 }, { "epoch": 0.13156250415453544, "grad_norm": 2.71875, "learning_rate": 4.797685758018413e-05, "loss": 0.7829, "step": 7422 }, { "epoch": 0.131597956190147, "grad_norm": 3.046875, "learning_rate": 4.79757568284811e-05, "loss": 0.8287, "step": 7424 }, { "epoch": 0.13163340822575856, "grad_norm": 2.84375, "learning_rate": 4.797465579004542e-05, "loss": 0.8734, "step": 7426 }, { "epoch": 0.13166886026137012, "grad_norm": 2.84375, "learning_rate": 4.797355446489081e-05, "loss": 0.8128, "step": 7428 }, { "epoch": 0.1317043122969817, "grad_norm": 3.015625, "learning_rate": 4.7972452853031035e-05, "loss": 0.7994, "step": 7430 }, { "epoch": 0.13173976433259327, "grad_norm": 2.71875, "learning_rate": 4.797135095447983e-05, "loss": 0.7838, "step": 7432 }, { "epoch": 0.13177521636820483, "grad_norm": 2.65625, "learning_rate": 4.797024876925095e-05, "loss": 0.7476, "step": 7434 }, { "epoch": 0.13181066840381642, "grad_norm": 2.71875, "learning_rate": 4.796914629735815e-05, "loss": 0.8098, "step": 7436 }, { "epoch": 0.13184612043942798, "grad_norm": 2.625, "learning_rate": 4.796804353881519e-05, "loss": 0.8402, "step": 7438 }, { "epoch": 0.13188157247503954, "grad_norm": 2.703125, "learning_rate": 4.7966940493635825e-05, "loss": 0.8256, "step": 7440 }, { "epoch": 0.13191702451065113, "grad_norm": 2.859375, "learning_rate": 4.796583716183383e-05, "loss": 0.8504, "step": 7442 }, { "epoch": 0.1319524765462627, "grad_norm": 2.90625, "learning_rate": 4.7964733543422975e-05, "loss": 0.7709, "step": 7444 }, { "epoch": 0.13198792858187425, "grad_norm": 2.609375, "learning_rate": 4.7963629638417015e-05, "loss": 0.7819, "step": 7446 }, { "epoch": 0.13202338061748584, "grad_norm": 2.53125, "learning_rate": 4.7962525446829757e-05, "loss": 0.825, "step": 7448 }, { "epoch": 0.1320588326530974, "grad_norm": 2.6875, "learning_rate": 4.7961420968674955e-05, "loss": 0.8014, "step": 7450 }, { "epoch": 0.13209428468870896, "grad_norm": 2.5, "learning_rate": 4.796031620396641e-05, "loss": 0.7856, "step": 7452 }, { "epoch": 0.13212973672432055, "grad_norm": 2.453125, "learning_rate": 4.795921115271789e-05, "loss": 0.8327, "step": 7454 }, { "epoch": 0.1321651887599321, "grad_norm": 2.703125, "learning_rate": 4.79581058149432e-05, "loss": 0.8167, "step": 7456 }, { "epoch": 0.13220064079554367, "grad_norm": 2.765625, "learning_rate": 4.7957000190656134e-05, "loss": 0.7662, "step": 7458 }, { "epoch": 0.13223609283115526, "grad_norm": 2.921875, "learning_rate": 4.7955894279870483e-05, "loss": 0.8741, "step": 7460 }, { "epoch": 0.13227154486676682, "grad_norm": 2.5625, "learning_rate": 4.7954788082600055e-05, "loss": 0.808, "step": 7462 }, { "epoch": 0.13230699690237838, "grad_norm": 2.609375, "learning_rate": 4.795368159885866e-05, "loss": 0.7918, "step": 7464 }, { "epoch": 0.13234244893798996, "grad_norm": 2.78125, "learning_rate": 4.7952574828660086e-05, "loss": 0.8284, "step": 7466 }, { "epoch": 0.13237790097360153, "grad_norm": 2.796875, "learning_rate": 4.7951467772018164e-05, "loss": 0.8582, "step": 7468 }, { "epoch": 0.1324133530092131, "grad_norm": 2.5625, "learning_rate": 4.7950360428946705e-05, "loss": 0.8033, "step": 7470 }, { "epoch": 0.13244880504482467, "grad_norm": 3.03125, "learning_rate": 4.794925279945953e-05, "loss": 0.8375, "step": 7472 }, { "epoch": 0.13248425708043624, "grad_norm": 2.90625, "learning_rate": 4.794814488357046e-05, "loss": 0.8369, "step": 7474 }, { "epoch": 0.1325197091160478, "grad_norm": 2.609375, "learning_rate": 4.7947036681293325e-05, "loss": 0.787, "step": 7476 }, { "epoch": 0.13255516115165938, "grad_norm": 2.953125, "learning_rate": 4.7945928192641944e-05, "loss": 0.8105, "step": 7478 }, { "epoch": 0.13259061318727094, "grad_norm": 2.90625, "learning_rate": 4.7944819417630165e-05, "loss": 0.8264, "step": 7480 }, { "epoch": 0.1326260652228825, "grad_norm": 2.671875, "learning_rate": 4.7943710356271816e-05, "loss": 0.8561, "step": 7482 }, { "epoch": 0.1326615172584941, "grad_norm": 2.90625, "learning_rate": 4.794260100858074e-05, "loss": 0.7978, "step": 7484 }, { "epoch": 0.13269696929410565, "grad_norm": 2.921875, "learning_rate": 4.794149137457078e-05, "loss": 0.8171, "step": 7486 }, { "epoch": 0.13273242132971722, "grad_norm": 2.671875, "learning_rate": 4.794038145425579e-05, "loss": 0.8441, "step": 7488 }, { "epoch": 0.1327678733653288, "grad_norm": 2.859375, "learning_rate": 4.793927124764962e-05, "loss": 0.7825, "step": 7490 }, { "epoch": 0.13280332540094036, "grad_norm": 2.546875, "learning_rate": 4.7938160754766114e-05, "loss": 0.837, "step": 7492 }, { "epoch": 0.13283877743655192, "grad_norm": 2.71875, "learning_rate": 4.793704997561915e-05, "loss": 0.8297, "step": 7494 }, { "epoch": 0.1328742294721635, "grad_norm": 2.578125, "learning_rate": 4.793593891022257e-05, "loss": 0.7936, "step": 7496 }, { "epoch": 0.13290968150777507, "grad_norm": 2.984375, "learning_rate": 4.793482755859026e-05, "loss": 0.8175, "step": 7498 }, { "epoch": 0.13294513354338663, "grad_norm": 2.84375, "learning_rate": 4.793371592073607e-05, "loss": 0.826, "step": 7500 }, { "epoch": 0.13298058557899822, "grad_norm": 2.703125, "learning_rate": 4.793260399667388e-05, "loss": 0.7818, "step": 7502 }, { "epoch": 0.13301603761460978, "grad_norm": 2.90625, "learning_rate": 4.793149178641758e-05, "loss": 0.8198, "step": 7504 }, { "epoch": 0.13305148965022134, "grad_norm": 2.765625, "learning_rate": 4.793037928998103e-05, "loss": 0.8036, "step": 7506 }, { "epoch": 0.13308694168583293, "grad_norm": 2.75, "learning_rate": 4.7929266507378125e-05, "loss": 0.8362, "step": 7508 }, { "epoch": 0.1331223937214445, "grad_norm": 3.0, "learning_rate": 4.792815343862275e-05, "loss": 0.8377, "step": 7510 }, { "epoch": 0.13315784575705605, "grad_norm": 2.84375, "learning_rate": 4.792704008372879e-05, "loss": 0.7947, "step": 7512 }, { "epoch": 0.13319329779266764, "grad_norm": 2.96875, "learning_rate": 4.792592644271015e-05, "loss": 0.7883, "step": 7514 }, { "epoch": 0.1332287498282792, "grad_norm": 2.75, "learning_rate": 4.792481251558073e-05, "loss": 0.8371, "step": 7516 }, { "epoch": 0.13326420186389076, "grad_norm": 2.671875, "learning_rate": 4.792369830235441e-05, "loss": 0.8336, "step": 7518 }, { "epoch": 0.13329965389950235, "grad_norm": 3.03125, "learning_rate": 4.792258380304512e-05, "loss": 0.8234, "step": 7520 }, { "epoch": 0.1333351059351139, "grad_norm": 2.5625, "learning_rate": 4.7921469017666756e-05, "loss": 0.7981, "step": 7522 }, { "epoch": 0.13337055797072547, "grad_norm": 2.953125, "learning_rate": 4.792035394623323e-05, "loss": 0.8175, "step": 7524 }, { "epoch": 0.13340601000633706, "grad_norm": 2.765625, "learning_rate": 4.791923858875847e-05, "loss": 0.8514, "step": 7526 }, { "epoch": 0.13344146204194862, "grad_norm": 2.8125, "learning_rate": 4.791812294525638e-05, "loss": 0.8058, "step": 7528 }, { "epoch": 0.13347691407756018, "grad_norm": 2.515625, "learning_rate": 4.791700701574089e-05, "loss": 0.843, "step": 7530 }, { "epoch": 0.13351236611317177, "grad_norm": 2.6875, "learning_rate": 4.7915890800225926e-05, "loss": 0.8108, "step": 7532 }, { "epoch": 0.13354781814878333, "grad_norm": 2.90625, "learning_rate": 4.791477429872542e-05, "loss": 0.8333, "step": 7534 }, { "epoch": 0.1335832701843949, "grad_norm": 2.65625, "learning_rate": 4.7913657511253296e-05, "loss": 0.835, "step": 7536 }, { "epoch": 0.13361872222000648, "grad_norm": 2.796875, "learning_rate": 4.7912540437823506e-05, "loss": 0.8367, "step": 7538 }, { "epoch": 0.13365417425561804, "grad_norm": 2.75, "learning_rate": 4.791142307844998e-05, "loss": 0.8028, "step": 7540 }, { "epoch": 0.1336896262912296, "grad_norm": 2.5625, "learning_rate": 4.7910305433146664e-05, "loss": 0.7917, "step": 7542 }, { "epoch": 0.1337250783268412, "grad_norm": 2.484375, "learning_rate": 4.790918750192751e-05, "loss": 0.7507, "step": 7544 }, { "epoch": 0.13376053036245275, "grad_norm": 2.8125, "learning_rate": 4.790806928480647e-05, "loss": 0.8264, "step": 7546 }, { "epoch": 0.1337959823980643, "grad_norm": 2.625, "learning_rate": 4.79069507817975e-05, "loss": 0.787, "step": 7548 }, { "epoch": 0.1338314344336759, "grad_norm": 2.765625, "learning_rate": 4.790583199291455e-05, "loss": 0.8439, "step": 7550 }, { "epoch": 0.13386688646928746, "grad_norm": 2.8125, "learning_rate": 4.790471291817159e-05, "loss": 0.8312, "step": 7552 }, { "epoch": 0.13390233850489902, "grad_norm": 2.578125, "learning_rate": 4.790359355758258e-05, "loss": 0.8275, "step": 7554 }, { "epoch": 0.1339377905405106, "grad_norm": 2.65625, "learning_rate": 4.79024739111615e-05, "loss": 0.807, "step": 7556 }, { "epoch": 0.13397324257612217, "grad_norm": 2.71875, "learning_rate": 4.7901353978922306e-05, "loss": 0.8654, "step": 7558 }, { "epoch": 0.13400869461173373, "grad_norm": 2.46875, "learning_rate": 4.7900233760878986e-05, "loss": 0.8133, "step": 7560 }, { "epoch": 0.13404414664734532, "grad_norm": 2.640625, "learning_rate": 4.789911325704552e-05, "loss": 0.7941, "step": 7562 }, { "epoch": 0.13407959868295688, "grad_norm": 2.71875, "learning_rate": 4.789799246743589e-05, "loss": 0.8172, "step": 7564 }, { "epoch": 0.13411505071856844, "grad_norm": 2.453125, "learning_rate": 4.789687139206409e-05, "loss": 0.8277, "step": 7566 }, { "epoch": 0.13415050275418003, "grad_norm": 2.484375, "learning_rate": 4.78957500309441e-05, "loss": 0.8077, "step": 7568 }, { "epoch": 0.1341859547897916, "grad_norm": 3.09375, "learning_rate": 4.789462838408991e-05, "loss": 0.8342, "step": 7570 }, { "epoch": 0.13422140682540315, "grad_norm": 2.421875, "learning_rate": 4.789350645151554e-05, "loss": 0.8314, "step": 7572 }, { "epoch": 0.13425685886101474, "grad_norm": 2.703125, "learning_rate": 4.789238423323497e-05, "loss": 0.829, "step": 7574 }, { "epoch": 0.1342923108966263, "grad_norm": 2.671875, "learning_rate": 4.789126172926222e-05, "loss": 0.8237, "step": 7576 }, { "epoch": 0.13432776293223786, "grad_norm": 2.796875, "learning_rate": 4.7890138939611285e-05, "loss": 0.8177, "step": 7578 }, { "epoch": 0.13436321496784945, "grad_norm": 2.984375, "learning_rate": 4.788901586429618e-05, "loss": 0.8511, "step": 7580 }, { "epoch": 0.134398667003461, "grad_norm": 2.640625, "learning_rate": 4.7887892503330936e-05, "loss": 0.8095, "step": 7582 }, { "epoch": 0.13443411903907257, "grad_norm": 2.5625, "learning_rate": 4.7886768856729546e-05, "loss": 0.8151, "step": 7584 }, { "epoch": 0.13446957107468416, "grad_norm": 2.578125, "learning_rate": 4.788564492450606e-05, "loss": 0.8282, "step": 7586 }, { "epoch": 0.13450502311029572, "grad_norm": 2.828125, "learning_rate": 4.7884520706674485e-05, "loss": 0.7904, "step": 7588 }, { "epoch": 0.13454047514590728, "grad_norm": 2.828125, "learning_rate": 4.788339620324887e-05, "loss": 0.8623, "step": 7590 }, { "epoch": 0.13457592718151887, "grad_norm": 2.828125, "learning_rate": 4.788227141424322e-05, "loss": 0.8727, "step": 7592 }, { "epoch": 0.13461137921713043, "grad_norm": 2.53125, "learning_rate": 4.78811463396716e-05, "loss": 0.7833, "step": 7594 }, { "epoch": 0.134646831252742, "grad_norm": 2.75, "learning_rate": 4.788002097954804e-05, "loss": 0.8188, "step": 7596 }, { "epoch": 0.13468228328835358, "grad_norm": 2.921875, "learning_rate": 4.787889533388658e-05, "loss": 0.8437, "step": 7598 }, { "epoch": 0.13471773532396514, "grad_norm": 2.75, "learning_rate": 4.787776940270127e-05, "loss": 0.788, "step": 7600 }, { "epoch": 0.1347531873595767, "grad_norm": 3.03125, "learning_rate": 4.787664318600615e-05, "loss": 0.8419, "step": 7602 }, { "epoch": 0.13478863939518826, "grad_norm": 2.4375, "learning_rate": 4.787551668381531e-05, "loss": 0.7876, "step": 7604 }, { "epoch": 0.13482409143079985, "grad_norm": 3.09375, "learning_rate": 4.787438989614278e-05, "loss": 0.8196, "step": 7606 }, { "epoch": 0.1348595434664114, "grad_norm": 2.875, "learning_rate": 4.7873262823002627e-05, "loss": 0.8218, "step": 7608 }, { "epoch": 0.13489499550202297, "grad_norm": 2.671875, "learning_rate": 4.787213546440892e-05, "loss": 0.8086, "step": 7610 }, { "epoch": 0.13493044753763456, "grad_norm": 3.03125, "learning_rate": 4.7871007820375725e-05, "loss": 0.7893, "step": 7612 }, { "epoch": 0.13496589957324612, "grad_norm": 2.875, "learning_rate": 4.786987989091711e-05, "loss": 0.8129, "step": 7614 }, { "epoch": 0.13500135160885768, "grad_norm": 2.90625, "learning_rate": 4.786875167604716e-05, "loss": 0.8359, "step": 7616 }, { "epoch": 0.13503680364446927, "grad_norm": 2.578125, "learning_rate": 4.7867623175779955e-05, "loss": 0.8402, "step": 7618 }, { "epoch": 0.13507225568008083, "grad_norm": 2.890625, "learning_rate": 4.786649439012958e-05, "loss": 0.8359, "step": 7620 }, { "epoch": 0.1351077077156924, "grad_norm": 2.828125, "learning_rate": 4.786536531911011e-05, "loss": 0.8427, "step": 7622 }, { "epoch": 0.13514315975130398, "grad_norm": 2.640625, "learning_rate": 4.7864235962735646e-05, "loss": 0.8282, "step": 7624 }, { "epoch": 0.13517861178691554, "grad_norm": 2.859375, "learning_rate": 4.7863106321020285e-05, "loss": 0.7969, "step": 7626 }, { "epoch": 0.1352140638225271, "grad_norm": 2.9375, "learning_rate": 4.7861976393978115e-05, "loss": 0.7827, "step": 7628 }, { "epoch": 0.13524951585813869, "grad_norm": 2.828125, "learning_rate": 4.7860846181623244e-05, "loss": 0.8338, "step": 7630 }, { "epoch": 0.13528496789375025, "grad_norm": 2.71875, "learning_rate": 4.785971568396977e-05, "loss": 0.8179, "step": 7632 }, { "epoch": 0.1353204199293618, "grad_norm": 2.8125, "learning_rate": 4.78585849010318e-05, "loss": 0.8643, "step": 7634 }, { "epoch": 0.1353558719649734, "grad_norm": 2.6875, "learning_rate": 4.785745383282346e-05, "loss": 0.8289, "step": 7636 }, { "epoch": 0.13539132400058496, "grad_norm": 2.6875, "learning_rate": 4.785632247935886e-05, "loss": 0.8129, "step": 7638 }, { "epoch": 0.13542677603619652, "grad_norm": 2.671875, "learning_rate": 4.785519084065211e-05, "loss": 0.8034, "step": 7640 }, { "epoch": 0.1354622280718081, "grad_norm": 3.15625, "learning_rate": 4.7854058916717336e-05, "loss": 0.8267, "step": 7642 }, { "epoch": 0.13549768010741967, "grad_norm": 2.609375, "learning_rate": 4.7852926707568676e-05, "loss": 0.7633, "step": 7644 }, { "epoch": 0.13553313214303123, "grad_norm": 2.8125, "learning_rate": 4.785179421322025e-05, "loss": 0.8112, "step": 7646 }, { "epoch": 0.13556858417864281, "grad_norm": 2.84375, "learning_rate": 4.785066143368618e-05, "loss": 0.8385, "step": 7648 }, { "epoch": 0.13560403621425438, "grad_norm": 3.046875, "learning_rate": 4.784952836898062e-05, "loss": 0.8185, "step": 7650 }, { "epoch": 0.13563948824986594, "grad_norm": 2.78125, "learning_rate": 4.784839501911771e-05, "loss": 0.8011, "step": 7652 }, { "epoch": 0.13567494028547752, "grad_norm": 2.671875, "learning_rate": 4.7847261384111585e-05, "loss": 0.8231, "step": 7654 }, { "epoch": 0.13571039232108909, "grad_norm": 2.84375, "learning_rate": 4.7846127463976395e-05, "loss": 0.7639, "step": 7656 }, { "epoch": 0.13574584435670065, "grad_norm": 2.921875, "learning_rate": 4.78449932587263e-05, "loss": 0.8732, "step": 7658 }, { "epoch": 0.13578129639231223, "grad_norm": 2.671875, "learning_rate": 4.784385876837545e-05, "loss": 0.7955, "step": 7660 }, { "epoch": 0.1358167484279238, "grad_norm": 2.53125, "learning_rate": 4.784272399293799e-05, "loss": 0.848, "step": 7662 }, { "epoch": 0.13585220046353536, "grad_norm": 3.109375, "learning_rate": 4.784158893242809e-05, "loss": 0.8288, "step": 7664 }, { "epoch": 0.13588765249914694, "grad_norm": 2.640625, "learning_rate": 4.784045358685993e-05, "loss": 0.763, "step": 7666 }, { "epoch": 0.1359231045347585, "grad_norm": 2.78125, "learning_rate": 4.783931795624766e-05, "loss": 0.7886, "step": 7668 }, { "epoch": 0.13595855657037006, "grad_norm": 2.578125, "learning_rate": 4.783818204060546e-05, "loss": 0.8637, "step": 7670 }, { "epoch": 0.13599400860598165, "grad_norm": 2.96875, "learning_rate": 4.78370458399475e-05, "loss": 0.814, "step": 7672 }, { "epoch": 0.13602946064159321, "grad_norm": 2.390625, "learning_rate": 4.7835909354287975e-05, "loss": 0.8021, "step": 7674 }, { "epoch": 0.13606491267720477, "grad_norm": 2.8125, "learning_rate": 4.7834772583641054e-05, "loss": 0.8465, "step": 7676 }, { "epoch": 0.13610036471281636, "grad_norm": 2.703125, "learning_rate": 4.783363552802092e-05, "loss": 0.8472, "step": 7678 }, { "epoch": 0.13613581674842792, "grad_norm": 3.09375, "learning_rate": 4.783249818744178e-05, "loss": 0.8313, "step": 7680 }, { "epoch": 0.13617126878403948, "grad_norm": 2.796875, "learning_rate": 4.783136056191781e-05, "loss": 0.8137, "step": 7682 }, { "epoch": 0.13620672081965107, "grad_norm": 2.703125, "learning_rate": 4.783022265146322e-05, "loss": 0.8334, "step": 7684 }, { "epoch": 0.13624217285526263, "grad_norm": 2.96875, "learning_rate": 4.782908445609221e-05, "loss": 0.8134, "step": 7686 }, { "epoch": 0.1362776248908742, "grad_norm": 2.828125, "learning_rate": 4.782794597581898e-05, "loss": 0.835, "step": 7688 }, { "epoch": 0.13631307692648578, "grad_norm": 2.859375, "learning_rate": 4.782680721065773e-05, "loss": 0.8511, "step": 7690 }, { "epoch": 0.13634852896209734, "grad_norm": 2.84375, "learning_rate": 4.7825668160622686e-05, "loss": 0.8473, "step": 7692 }, { "epoch": 0.1363839809977089, "grad_norm": 2.546875, "learning_rate": 4.7824528825728055e-05, "loss": 0.8053, "step": 7694 }, { "epoch": 0.1364194330333205, "grad_norm": 2.765625, "learning_rate": 4.782338920598807e-05, "loss": 0.8145, "step": 7696 }, { "epoch": 0.13645488506893205, "grad_norm": 2.8125, "learning_rate": 4.782224930141693e-05, "loss": 0.8105, "step": 7698 }, { "epoch": 0.1364903371045436, "grad_norm": 2.71875, "learning_rate": 4.7821109112028876e-05, "loss": 0.7639, "step": 7700 }, { "epoch": 0.1365257891401552, "grad_norm": 3.125, "learning_rate": 4.781996863783813e-05, "loss": 0.8154, "step": 7702 }, { "epoch": 0.13656124117576676, "grad_norm": 2.875, "learning_rate": 4.781882787885893e-05, "loss": 0.7771, "step": 7704 }, { "epoch": 0.13659669321137832, "grad_norm": 2.59375, "learning_rate": 4.7817686835105513e-05, "loss": 0.8352, "step": 7706 }, { "epoch": 0.1366321452469899, "grad_norm": 2.84375, "learning_rate": 4.781654550659211e-05, "loss": 0.8448, "step": 7708 }, { "epoch": 0.13666759728260147, "grad_norm": 2.515625, "learning_rate": 4.781540389333298e-05, "loss": 0.8205, "step": 7710 }, { "epoch": 0.13670304931821303, "grad_norm": 2.765625, "learning_rate": 4.781426199534236e-05, "loss": 0.8376, "step": 7712 }, { "epoch": 0.13673850135382462, "grad_norm": 2.734375, "learning_rate": 4.78131198126345e-05, "loss": 0.8491, "step": 7714 }, { "epoch": 0.13677395338943618, "grad_norm": 2.796875, "learning_rate": 4.781197734522366e-05, "loss": 0.8213, "step": 7716 }, { "epoch": 0.13680940542504774, "grad_norm": 2.71875, "learning_rate": 4.781083459312409e-05, "loss": 0.8204, "step": 7718 }, { "epoch": 0.13684485746065933, "grad_norm": 2.546875, "learning_rate": 4.780969155635006e-05, "loss": 0.7855, "step": 7720 }, { "epoch": 0.1368803094962709, "grad_norm": 2.75, "learning_rate": 4.7808548234915826e-05, "loss": 0.7865, "step": 7722 }, { "epoch": 0.13691576153188245, "grad_norm": 2.90625, "learning_rate": 4.7807404628835664e-05, "loss": 0.8088, "step": 7724 }, { "epoch": 0.13695121356749404, "grad_norm": 2.75, "learning_rate": 4.780626073812383e-05, "loss": 0.8477, "step": 7726 }, { "epoch": 0.1369866656031056, "grad_norm": 2.6875, "learning_rate": 4.780511656279463e-05, "loss": 0.8656, "step": 7728 }, { "epoch": 0.13702211763871716, "grad_norm": 2.609375, "learning_rate": 4.7803972102862314e-05, "loss": 0.8328, "step": 7730 }, { "epoch": 0.13705756967432875, "grad_norm": 2.796875, "learning_rate": 4.780282735834119e-05, "loss": 0.7767, "step": 7732 }, { "epoch": 0.1370930217099403, "grad_norm": 2.625, "learning_rate": 4.780168232924551e-05, "loss": 0.8444, "step": 7734 }, { "epoch": 0.13712847374555187, "grad_norm": 2.578125, "learning_rate": 4.78005370155896e-05, "loss": 0.8089, "step": 7736 }, { "epoch": 0.13716392578116346, "grad_norm": 2.90625, "learning_rate": 4.7799391417387727e-05, "loss": 0.8253, "step": 7738 }, { "epoch": 0.13719937781677502, "grad_norm": 2.703125, "learning_rate": 4.77982455346542e-05, "loss": 0.8302, "step": 7740 }, { "epoch": 0.13723482985238658, "grad_norm": 2.75, "learning_rate": 4.779709936740332e-05, "loss": 0.8361, "step": 7742 }, { "epoch": 0.13727028188799817, "grad_norm": 2.671875, "learning_rate": 4.779595291564939e-05, "loss": 0.8076, "step": 7744 }, { "epoch": 0.13730573392360973, "grad_norm": 2.8125, "learning_rate": 4.77948061794067e-05, "loss": 0.8185, "step": 7746 }, { "epoch": 0.1373411859592213, "grad_norm": 2.640625, "learning_rate": 4.7793659158689594e-05, "loss": 0.7821, "step": 7748 }, { "epoch": 0.13737663799483288, "grad_norm": 2.84375, "learning_rate": 4.779251185351237e-05, "loss": 0.8564, "step": 7750 }, { "epoch": 0.13741209003044444, "grad_norm": 2.484375, "learning_rate": 4.779136426388934e-05, "loss": 0.8062, "step": 7752 }, { "epoch": 0.137447542066056, "grad_norm": 2.671875, "learning_rate": 4.779021638983483e-05, "loss": 0.8118, "step": 7754 }, { "epoch": 0.1374829941016676, "grad_norm": 2.71875, "learning_rate": 4.7789068231363165e-05, "loss": 0.8128, "step": 7756 }, { "epoch": 0.13751844613727915, "grad_norm": 2.8125, "learning_rate": 4.7787919788488675e-05, "loss": 0.8501, "step": 7758 }, { "epoch": 0.1375538981728907, "grad_norm": 2.984375, "learning_rate": 4.77867710612257e-05, "loss": 0.7931, "step": 7760 }, { "epoch": 0.1375893502085023, "grad_norm": 3.03125, "learning_rate": 4.778562204958856e-05, "loss": 0.882, "step": 7762 }, { "epoch": 0.13762480224411386, "grad_norm": 2.953125, "learning_rate": 4.7784472753591606e-05, "loss": 0.8161, "step": 7764 }, { "epoch": 0.13766025427972542, "grad_norm": 3.1875, "learning_rate": 4.778332317324918e-05, "loss": 0.8651, "step": 7766 }, { "epoch": 0.137695706315337, "grad_norm": 2.671875, "learning_rate": 4.7782173308575625e-05, "loss": 0.823, "step": 7768 }, { "epoch": 0.13773115835094857, "grad_norm": 3.109375, "learning_rate": 4.7781023159585295e-05, "loss": 0.8671, "step": 7770 }, { "epoch": 0.13776661038656013, "grad_norm": 2.71875, "learning_rate": 4.777987272629253e-05, "loss": 0.8153, "step": 7772 }, { "epoch": 0.1378020624221717, "grad_norm": 2.765625, "learning_rate": 4.7778722008711704e-05, "loss": 0.9034, "step": 7774 }, { "epoch": 0.13783751445778328, "grad_norm": 2.71875, "learning_rate": 4.7777571006857174e-05, "loss": 0.8068, "step": 7776 }, { "epoch": 0.13787296649339484, "grad_norm": 2.6875, "learning_rate": 4.777641972074331e-05, "loss": 0.7982, "step": 7778 }, { "epoch": 0.1379084185290064, "grad_norm": 2.8125, "learning_rate": 4.7775268150384454e-05, "loss": 0.8481, "step": 7780 }, { "epoch": 0.137943870564618, "grad_norm": 2.703125, "learning_rate": 4.7774116295795e-05, "loss": 0.8235, "step": 7782 }, { "epoch": 0.13797932260022955, "grad_norm": 2.765625, "learning_rate": 4.777296415698933e-05, "loss": 0.7464, "step": 7784 }, { "epoch": 0.1380147746358411, "grad_norm": 2.765625, "learning_rate": 4.7771811733981797e-05, "loss": 0.8036, "step": 7786 }, { "epoch": 0.1380502266714527, "grad_norm": 2.578125, "learning_rate": 4.777065902678681e-05, "loss": 0.7683, "step": 7788 }, { "epoch": 0.13808567870706426, "grad_norm": 3.03125, "learning_rate": 4.776950603541873e-05, "loss": 0.8879, "step": 7790 }, { "epoch": 0.13812113074267582, "grad_norm": 2.875, "learning_rate": 4.776835275989196e-05, "loss": 0.7814, "step": 7792 }, { "epoch": 0.1381565827782874, "grad_norm": 2.703125, "learning_rate": 4.776719920022089e-05, "loss": 0.8611, "step": 7794 }, { "epoch": 0.13819203481389897, "grad_norm": 2.625, "learning_rate": 4.776604535641992e-05, "loss": 0.8142, "step": 7796 }, { "epoch": 0.13822748684951053, "grad_norm": 3.015625, "learning_rate": 4.776489122850344e-05, "loss": 0.8255, "step": 7798 }, { "epoch": 0.13826293888512212, "grad_norm": 2.65625, "learning_rate": 4.776373681648586e-05, "loss": 0.7773, "step": 7800 }, { "epoch": 0.13829839092073368, "grad_norm": 2.328125, "learning_rate": 4.776258212038159e-05, "loss": 0.7941, "step": 7802 }, { "epoch": 0.13833384295634524, "grad_norm": 2.921875, "learning_rate": 4.7761427140205034e-05, "loss": 0.7812, "step": 7804 }, { "epoch": 0.13836929499195683, "grad_norm": 2.65625, "learning_rate": 4.7760271875970606e-05, "loss": 0.7889, "step": 7806 }, { "epoch": 0.1384047470275684, "grad_norm": 2.78125, "learning_rate": 4.7759116327692726e-05, "loss": 0.8312, "step": 7808 }, { "epoch": 0.13844019906317995, "grad_norm": 2.609375, "learning_rate": 4.775796049538582e-05, "loss": 0.846, "step": 7810 }, { "epoch": 0.13847565109879154, "grad_norm": 2.765625, "learning_rate": 4.77568043790643e-05, "loss": 0.7984, "step": 7812 }, { "epoch": 0.1385111031344031, "grad_norm": 2.890625, "learning_rate": 4.77556479787426e-05, "loss": 0.7918, "step": 7814 }, { "epoch": 0.13854655517001466, "grad_norm": 2.75, "learning_rate": 4.7754491294435165e-05, "loss": 0.8044, "step": 7816 }, { "epoch": 0.13858200720562625, "grad_norm": 2.640625, "learning_rate": 4.775333432615641e-05, "loss": 0.791, "step": 7818 }, { "epoch": 0.1386174592412378, "grad_norm": 2.5625, "learning_rate": 4.775217707392078e-05, "loss": 0.7833, "step": 7820 }, { "epoch": 0.13865291127684937, "grad_norm": 2.703125, "learning_rate": 4.7751019537742725e-05, "loss": 0.8009, "step": 7822 }, { "epoch": 0.13868836331246095, "grad_norm": 2.84375, "learning_rate": 4.774986171763668e-05, "loss": 0.8026, "step": 7824 }, { "epoch": 0.13872381534807252, "grad_norm": 2.921875, "learning_rate": 4.7748703613617095e-05, "loss": 0.7954, "step": 7826 }, { "epoch": 0.13875926738368408, "grad_norm": 2.59375, "learning_rate": 4.7747545225698434e-05, "loss": 0.8351, "step": 7828 }, { "epoch": 0.13879471941929566, "grad_norm": 2.53125, "learning_rate": 4.774638655389514e-05, "loss": 0.8444, "step": 7830 }, { "epoch": 0.13883017145490723, "grad_norm": 2.75, "learning_rate": 4.7745227598221687e-05, "loss": 0.8925, "step": 7832 }, { "epoch": 0.13886562349051879, "grad_norm": 2.625, "learning_rate": 4.774406835869253e-05, "loss": 0.7889, "step": 7834 }, { "epoch": 0.13890107552613037, "grad_norm": 2.828125, "learning_rate": 4.7742908835322136e-05, "loss": 0.8259, "step": 7836 }, { "epoch": 0.13893652756174193, "grad_norm": 2.6875, "learning_rate": 4.774174902812498e-05, "loss": 0.8031, "step": 7838 }, { "epoch": 0.1389719795973535, "grad_norm": 2.5, "learning_rate": 4.774058893711553e-05, "loss": 0.7957, "step": 7840 }, { "epoch": 0.13900743163296508, "grad_norm": 3.140625, "learning_rate": 4.7739428562308266e-05, "loss": 0.7898, "step": 7842 }, { "epoch": 0.13904288366857664, "grad_norm": 2.53125, "learning_rate": 4.773826790371767e-05, "loss": 0.7799, "step": 7844 }, { "epoch": 0.1390783357041882, "grad_norm": 2.5625, "learning_rate": 4.773710696135822e-05, "loss": 0.8168, "step": 7846 }, { "epoch": 0.1391137877397998, "grad_norm": 3.359375, "learning_rate": 4.773594573524442e-05, "loss": 0.8682, "step": 7848 }, { "epoch": 0.13914923977541135, "grad_norm": 2.921875, "learning_rate": 4.773478422539075e-05, "loss": 0.8098, "step": 7850 }, { "epoch": 0.13918469181102291, "grad_norm": 2.671875, "learning_rate": 4.773362243181171e-05, "loss": 0.7966, "step": 7852 }, { "epoch": 0.1392201438466345, "grad_norm": 2.609375, "learning_rate": 4.773246035452179e-05, "loss": 0.828, "step": 7854 }, { "epoch": 0.13925559588224606, "grad_norm": 2.703125, "learning_rate": 4.773129799353551e-05, "loss": 0.8082, "step": 7856 }, { "epoch": 0.13929104791785762, "grad_norm": 2.65625, "learning_rate": 4.773013534886735e-05, "loss": 0.8269, "step": 7858 }, { "epoch": 0.1393264999534692, "grad_norm": 2.9375, "learning_rate": 4.772897242053186e-05, "loss": 0.8295, "step": 7860 }, { "epoch": 0.13936195198908077, "grad_norm": 3.078125, "learning_rate": 4.772780920854351e-05, "loss": 0.8027, "step": 7862 }, { "epoch": 0.13939740402469233, "grad_norm": 2.84375, "learning_rate": 4.772664571291684e-05, "loss": 0.8514, "step": 7864 }, { "epoch": 0.13943285606030392, "grad_norm": 2.578125, "learning_rate": 4.772548193366636e-05, "loss": 0.7971, "step": 7866 }, { "epoch": 0.13946830809591548, "grad_norm": 2.734375, "learning_rate": 4.772431787080661e-05, "loss": 0.8007, "step": 7868 }, { "epoch": 0.13950376013152704, "grad_norm": 2.484375, "learning_rate": 4.7723153524352096e-05, "loss": 0.8558, "step": 7870 }, { "epoch": 0.13953921216713863, "grad_norm": 2.796875, "learning_rate": 4.772198889431736e-05, "loss": 0.8324, "step": 7872 }, { "epoch": 0.1395746642027502, "grad_norm": 3.015625, "learning_rate": 4.7720823980716934e-05, "loss": 0.8482, "step": 7874 }, { "epoch": 0.13961011623836175, "grad_norm": 3.078125, "learning_rate": 4.771965878356536e-05, "loss": 0.8482, "step": 7876 }, { "epoch": 0.13964556827397334, "grad_norm": 3.265625, "learning_rate": 4.771849330287718e-05, "loss": 0.8271, "step": 7878 }, { "epoch": 0.1396810203095849, "grad_norm": 3.0625, "learning_rate": 4.7717327538666935e-05, "loss": 0.8187, "step": 7880 }, { "epoch": 0.13971647234519646, "grad_norm": 2.46875, "learning_rate": 4.771616149094917e-05, "loss": 0.817, "step": 7882 }, { "epoch": 0.13975192438080805, "grad_norm": 2.546875, "learning_rate": 4.771499515973844e-05, "loss": 0.7927, "step": 7884 }, { "epoch": 0.1397873764164196, "grad_norm": 2.8125, "learning_rate": 4.7713828545049303e-05, "loss": 0.8102, "step": 7886 }, { "epoch": 0.13982282845203117, "grad_norm": 2.671875, "learning_rate": 4.7712661646896316e-05, "loss": 0.8759, "step": 7888 }, { "epoch": 0.13985828048764276, "grad_norm": 2.84375, "learning_rate": 4.771149446529405e-05, "loss": 0.847, "step": 7890 }, { "epoch": 0.13989373252325432, "grad_norm": 2.8125, "learning_rate": 4.771032700025706e-05, "loss": 0.8383, "step": 7892 }, { "epoch": 0.13992918455886588, "grad_norm": 2.390625, "learning_rate": 4.770915925179991e-05, "loss": 0.8035, "step": 7894 }, { "epoch": 0.13996463659447747, "grad_norm": 2.8125, "learning_rate": 4.7707991219937194e-05, "loss": 0.8306, "step": 7896 }, { "epoch": 0.14000008863008903, "grad_norm": 2.71875, "learning_rate": 4.770682290468347e-05, "loss": 0.8405, "step": 7898 }, { "epoch": 0.1400355406657006, "grad_norm": 2.828125, "learning_rate": 4.7705654306053326e-05, "loss": 0.8105, "step": 7900 }, { "epoch": 0.14007099270131218, "grad_norm": 2.609375, "learning_rate": 4.770448542406135e-05, "loss": 0.7714, "step": 7902 }, { "epoch": 0.14010644473692374, "grad_norm": 2.65625, "learning_rate": 4.770331625872212e-05, "loss": 0.7847, "step": 7904 }, { "epoch": 0.1401418967725353, "grad_norm": 2.9375, "learning_rate": 4.770214681005024e-05, "loss": 0.8186, "step": 7906 }, { "epoch": 0.1401773488081469, "grad_norm": 2.640625, "learning_rate": 4.7700977078060286e-05, "loss": 0.799, "step": 7908 }, { "epoch": 0.14021280084375845, "grad_norm": 2.65625, "learning_rate": 4.7699807062766876e-05, "loss": 0.7855, "step": 7910 }, { "epoch": 0.14024825287937, "grad_norm": 2.859375, "learning_rate": 4.7698636764184597e-05, "loss": 0.848, "step": 7912 }, { "epoch": 0.1402837049149816, "grad_norm": 2.90625, "learning_rate": 4.769746618232805e-05, "loss": 0.8345, "step": 7914 }, { "epoch": 0.14031915695059316, "grad_norm": 2.765625, "learning_rate": 4.769629531721187e-05, "loss": 0.7926, "step": 7916 }, { "epoch": 0.14035460898620472, "grad_norm": 2.671875, "learning_rate": 4.769512416885064e-05, "loss": 0.8247, "step": 7918 }, { "epoch": 0.1403900610218163, "grad_norm": 2.859375, "learning_rate": 4.7693952737259e-05, "loss": 0.8146, "step": 7920 }, { "epoch": 0.14042551305742787, "grad_norm": 2.578125, "learning_rate": 4.7692781022451536e-05, "loss": 0.8202, "step": 7922 }, { "epoch": 0.14046096509303943, "grad_norm": 2.71875, "learning_rate": 4.7691609024442905e-05, "loss": 0.819, "step": 7924 }, { "epoch": 0.14049641712865102, "grad_norm": 2.5625, "learning_rate": 4.7690436743247727e-05, "loss": 0.8124, "step": 7926 }, { "epoch": 0.14053186916426258, "grad_norm": 2.640625, "learning_rate": 4.768926417888061e-05, "loss": 0.7961, "step": 7928 }, { "epoch": 0.14056732119987414, "grad_norm": 2.5625, "learning_rate": 4.76880913313562e-05, "loss": 0.7982, "step": 7930 }, { "epoch": 0.14060277323548573, "grad_norm": 2.71875, "learning_rate": 4.7686918200689144e-05, "loss": 0.8538, "step": 7932 }, { "epoch": 0.1406382252710973, "grad_norm": 2.8125, "learning_rate": 4.768574478689408e-05, "loss": 0.7942, "step": 7934 }, { "epoch": 0.14067367730670885, "grad_norm": 2.953125, "learning_rate": 4.768457108998564e-05, "loss": 0.802, "step": 7936 }, { "epoch": 0.14070912934232044, "grad_norm": 2.46875, "learning_rate": 4.768339710997847e-05, "loss": 0.8082, "step": 7938 }, { "epoch": 0.140744581377932, "grad_norm": 2.859375, "learning_rate": 4.768222284688724e-05, "loss": 0.7945, "step": 7940 }, { "epoch": 0.14078003341354356, "grad_norm": 2.890625, "learning_rate": 4.7681048300726584e-05, "loss": 0.845, "step": 7942 }, { "epoch": 0.14081548544915512, "grad_norm": 2.8125, "learning_rate": 4.767987347151118e-05, "loss": 0.8232, "step": 7944 }, { "epoch": 0.1408509374847667, "grad_norm": 2.65625, "learning_rate": 4.767869835925567e-05, "loss": 0.8091, "step": 7946 }, { "epoch": 0.14088638952037827, "grad_norm": 2.4375, "learning_rate": 4.767752296397473e-05, "loss": 0.7925, "step": 7948 }, { "epoch": 0.14092184155598983, "grad_norm": 2.6875, "learning_rate": 4.767634728568303e-05, "loss": 0.8356, "step": 7950 }, { "epoch": 0.14095729359160142, "grad_norm": 3.078125, "learning_rate": 4.7675171324395236e-05, "loss": 0.8526, "step": 7952 }, { "epoch": 0.14099274562721298, "grad_norm": 2.671875, "learning_rate": 4.767399508012603e-05, "loss": 0.8233, "step": 7954 }, { "epoch": 0.14102819766282454, "grad_norm": 2.734375, "learning_rate": 4.767281855289009e-05, "loss": 0.8237, "step": 7956 }, { "epoch": 0.14106364969843613, "grad_norm": 2.359375, "learning_rate": 4.767164174270208e-05, "loss": 0.8088, "step": 7958 }, { "epoch": 0.1410991017340477, "grad_norm": 2.703125, "learning_rate": 4.767046464957672e-05, "loss": 0.8382, "step": 7960 }, { "epoch": 0.14113455376965925, "grad_norm": 2.859375, "learning_rate": 4.7669287273528676e-05, "loss": 0.7871, "step": 7962 }, { "epoch": 0.14117000580527084, "grad_norm": 2.671875, "learning_rate": 4.766810961457265e-05, "loss": 0.8088, "step": 7964 }, { "epoch": 0.1412054578408824, "grad_norm": 2.71875, "learning_rate": 4.7666931672723346e-05, "loss": 0.8341, "step": 7966 }, { "epoch": 0.14124090987649396, "grad_norm": 2.9375, "learning_rate": 4.766575344799544e-05, "loss": 0.816, "step": 7968 }, { "epoch": 0.14127636191210555, "grad_norm": 2.640625, "learning_rate": 4.7664574940403666e-05, "loss": 0.8126, "step": 7970 }, { "epoch": 0.1413118139477171, "grad_norm": 2.515625, "learning_rate": 4.7663396149962715e-05, "loss": 0.7982, "step": 7972 }, { "epoch": 0.14134726598332867, "grad_norm": 2.734375, "learning_rate": 4.76622170766873e-05, "loss": 0.7979, "step": 7974 }, { "epoch": 0.14138271801894026, "grad_norm": 2.78125, "learning_rate": 4.766103772059213e-05, "loss": 0.8553, "step": 7976 }, { "epoch": 0.14141817005455182, "grad_norm": 2.578125, "learning_rate": 4.7659858081691936e-05, "loss": 0.7936, "step": 7978 }, { "epoch": 0.14145362209016338, "grad_norm": 2.65625, "learning_rate": 4.7658678160001425e-05, "loss": 0.8434, "step": 7980 }, { "epoch": 0.14148907412577497, "grad_norm": 2.78125, "learning_rate": 4.7657497955535334e-05, "loss": 0.8208, "step": 7982 }, { "epoch": 0.14152452616138653, "grad_norm": 3.0, "learning_rate": 4.765631746830839e-05, "loss": 0.8071, "step": 7984 }, { "epoch": 0.1415599781969981, "grad_norm": 2.515625, "learning_rate": 4.7655136698335326e-05, "loss": 0.8083, "step": 7986 }, { "epoch": 0.14159543023260968, "grad_norm": 2.796875, "learning_rate": 4.7653955645630866e-05, "loss": 0.7766, "step": 7988 }, { "epoch": 0.14163088226822124, "grad_norm": 3.28125, "learning_rate": 4.765277431020976e-05, "loss": 0.8276, "step": 7990 }, { "epoch": 0.1416663343038328, "grad_norm": 2.609375, "learning_rate": 4.7651592692086756e-05, "loss": 0.8471, "step": 7992 }, { "epoch": 0.14170178633944439, "grad_norm": 2.671875, "learning_rate": 4.7650410791276584e-05, "loss": 0.8432, "step": 7994 }, { "epoch": 0.14173723837505595, "grad_norm": 2.828125, "learning_rate": 4.764922860779401e-05, "loss": 0.7798, "step": 7996 }, { "epoch": 0.1417726904106675, "grad_norm": 3.03125, "learning_rate": 4.764804614165377e-05, "loss": 0.763, "step": 7998 }, { "epoch": 0.1418081424462791, "grad_norm": 2.96875, "learning_rate": 4.7646863392870644e-05, "loss": 0.7884, "step": 8000 }, { "epoch": 0.14184359448189066, "grad_norm": 2.859375, "learning_rate": 4.764568036145938e-05, "loss": 0.819, "step": 8002 }, { "epoch": 0.14187904651750222, "grad_norm": 2.9375, "learning_rate": 4.764449704743473e-05, "loss": 0.8361, "step": 8004 }, { "epoch": 0.1419144985531138, "grad_norm": 2.78125, "learning_rate": 4.764331345081148e-05, "loss": 0.829, "step": 8006 }, { "epoch": 0.14194995058872537, "grad_norm": 2.921875, "learning_rate": 4.76421295716044e-05, "loss": 0.8125, "step": 8008 }, { "epoch": 0.14198540262433693, "grad_norm": 2.6875, "learning_rate": 4.7640945409828255e-05, "loss": 0.848, "step": 8010 }, { "epoch": 0.14202085465994851, "grad_norm": 2.828125, "learning_rate": 4.763976096549782e-05, "loss": 0.8195, "step": 8012 }, { "epoch": 0.14205630669556008, "grad_norm": 2.96875, "learning_rate": 4.7638576238627886e-05, "loss": 0.7931, "step": 8014 }, { "epoch": 0.14209175873117164, "grad_norm": 2.71875, "learning_rate": 4.763739122923324e-05, "loss": 0.8273, "step": 8016 }, { "epoch": 0.14212721076678322, "grad_norm": 2.8125, "learning_rate": 4.763620593732867e-05, "loss": 0.8068, "step": 8018 }, { "epoch": 0.14216266280239478, "grad_norm": 2.6875, "learning_rate": 4.763502036292896e-05, "loss": 0.7955, "step": 8020 }, { "epoch": 0.14219811483800635, "grad_norm": 2.828125, "learning_rate": 4.763383450604891e-05, "loss": 0.8171, "step": 8022 }, { "epoch": 0.14223356687361793, "grad_norm": 2.859375, "learning_rate": 4.763264836670332e-05, "loss": 0.8004, "step": 8024 }, { "epoch": 0.1422690189092295, "grad_norm": 3.140625, "learning_rate": 4.7631461944906994e-05, "loss": 0.8078, "step": 8026 }, { "epoch": 0.14230447094484105, "grad_norm": 3.171875, "learning_rate": 4.763027524067473e-05, "loss": 0.8291, "step": 8028 }, { "epoch": 0.14233992298045264, "grad_norm": 2.734375, "learning_rate": 4.7629088254021354e-05, "loss": 0.8019, "step": 8030 }, { "epoch": 0.1423753750160642, "grad_norm": 3.09375, "learning_rate": 4.762790098496166e-05, "loss": 0.7968, "step": 8032 }, { "epoch": 0.14241082705167576, "grad_norm": 2.921875, "learning_rate": 4.7626713433510485e-05, "loss": 0.7821, "step": 8034 }, { "epoch": 0.14244627908728735, "grad_norm": 2.9375, "learning_rate": 4.762552559968264e-05, "loss": 0.8094, "step": 8036 }, { "epoch": 0.1424817311228989, "grad_norm": 2.640625, "learning_rate": 4.762433748349294e-05, "loss": 0.8488, "step": 8038 }, { "epoch": 0.14251718315851047, "grad_norm": 3.046875, "learning_rate": 4.762314908495622e-05, "loss": 0.8179, "step": 8040 }, { "epoch": 0.14255263519412206, "grad_norm": 3.125, "learning_rate": 4.7621960404087316e-05, "loss": 0.8421, "step": 8042 }, { "epoch": 0.14258808722973362, "grad_norm": 2.90625, "learning_rate": 4.7620771440901056e-05, "loss": 0.7974, "step": 8044 }, { "epoch": 0.14262353926534518, "grad_norm": 3.03125, "learning_rate": 4.761958219541228e-05, "loss": 0.8883, "step": 8046 }, { "epoch": 0.14265899130095677, "grad_norm": 2.875, "learning_rate": 4.761839266763583e-05, "loss": 0.8937, "step": 8048 }, { "epoch": 0.14269444333656833, "grad_norm": 2.765625, "learning_rate": 4.761720285758655e-05, "loss": 0.7914, "step": 8050 }, { "epoch": 0.1427298953721799, "grad_norm": 2.84375, "learning_rate": 4.761601276527929e-05, "loss": 0.7956, "step": 8052 }, { "epoch": 0.14276534740779148, "grad_norm": 2.90625, "learning_rate": 4.76148223907289e-05, "loss": 0.8424, "step": 8054 }, { "epoch": 0.14280079944340304, "grad_norm": 2.59375, "learning_rate": 4.761363173395024e-05, "loss": 0.8371, "step": 8056 }, { "epoch": 0.1428362514790146, "grad_norm": 2.75, "learning_rate": 4.761244079495817e-05, "loss": 0.8222, "step": 8058 }, { "epoch": 0.1428717035146262, "grad_norm": 2.875, "learning_rate": 4.761124957376754e-05, "loss": 0.7927, "step": 8060 }, { "epoch": 0.14290715555023775, "grad_norm": 2.609375, "learning_rate": 4.761005807039323e-05, "loss": 0.8782, "step": 8062 }, { "epoch": 0.1429426075858493, "grad_norm": 2.765625, "learning_rate": 4.7608866284850104e-05, "loss": 0.8218, "step": 8064 }, { "epoch": 0.1429780596214609, "grad_norm": 2.78125, "learning_rate": 4.7607674217153034e-05, "loss": 0.8434, "step": 8066 }, { "epoch": 0.14301351165707246, "grad_norm": 3.421875, "learning_rate": 4.760648186731689e-05, "loss": 0.849, "step": 8068 }, { "epoch": 0.14304896369268402, "grad_norm": 2.671875, "learning_rate": 4.7605289235356574e-05, "loss": 0.7891, "step": 8070 }, { "epoch": 0.1430844157282956, "grad_norm": 2.90625, "learning_rate": 4.760409632128695e-05, "loss": 0.8308, "step": 8072 }, { "epoch": 0.14311986776390717, "grad_norm": 2.484375, "learning_rate": 4.7602903125122914e-05, "loss": 0.801, "step": 8074 }, { "epoch": 0.14315531979951873, "grad_norm": 2.71875, "learning_rate": 4.760170964687935e-05, "loss": 0.8058, "step": 8076 }, { "epoch": 0.14319077183513032, "grad_norm": 2.84375, "learning_rate": 4.760051588657117e-05, "loss": 0.8482, "step": 8078 }, { "epoch": 0.14322622387074188, "grad_norm": 2.71875, "learning_rate": 4.759932184421325e-05, "loss": 0.78, "step": 8080 }, { "epoch": 0.14326167590635344, "grad_norm": 2.828125, "learning_rate": 4.75981275198205e-05, "loss": 0.8269, "step": 8082 }, { "epoch": 0.14329712794196503, "grad_norm": 2.734375, "learning_rate": 4.759693291340783e-05, "loss": 0.8253, "step": 8084 }, { "epoch": 0.1433325799775766, "grad_norm": 2.953125, "learning_rate": 4.759573802499014e-05, "loss": 0.7828, "step": 8086 }, { "epoch": 0.14336803201318815, "grad_norm": 3.125, "learning_rate": 4.759454285458235e-05, "loss": 0.8468, "step": 8088 }, { "epoch": 0.14340348404879974, "grad_norm": 2.40625, "learning_rate": 4.759334740219937e-05, "loss": 0.8235, "step": 8090 }, { "epoch": 0.1434389360844113, "grad_norm": 2.640625, "learning_rate": 4.7592151667856125e-05, "loss": 0.7874, "step": 8092 }, { "epoch": 0.14347438812002286, "grad_norm": 2.53125, "learning_rate": 4.759095565156752e-05, "loss": 0.768, "step": 8094 }, { "epoch": 0.14350984015563445, "grad_norm": 2.828125, "learning_rate": 4.75897593533485e-05, "loss": 0.8154, "step": 8096 }, { "epoch": 0.143545292191246, "grad_norm": 2.734375, "learning_rate": 4.758856277321398e-05, "loss": 0.8223, "step": 8098 }, { "epoch": 0.14358074422685757, "grad_norm": 2.671875, "learning_rate": 4.758736591117892e-05, "loss": 0.8145, "step": 8100 }, { "epoch": 0.14361619626246916, "grad_norm": 2.453125, "learning_rate": 4.7586168767258227e-05, "loss": 0.7969, "step": 8102 }, { "epoch": 0.14365164829808072, "grad_norm": 3.046875, "learning_rate": 4.758497134146686e-05, "loss": 0.8054, "step": 8104 }, { "epoch": 0.14368710033369228, "grad_norm": 2.796875, "learning_rate": 4.758377363381974e-05, "loss": 0.8435, "step": 8106 }, { "epoch": 0.14372255236930387, "grad_norm": 2.953125, "learning_rate": 4.7582575644331836e-05, "loss": 0.8286, "step": 8108 }, { "epoch": 0.14375800440491543, "grad_norm": 2.859375, "learning_rate": 4.758137737301809e-05, "loss": 0.8173, "step": 8110 }, { "epoch": 0.143793456440527, "grad_norm": 2.640625, "learning_rate": 4.7580178819893465e-05, "loss": 0.8239, "step": 8112 }, { "epoch": 0.14382890847613855, "grad_norm": 2.734375, "learning_rate": 4.75789799849729e-05, "loss": 0.8259, "step": 8114 }, { "epoch": 0.14386436051175014, "grad_norm": 2.8125, "learning_rate": 4.757778086827138e-05, "loss": 0.8239, "step": 8116 }, { "epoch": 0.1438998125473617, "grad_norm": 2.75, "learning_rate": 4.757658146980385e-05, "loss": 0.8177, "step": 8118 }, { "epoch": 0.14393526458297326, "grad_norm": 2.65625, "learning_rate": 4.7575381789585296e-05, "loss": 0.809, "step": 8120 }, { "epoch": 0.14397071661858485, "grad_norm": 3.078125, "learning_rate": 4.7574181827630666e-05, "loss": 0.8368, "step": 8122 }, { "epoch": 0.1440061686541964, "grad_norm": 2.796875, "learning_rate": 4.757298158395496e-05, "loss": 0.8409, "step": 8124 }, { "epoch": 0.14404162068980797, "grad_norm": 2.734375, "learning_rate": 4.757178105857313e-05, "loss": 0.8275, "step": 8126 }, { "epoch": 0.14407707272541956, "grad_norm": 2.6875, "learning_rate": 4.757058025150018e-05, "loss": 0.8308, "step": 8128 }, { "epoch": 0.14411252476103112, "grad_norm": 2.9375, "learning_rate": 4.7569379162751094e-05, "loss": 0.804, "step": 8130 }, { "epoch": 0.14414797679664268, "grad_norm": 2.6875, "learning_rate": 4.756817779234086e-05, "loss": 0.8118, "step": 8132 }, { "epoch": 0.14418342883225427, "grad_norm": 2.4375, "learning_rate": 4.756697614028446e-05, "loss": 0.8067, "step": 8134 }, { "epoch": 0.14421888086786583, "grad_norm": 2.765625, "learning_rate": 4.75657742065969e-05, "loss": 0.8501, "step": 8136 }, { "epoch": 0.1442543329034774, "grad_norm": 2.9375, "learning_rate": 4.7564571991293184e-05, "loss": 0.7915, "step": 8138 }, { "epoch": 0.14428978493908898, "grad_norm": 2.78125, "learning_rate": 4.75633694943883e-05, "loss": 0.7987, "step": 8140 }, { "epoch": 0.14432523697470054, "grad_norm": 2.796875, "learning_rate": 4.756216671589727e-05, "loss": 0.8148, "step": 8142 }, { "epoch": 0.1443606890103121, "grad_norm": 2.828125, "learning_rate": 4.75609636558351e-05, "loss": 0.8114, "step": 8144 }, { "epoch": 0.1443961410459237, "grad_norm": 2.703125, "learning_rate": 4.7559760314216794e-05, "loss": 0.826, "step": 8146 }, { "epoch": 0.14443159308153525, "grad_norm": 2.890625, "learning_rate": 4.755855669105739e-05, "loss": 0.8442, "step": 8148 }, { "epoch": 0.1444670451171468, "grad_norm": 3.0, "learning_rate": 4.755735278637189e-05, "loss": 0.834, "step": 8150 }, { "epoch": 0.1445024971527584, "grad_norm": 2.65625, "learning_rate": 4.755614860017533e-05, "loss": 0.7876, "step": 8152 }, { "epoch": 0.14453794918836996, "grad_norm": 2.796875, "learning_rate": 4.7554944132482724e-05, "loss": 0.8251, "step": 8154 }, { "epoch": 0.14457340122398152, "grad_norm": 2.65625, "learning_rate": 4.755373938330912e-05, "loss": 0.813, "step": 8156 }, { "epoch": 0.1446088532595931, "grad_norm": 2.609375, "learning_rate": 4.755253435266955e-05, "loss": 0.8088, "step": 8158 }, { "epoch": 0.14464430529520467, "grad_norm": 2.6875, "learning_rate": 4.755132904057904e-05, "loss": 0.8135, "step": 8160 }, { "epoch": 0.14467975733081623, "grad_norm": 2.609375, "learning_rate": 4.7550123447052646e-05, "loss": 0.7945, "step": 8162 }, { "epoch": 0.14471520936642782, "grad_norm": 2.59375, "learning_rate": 4.754891757210541e-05, "loss": 0.8046, "step": 8164 }, { "epoch": 0.14475066140203938, "grad_norm": 2.6875, "learning_rate": 4.754771141575237e-05, "loss": 0.8994, "step": 8166 }, { "epoch": 0.14478611343765094, "grad_norm": 2.625, "learning_rate": 4.75465049780086e-05, "loss": 0.8353, "step": 8168 }, { "epoch": 0.14482156547326253, "grad_norm": 2.65625, "learning_rate": 4.754529825888914e-05, "loss": 0.7793, "step": 8170 }, { "epoch": 0.1448570175088741, "grad_norm": 2.671875, "learning_rate": 4.754409125840905e-05, "loss": 0.8158, "step": 8172 }, { "epoch": 0.14489246954448565, "grad_norm": 2.859375, "learning_rate": 4.75428839765834e-05, "loss": 0.8244, "step": 8174 }, { "epoch": 0.14492792158009724, "grad_norm": 2.765625, "learning_rate": 4.754167641342725e-05, "loss": 0.7988, "step": 8176 }, { "epoch": 0.1449633736157088, "grad_norm": 3.15625, "learning_rate": 4.754046856895568e-05, "loss": 0.8256, "step": 8178 }, { "epoch": 0.14499882565132036, "grad_norm": 2.75, "learning_rate": 4.753926044318375e-05, "loss": 0.8186, "step": 8180 }, { "epoch": 0.14503427768693194, "grad_norm": 2.703125, "learning_rate": 4.7538052036126545e-05, "loss": 0.7826, "step": 8182 }, { "epoch": 0.1450697297225435, "grad_norm": 2.78125, "learning_rate": 4.753684334779914e-05, "loss": 0.8254, "step": 8184 }, { "epoch": 0.14510518175815507, "grad_norm": 2.6875, "learning_rate": 4.7535634378216636e-05, "loss": 0.8321, "step": 8186 }, { "epoch": 0.14514063379376665, "grad_norm": 3.171875, "learning_rate": 4.7534425127394106e-05, "loss": 0.7746, "step": 8188 }, { "epoch": 0.14517608582937822, "grad_norm": 2.546875, "learning_rate": 4.7533215595346636e-05, "loss": 0.8301, "step": 8190 }, { "epoch": 0.14521153786498978, "grad_norm": 2.703125, "learning_rate": 4.753200578208934e-05, "loss": 0.813, "step": 8192 }, { "epoch": 0.14524698990060136, "grad_norm": 2.875, "learning_rate": 4.75307956876373e-05, "loss": 0.808, "step": 8194 }, { "epoch": 0.14528244193621292, "grad_norm": 2.765625, "learning_rate": 4.752958531200562e-05, "loss": 0.8109, "step": 8196 }, { "epoch": 0.14531789397182449, "grad_norm": 2.828125, "learning_rate": 4.7528374655209407e-05, "loss": 0.823, "step": 8198 }, { "epoch": 0.14535334600743607, "grad_norm": 2.984375, "learning_rate": 4.752716371726378e-05, "loss": 0.8127, "step": 8200 }, { "epoch": 0.14538879804304763, "grad_norm": 2.796875, "learning_rate": 4.752595249818383e-05, "loss": 0.8062, "step": 8202 }, { "epoch": 0.1454242500786592, "grad_norm": 2.71875, "learning_rate": 4.752474099798469e-05, "loss": 0.7996, "step": 8204 }, { "epoch": 0.14545970211427078, "grad_norm": 2.59375, "learning_rate": 4.752352921668147e-05, "loss": 0.8083, "step": 8206 }, { "epoch": 0.14549515414988234, "grad_norm": 2.765625, "learning_rate": 4.752231715428931e-05, "loss": 0.8477, "step": 8208 }, { "epoch": 0.1455306061854939, "grad_norm": 2.78125, "learning_rate": 4.752110481082331e-05, "loss": 0.841, "step": 8210 }, { "epoch": 0.1455660582211055, "grad_norm": 2.859375, "learning_rate": 4.751989218629861e-05, "loss": 0.8154, "step": 8212 }, { "epoch": 0.14560151025671705, "grad_norm": 2.59375, "learning_rate": 4.751867928073036e-05, "loss": 0.7936, "step": 8214 }, { "epoch": 0.14563696229232861, "grad_norm": 2.96875, "learning_rate": 4.751746609413367e-05, "loss": 0.8145, "step": 8216 }, { "epoch": 0.1456724143279402, "grad_norm": 3.171875, "learning_rate": 4.75162526265237e-05, "loss": 0.7813, "step": 8218 }, { "epoch": 0.14570786636355176, "grad_norm": 3.0, "learning_rate": 4.7515038877915584e-05, "loss": 0.8669, "step": 8220 }, { "epoch": 0.14574331839916332, "grad_norm": 2.609375, "learning_rate": 4.7513824848324474e-05, "loss": 0.8316, "step": 8222 }, { "epoch": 0.1457787704347749, "grad_norm": 3.0625, "learning_rate": 4.751261053776552e-05, "loss": 0.8761, "step": 8224 }, { "epoch": 0.14581422247038647, "grad_norm": 2.78125, "learning_rate": 4.751139594625388e-05, "loss": 0.8457, "step": 8226 }, { "epoch": 0.14584967450599803, "grad_norm": 2.890625, "learning_rate": 4.751018107380469e-05, "loss": 0.8049, "step": 8228 }, { "epoch": 0.14588512654160962, "grad_norm": 2.828125, "learning_rate": 4.750896592043315e-05, "loss": 0.82, "step": 8230 }, { "epoch": 0.14592057857722118, "grad_norm": 2.96875, "learning_rate": 4.7507750486154387e-05, "loss": 0.8268, "step": 8232 }, { "epoch": 0.14595603061283274, "grad_norm": 3.03125, "learning_rate": 4.7506534770983595e-05, "loss": 0.8836, "step": 8234 }, { "epoch": 0.14599148264844433, "grad_norm": 3.171875, "learning_rate": 4.750531877493594e-05, "loss": 0.8191, "step": 8236 }, { "epoch": 0.1460269346840559, "grad_norm": 2.703125, "learning_rate": 4.7504102498026584e-05, "loss": 0.8321, "step": 8238 }, { "epoch": 0.14606238671966745, "grad_norm": 2.734375, "learning_rate": 4.7502885940270723e-05, "loss": 0.8084, "step": 8240 }, { "epoch": 0.14609783875527904, "grad_norm": 2.78125, "learning_rate": 4.7501669101683535e-05, "loss": 0.7722, "step": 8242 }, { "epoch": 0.1461332907908906, "grad_norm": 2.953125, "learning_rate": 4.750045198228019e-05, "loss": 0.8216, "step": 8244 }, { "epoch": 0.14616874282650216, "grad_norm": 2.71875, "learning_rate": 4.7499234582075905e-05, "loss": 0.813, "step": 8246 }, { "epoch": 0.14620419486211375, "grad_norm": 2.6875, "learning_rate": 4.749801690108585e-05, "loss": 0.8364, "step": 8248 }, { "epoch": 0.1462396468977253, "grad_norm": 2.875, "learning_rate": 4.749679893932524e-05, "loss": 0.8664, "step": 8250 }, { "epoch": 0.14627509893333687, "grad_norm": 2.671875, "learning_rate": 4.7495580696809254e-05, "loss": 0.8306, "step": 8252 }, { "epoch": 0.14631055096894846, "grad_norm": 2.75, "learning_rate": 4.7494362173553114e-05, "loss": 0.817, "step": 8254 }, { "epoch": 0.14634600300456002, "grad_norm": 2.765625, "learning_rate": 4.7493143369572013e-05, "loss": 0.8451, "step": 8256 }, { "epoch": 0.14638145504017158, "grad_norm": 2.65625, "learning_rate": 4.749192428488117e-05, "loss": 0.8197, "step": 8258 }, { "epoch": 0.14641690707578317, "grad_norm": 2.828125, "learning_rate": 4.7490704919495796e-05, "loss": 0.8499, "step": 8260 }, { "epoch": 0.14645235911139473, "grad_norm": 2.953125, "learning_rate": 4.748948527343112e-05, "loss": 0.8043, "step": 8262 }, { "epoch": 0.1464878111470063, "grad_norm": 3.171875, "learning_rate": 4.748826534670234e-05, "loss": 0.8115, "step": 8264 }, { "epoch": 0.14652326318261788, "grad_norm": 2.90625, "learning_rate": 4.748704513932469e-05, "loss": 0.8248, "step": 8266 }, { "epoch": 0.14655871521822944, "grad_norm": 2.859375, "learning_rate": 4.748582465131341e-05, "loss": 0.8252, "step": 8268 }, { "epoch": 0.146594167253841, "grad_norm": 2.765625, "learning_rate": 4.748460388268372e-05, "loss": 0.8104, "step": 8270 }, { "epoch": 0.1466296192894526, "grad_norm": 2.6875, "learning_rate": 4.748338283345085e-05, "loss": 0.8272, "step": 8272 }, { "epoch": 0.14666507132506415, "grad_norm": 2.4375, "learning_rate": 4.7482161503630053e-05, "loss": 0.7902, "step": 8274 }, { "epoch": 0.1467005233606757, "grad_norm": 2.828125, "learning_rate": 4.7480939893236556e-05, "loss": 0.8334, "step": 8276 }, { "epoch": 0.1467359753962873, "grad_norm": 2.890625, "learning_rate": 4.7479718002285615e-05, "loss": 0.7837, "step": 8278 }, { "epoch": 0.14677142743189886, "grad_norm": 2.5625, "learning_rate": 4.747849583079248e-05, "loss": 0.7474, "step": 8280 }, { "epoch": 0.14680687946751042, "grad_norm": 3.109375, "learning_rate": 4.747727337877239e-05, "loss": 0.8168, "step": 8282 }, { "epoch": 0.14684233150312198, "grad_norm": 2.78125, "learning_rate": 4.747605064624062e-05, "loss": 0.8059, "step": 8284 }, { "epoch": 0.14687778353873357, "grad_norm": 3.09375, "learning_rate": 4.747482763321241e-05, "loss": 0.8225, "step": 8286 }, { "epoch": 0.14691323557434513, "grad_norm": 2.734375, "learning_rate": 4.7473604339703034e-05, "loss": 0.8391, "step": 8288 }, { "epoch": 0.1469486876099567, "grad_norm": 2.84375, "learning_rate": 4.747238076572777e-05, "loss": 0.8136, "step": 8290 }, { "epoch": 0.14698413964556828, "grad_norm": 2.609375, "learning_rate": 4.747115691130185e-05, "loss": 0.8232, "step": 8292 }, { "epoch": 0.14701959168117984, "grad_norm": 2.796875, "learning_rate": 4.746993277644059e-05, "loss": 0.8548, "step": 8294 }, { "epoch": 0.1470550437167914, "grad_norm": 2.75, "learning_rate": 4.746870836115924e-05, "loss": 0.8169, "step": 8296 }, { "epoch": 0.147090495752403, "grad_norm": 2.71875, "learning_rate": 4.74674836654731e-05, "loss": 0.8498, "step": 8298 }, { "epoch": 0.14712594778801455, "grad_norm": 2.609375, "learning_rate": 4.7466258689397434e-05, "loss": 0.8027, "step": 8300 }, { "epoch": 0.1471613998236261, "grad_norm": 2.984375, "learning_rate": 4.7465033432947546e-05, "loss": 0.8488, "step": 8302 }, { "epoch": 0.1471968518592377, "grad_norm": 2.71875, "learning_rate": 4.746380789613871e-05, "loss": 0.842, "step": 8304 }, { "epoch": 0.14723230389484926, "grad_norm": 2.6875, "learning_rate": 4.746258207898624e-05, "loss": 0.8198, "step": 8306 }, { "epoch": 0.14726775593046082, "grad_norm": 2.640625, "learning_rate": 4.746135598150542e-05, "loss": 0.8277, "step": 8308 }, { "epoch": 0.1473032079660724, "grad_norm": 2.890625, "learning_rate": 4.746012960371156e-05, "loss": 0.8142, "step": 8310 }, { "epoch": 0.14733866000168397, "grad_norm": 2.671875, "learning_rate": 4.745890294561995e-05, "loss": 0.7867, "step": 8312 }, { "epoch": 0.14737411203729553, "grad_norm": 2.765625, "learning_rate": 4.745767600724592e-05, "loss": 0.8592, "step": 8314 }, { "epoch": 0.14740956407290712, "grad_norm": 2.734375, "learning_rate": 4.745644878860478e-05, "loss": 0.8329, "step": 8316 }, { "epoch": 0.14744501610851868, "grad_norm": 2.640625, "learning_rate": 4.7455221289711814e-05, "loss": 0.799, "step": 8318 }, { "epoch": 0.14748046814413024, "grad_norm": 2.84375, "learning_rate": 4.745399351058237e-05, "loss": 0.8623, "step": 8320 }, { "epoch": 0.14751592017974183, "grad_norm": 2.828125, "learning_rate": 4.7452765451231776e-05, "loss": 0.8279, "step": 8322 }, { "epoch": 0.1475513722153534, "grad_norm": 2.84375, "learning_rate": 4.745153711167534e-05, "loss": 0.8329, "step": 8324 }, { "epoch": 0.14758682425096495, "grad_norm": 3.03125, "learning_rate": 4.745030849192839e-05, "loss": 0.8266, "step": 8326 }, { "epoch": 0.14762227628657654, "grad_norm": 2.734375, "learning_rate": 4.744907959200627e-05, "loss": 0.779, "step": 8328 }, { "epoch": 0.1476577283221881, "grad_norm": 2.75, "learning_rate": 4.744785041192431e-05, "loss": 0.8216, "step": 8330 }, { "epoch": 0.14769318035779966, "grad_norm": 2.65625, "learning_rate": 4.7446620951697856e-05, "loss": 0.8291, "step": 8332 }, { "epoch": 0.14772863239341125, "grad_norm": 3.0, "learning_rate": 4.744539121134225e-05, "loss": 0.8277, "step": 8334 }, { "epoch": 0.1477640844290228, "grad_norm": 2.40625, "learning_rate": 4.744416119087283e-05, "loss": 0.8255, "step": 8336 }, { "epoch": 0.14779953646463437, "grad_norm": 2.796875, "learning_rate": 4.744293089030496e-05, "loss": 0.8148, "step": 8338 }, { "epoch": 0.14783498850024596, "grad_norm": 2.6875, "learning_rate": 4.744170030965398e-05, "loss": 0.8207, "step": 8340 }, { "epoch": 0.14787044053585752, "grad_norm": 2.71875, "learning_rate": 4.7440469448935264e-05, "loss": 0.8454, "step": 8342 }, { "epoch": 0.14790589257146908, "grad_norm": 2.875, "learning_rate": 4.743923830816416e-05, "loss": 0.8418, "step": 8344 }, { "epoch": 0.14794134460708067, "grad_norm": 3.0625, "learning_rate": 4.743800688735603e-05, "loss": 0.8134, "step": 8346 }, { "epoch": 0.14797679664269223, "grad_norm": 2.71875, "learning_rate": 4.743677518652625e-05, "loss": 0.8243, "step": 8348 }, { "epoch": 0.1480122486783038, "grad_norm": 2.765625, "learning_rate": 4.743554320569019e-05, "loss": 0.7993, "step": 8350 }, { "epoch": 0.14804770071391538, "grad_norm": 2.765625, "learning_rate": 4.743431094486323e-05, "loss": 0.8062, "step": 8352 }, { "epoch": 0.14808315274952694, "grad_norm": 3.015625, "learning_rate": 4.743307840406073e-05, "loss": 0.8124, "step": 8354 }, { "epoch": 0.1481186047851385, "grad_norm": 3.0, "learning_rate": 4.7431845583298084e-05, "loss": 0.8467, "step": 8356 }, { "epoch": 0.14815405682075009, "grad_norm": 2.671875, "learning_rate": 4.7430612482590685e-05, "loss": 0.7828, "step": 8358 }, { "epoch": 0.14818950885636165, "grad_norm": 2.828125, "learning_rate": 4.742937910195391e-05, "loss": 0.7977, "step": 8360 }, { "epoch": 0.1482249608919732, "grad_norm": 2.671875, "learning_rate": 4.742814544140316e-05, "loss": 0.8005, "step": 8362 }, { "epoch": 0.1482604129275848, "grad_norm": 2.609375, "learning_rate": 4.742691150095383e-05, "loss": 0.8071, "step": 8364 }, { "epoch": 0.14829586496319636, "grad_norm": 2.875, "learning_rate": 4.74256772806213e-05, "loss": 0.821, "step": 8366 }, { "epoch": 0.14833131699880792, "grad_norm": 3.0625, "learning_rate": 4.7424442780421003e-05, "loss": 0.835, "step": 8368 }, { "epoch": 0.1483667690344195, "grad_norm": 3.03125, "learning_rate": 4.742320800036832e-05, "loss": 0.7975, "step": 8370 }, { "epoch": 0.14840222107003107, "grad_norm": 2.625, "learning_rate": 4.742197294047869e-05, "loss": 0.8017, "step": 8372 }, { "epoch": 0.14843767310564263, "grad_norm": 2.875, "learning_rate": 4.742073760076749e-05, "loss": 0.863, "step": 8374 }, { "epoch": 0.14847312514125421, "grad_norm": 2.390625, "learning_rate": 4.741950198125016e-05, "loss": 0.7757, "step": 8376 }, { "epoch": 0.14850857717686577, "grad_norm": 2.6875, "learning_rate": 4.7418266081942116e-05, "loss": 0.8262, "step": 8378 }, { "epoch": 0.14854402921247734, "grad_norm": 2.75, "learning_rate": 4.741702990285878e-05, "loss": 0.8187, "step": 8380 }, { "epoch": 0.14857948124808892, "grad_norm": 2.328125, "learning_rate": 4.7415793444015574e-05, "loss": 0.7889, "step": 8382 }, { "epoch": 0.14861493328370048, "grad_norm": 2.828125, "learning_rate": 4.741455670542795e-05, "loss": 0.7936, "step": 8384 }, { "epoch": 0.14865038531931205, "grad_norm": 2.59375, "learning_rate": 4.741331968711131e-05, "loss": 0.8174, "step": 8386 }, { "epoch": 0.14868583735492363, "grad_norm": 2.75, "learning_rate": 4.741208238908111e-05, "loss": 0.8257, "step": 8388 }, { "epoch": 0.1487212893905352, "grad_norm": 2.859375, "learning_rate": 4.7410844811352806e-05, "loss": 0.8034, "step": 8390 }, { "epoch": 0.14875674142614675, "grad_norm": 3.015625, "learning_rate": 4.740960695394181e-05, "loss": 0.8, "step": 8392 }, { "epoch": 0.14879219346175834, "grad_norm": 2.8125, "learning_rate": 4.7408368816863596e-05, "loss": 0.817, "step": 8394 }, { "epoch": 0.1488276454973699, "grad_norm": 3.078125, "learning_rate": 4.7407130400133605e-05, "loss": 0.7515, "step": 8396 }, { "epoch": 0.14886309753298146, "grad_norm": 2.921875, "learning_rate": 4.7405891703767294e-05, "loss": 0.8348, "step": 8398 }, { "epoch": 0.14889854956859305, "grad_norm": 2.9375, "learning_rate": 4.740465272778012e-05, "loss": 0.851, "step": 8400 }, { "epoch": 0.1489340016042046, "grad_norm": 2.46875, "learning_rate": 4.740341347218754e-05, "loss": 0.8495, "step": 8402 }, { "epoch": 0.14896945363981617, "grad_norm": 2.765625, "learning_rate": 4.7402173937005035e-05, "loss": 0.8287, "step": 8404 }, { "epoch": 0.14900490567542776, "grad_norm": 3.09375, "learning_rate": 4.7400934122248066e-05, "loss": 0.8723, "step": 8406 }, { "epoch": 0.14904035771103932, "grad_norm": 3.296875, "learning_rate": 4.7399694027932094e-05, "loss": 0.8113, "step": 8408 }, { "epoch": 0.14907580974665088, "grad_norm": 3.390625, "learning_rate": 4.7398453654072616e-05, "loss": 0.8134, "step": 8410 }, { "epoch": 0.14911126178226247, "grad_norm": 3.171875, "learning_rate": 4.7397213000685104e-05, "loss": 0.7777, "step": 8412 }, { "epoch": 0.14914671381787403, "grad_norm": 2.65625, "learning_rate": 4.739597206778503e-05, "loss": 0.8102, "step": 8414 }, { "epoch": 0.1491821658534856, "grad_norm": 2.453125, "learning_rate": 4.7394730855387895e-05, "loss": 0.8204, "step": 8416 }, { "epoch": 0.14921761788909718, "grad_norm": 2.9375, "learning_rate": 4.739348936350918e-05, "loss": 0.7826, "step": 8418 }, { "epoch": 0.14925306992470874, "grad_norm": 2.46875, "learning_rate": 4.7392247592164384e-05, "loss": 0.8538, "step": 8420 }, { "epoch": 0.1492885219603203, "grad_norm": 2.734375, "learning_rate": 4.739100554136901e-05, "loss": 0.8514, "step": 8422 }, { "epoch": 0.1493239739959319, "grad_norm": 2.65625, "learning_rate": 4.738976321113854e-05, "loss": 0.8351, "step": 8424 }, { "epoch": 0.14935942603154345, "grad_norm": 2.625, "learning_rate": 4.738852060148849e-05, "loss": 0.7661, "step": 8426 }, { "epoch": 0.149394878067155, "grad_norm": 2.53125, "learning_rate": 4.738727771243437e-05, "loss": 0.8071, "step": 8428 }, { "epoch": 0.1494303301027666, "grad_norm": 2.65625, "learning_rate": 4.7386034543991674e-05, "loss": 0.7708, "step": 8430 }, { "epoch": 0.14946578213837816, "grad_norm": 2.875, "learning_rate": 4.738479109617594e-05, "loss": 0.8564, "step": 8432 }, { "epoch": 0.14950123417398972, "grad_norm": 2.859375, "learning_rate": 4.738354736900268e-05, "loss": 0.8329, "step": 8434 }, { "epoch": 0.1495366862096013, "grad_norm": 2.8125, "learning_rate": 4.73823033624874e-05, "loss": 0.8144, "step": 8436 }, { "epoch": 0.14957213824521287, "grad_norm": 2.625, "learning_rate": 4.738105907664565e-05, "loss": 0.8032, "step": 8438 }, { "epoch": 0.14960759028082443, "grad_norm": 2.734375, "learning_rate": 4.737981451149293e-05, "loss": 0.8256, "step": 8440 }, { "epoch": 0.14964304231643602, "grad_norm": 2.703125, "learning_rate": 4.737856966704479e-05, "loss": 0.7986, "step": 8442 }, { "epoch": 0.14967849435204758, "grad_norm": 3.484375, "learning_rate": 4.737732454331677e-05, "loss": 0.8465, "step": 8444 }, { "epoch": 0.14971394638765914, "grad_norm": 3.0625, "learning_rate": 4.737607914032439e-05, "loss": 0.8307, "step": 8446 }, { "epoch": 0.14974939842327073, "grad_norm": 2.96875, "learning_rate": 4.737483345808321e-05, "loss": 0.8057, "step": 8448 }, { "epoch": 0.1497848504588823, "grad_norm": 2.484375, "learning_rate": 4.737358749660877e-05, "loss": 0.8325, "step": 8450 }, { "epoch": 0.14982030249449385, "grad_norm": 2.609375, "learning_rate": 4.737234125591661e-05, "loss": 0.8536, "step": 8452 }, { "epoch": 0.1498557545301054, "grad_norm": 2.78125, "learning_rate": 4.7371094736022295e-05, "loss": 0.8113, "step": 8454 }, { "epoch": 0.149891206565717, "grad_norm": 2.734375, "learning_rate": 4.736984793694138e-05, "loss": 0.8725, "step": 8456 }, { "epoch": 0.14992665860132856, "grad_norm": 2.59375, "learning_rate": 4.736860085868942e-05, "loss": 0.8384, "step": 8458 }, { "epoch": 0.14996211063694012, "grad_norm": 2.9375, "learning_rate": 4.736735350128199e-05, "loss": 0.8097, "step": 8460 }, { "epoch": 0.1499975626725517, "grad_norm": 2.453125, "learning_rate": 4.736610586473463e-05, "loss": 0.7781, "step": 8462 }, { "epoch": 0.15003301470816327, "grad_norm": 2.734375, "learning_rate": 4.736485794906294e-05, "loss": 0.8082, "step": 8464 }, { "epoch": 0.15006846674377483, "grad_norm": 2.6875, "learning_rate": 4.7363609754282466e-05, "loss": 0.8256, "step": 8466 }, { "epoch": 0.15010391877938642, "grad_norm": 2.65625, "learning_rate": 4.736236128040882e-05, "loss": 0.8137, "step": 8468 }, { "epoch": 0.15013937081499798, "grad_norm": 2.734375, "learning_rate": 4.736111252745755e-05, "loss": 0.8189, "step": 8470 }, { "epoch": 0.15017482285060954, "grad_norm": 2.640625, "learning_rate": 4.7359863495444254e-05, "loss": 0.8005, "step": 8472 }, { "epoch": 0.15021027488622113, "grad_norm": 2.984375, "learning_rate": 4.735861418438452e-05, "loss": 0.8336, "step": 8474 }, { "epoch": 0.1502457269218327, "grad_norm": 2.75, "learning_rate": 4.735736459429394e-05, "loss": 0.8146, "step": 8476 }, { "epoch": 0.15028117895744425, "grad_norm": 2.828125, "learning_rate": 4.735611472518811e-05, "loss": 0.8229, "step": 8478 }, { "epoch": 0.15031663099305584, "grad_norm": 2.703125, "learning_rate": 4.7354864577082616e-05, "loss": 0.821, "step": 8480 }, { "epoch": 0.1503520830286674, "grad_norm": 2.84375, "learning_rate": 4.7353614149993074e-05, "loss": 0.7703, "step": 8482 }, { "epoch": 0.15038753506427896, "grad_norm": 2.546875, "learning_rate": 4.735236344393508e-05, "loss": 0.7681, "step": 8484 }, { "epoch": 0.15042298709989055, "grad_norm": 2.609375, "learning_rate": 4.735111245892425e-05, "loss": 0.8142, "step": 8486 }, { "epoch": 0.1504584391355021, "grad_norm": 2.46875, "learning_rate": 4.734986119497619e-05, "loss": 0.8485, "step": 8488 }, { "epoch": 0.15049389117111367, "grad_norm": 2.71875, "learning_rate": 4.734860965210651e-05, "loss": 0.8056, "step": 8490 }, { "epoch": 0.15052934320672526, "grad_norm": 3.15625, "learning_rate": 4.734735783033085e-05, "loss": 0.8393, "step": 8492 }, { "epoch": 0.15056479524233682, "grad_norm": 2.703125, "learning_rate": 4.734610572966481e-05, "loss": 0.8382, "step": 8494 }, { "epoch": 0.15060024727794838, "grad_norm": 2.546875, "learning_rate": 4.734485335012403e-05, "loss": 0.8161, "step": 8496 }, { "epoch": 0.15063569931355997, "grad_norm": 3.03125, "learning_rate": 4.734360069172413e-05, "loss": 0.8473, "step": 8498 }, { "epoch": 0.15067115134917153, "grad_norm": 2.6875, "learning_rate": 4.7342347754480745e-05, "loss": 0.8288, "step": 8500 }, { "epoch": 0.1507066033847831, "grad_norm": 2.65625, "learning_rate": 4.734109453840952e-05, "loss": 0.8326, "step": 8502 }, { "epoch": 0.15074205542039468, "grad_norm": 3.015625, "learning_rate": 4.7339841043526085e-05, "loss": 0.8067, "step": 8504 }, { "epoch": 0.15077750745600624, "grad_norm": 2.96875, "learning_rate": 4.733858726984609e-05, "loss": 0.7916, "step": 8506 }, { "epoch": 0.1508129594916178, "grad_norm": 2.859375, "learning_rate": 4.7337333217385173e-05, "loss": 0.816, "step": 8508 }, { "epoch": 0.1508484115272294, "grad_norm": 2.8125, "learning_rate": 4.7336078886158994e-05, "loss": 0.8172, "step": 8510 }, { "epoch": 0.15088386356284095, "grad_norm": 2.765625, "learning_rate": 4.7334824276183195e-05, "loss": 0.7866, "step": 8512 }, { "epoch": 0.1509193155984525, "grad_norm": 2.65625, "learning_rate": 4.733356938747345e-05, "loss": 0.7956, "step": 8514 }, { "epoch": 0.1509547676340641, "grad_norm": 2.84375, "learning_rate": 4.7332314220045417e-05, "loss": 0.7696, "step": 8516 }, { "epoch": 0.15099021966967566, "grad_norm": 2.921875, "learning_rate": 4.7331058773914736e-05, "loss": 0.8321, "step": 8518 }, { "epoch": 0.15102567170528722, "grad_norm": 2.78125, "learning_rate": 4.732980304909711e-05, "loss": 0.8247, "step": 8520 }, { "epoch": 0.1510611237408988, "grad_norm": 2.625, "learning_rate": 4.7328547045608185e-05, "loss": 0.8107, "step": 8522 }, { "epoch": 0.15109657577651037, "grad_norm": 2.75, "learning_rate": 4.7327290763463636e-05, "loss": 0.8323, "step": 8524 }, { "epoch": 0.15113202781212193, "grad_norm": 2.78125, "learning_rate": 4.732603420267916e-05, "loss": 0.8015, "step": 8526 }, { "epoch": 0.15116747984773352, "grad_norm": 2.90625, "learning_rate": 4.7324777363270424e-05, "loss": 0.8537, "step": 8528 }, { "epoch": 0.15120293188334508, "grad_norm": 3.03125, "learning_rate": 4.7323520245253114e-05, "loss": 0.8273, "step": 8530 }, { "epoch": 0.15123838391895664, "grad_norm": 2.875, "learning_rate": 4.732226284864293e-05, "loss": 0.8292, "step": 8532 }, { "epoch": 0.15127383595456823, "grad_norm": 2.59375, "learning_rate": 4.7321005173455546e-05, "loss": 0.8115, "step": 8534 }, { "epoch": 0.15130928799017979, "grad_norm": 2.71875, "learning_rate": 4.731974721970667e-05, "loss": 0.7805, "step": 8536 }, { "epoch": 0.15134474002579135, "grad_norm": 2.984375, "learning_rate": 4.7318488987411994e-05, "loss": 0.84, "step": 8538 }, { "epoch": 0.15138019206140294, "grad_norm": 2.78125, "learning_rate": 4.7317230476587225e-05, "loss": 0.8011, "step": 8540 }, { "epoch": 0.1514156440970145, "grad_norm": 2.78125, "learning_rate": 4.7315971687248076e-05, "loss": 0.7949, "step": 8542 }, { "epoch": 0.15145109613262606, "grad_norm": 2.703125, "learning_rate": 4.731471261941024e-05, "loss": 0.7571, "step": 8544 }, { "epoch": 0.15148654816823764, "grad_norm": 2.984375, "learning_rate": 4.7313453273089445e-05, "loss": 0.8316, "step": 8546 }, { "epoch": 0.1515220002038492, "grad_norm": 2.828125, "learning_rate": 4.73121936483014e-05, "loss": 0.8164, "step": 8548 }, { "epoch": 0.15155745223946077, "grad_norm": 2.859375, "learning_rate": 4.7310933745061813e-05, "loss": 0.8136, "step": 8550 }, { "epoch": 0.15159290427507235, "grad_norm": 2.5625, "learning_rate": 4.7309673563386426e-05, "loss": 0.8086, "step": 8552 }, { "epoch": 0.15162835631068391, "grad_norm": 2.65625, "learning_rate": 4.730841310329096e-05, "loss": 0.8417, "step": 8554 }, { "epoch": 0.15166380834629548, "grad_norm": 2.828125, "learning_rate": 4.730715236479115e-05, "loss": 0.8629, "step": 8556 }, { "epoch": 0.15169926038190706, "grad_norm": 2.921875, "learning_rate": 4.730589134790272e-05, "loss": 0.7892, "step": 8558 }, { "epoch": 0.15173471241751862, "grad_norm": 2.859375, "learning_rate": 4.730463005264142e-05, "loss": 0.8418, "step": 8560 }, { "epoch": 0.15177016445313019, "grad_norm": 2.953125, "learning_rate": 4.7303368479022974e-05, "loss": 0.8379, "step": 8562 }, { "epoch": 0.15180561648874177, "grad_norm": 2.625, "learning_rate": 4.730210662706314e-05, "loss": 0.8355, "step": 8564 }, { "epoch": 0.15184106852435333, "grad_norm": 2.9375, "learning_rate": 4.730084449677766e-05, "loss": 0.8156, "step": 8566 }, { "epoch": 0.1518765205599649, "grad_norm": 2.421875, "learning_rate": 4.7299582088182284e-05, "loss": 0.8068, "step": 8568 }, { "epoch": 0.15191197259557648, "grad_norm": 2.671875, "learning_rate": 4.729831940129277e-05, "loss": 0.7711, "step": 8570 }, { "epoch": 0.15194742463118804, "grad_norm": 2.640625, "learning_rate": 4.729705643612486e-05, "loss": 0.8524, "step": 8572 }, { "epoch": 0.1519828766667996, "grad_norm": 2.671875, "learning_rate": 4.729579319269435e-05, "loss": 0.7844, "step": 8574 }, { "epoch": 0.1520183287024112, "grad_norm": 2.6875, "learning_rate": 4.729452967101697e-05, "loss": 0.8294, "step": 8576 }, { "epoch": 0.15205378073802275, "grad_norm": 2.78125, "learning_rate": 4.729326587110852e-05, "loss": 0.7946, "step": 8578 }, { "epoch": 0.15208923277363431, "grad_norm": 2.6875, "learning_rate": 4.729200179298474e-05, "loss": 0.8395, "step": 8580 }, { "epoch": 0.1521246848092459, "grad_norm": 2.765625, "learning_rate": 4.729073743666143e-05, "loss": 0.8131, "step": 8582 }, { "epoch": 0.15216013684485746, "grad_norm": 2.875, "learning_rate": 4.728947280215435e-05, "loss": 0.7983, "step": 8584 }, { "epoch": 0.15219558888046902, "grad_norm": 2.5, "learning_rate": 4.72882078894793e-05, "loss": 0.8259, "step": 8586 }, { "epoch": 0.1522310409160806, "grad_norm": 2.65625, "learning_rate": 4.728694269865205e-05, "loss": 0.7883, "step": 8588 }, { "epoch": 0.15226649295169217, "grad_norm": 2.703125, "learning_rate": 4.728567722968841e-05, "loss": 0.7958, "step": 8590 }, { "epoch": 0.15230194498730373, "grad_norm": 2.859375, "learning_rate": 4.728441148260415e-05, "loss": 0.768, "step": 8592 }, { "epoch": 0.15233739702291532, "grad_norm": 2.734375, "learning_rate": 4.728314545741508e-05, "loss": 0.8115, "step": 8594 }, { "epoch": 0.15237284905852688, "grad_norm": 2.859375, "learning_rate": 4.728187915413699e-05, "loss": 0.8365, "step": 8596 }, { "epoch": 0.15240830109413844, "grad_norm": 2.75, "learning_rate": 4.72806125727857e-05, "loss": 0.8323, "step": 8598 }, { "epoch": 0.15244375312975003, "grad_norm": 2.96875, "learning_rate": 4.7279345713377e-05, "loss": 0.8174, "step": 8600 }, { "epoch": 0.1524792051653616, "grad_norm": 2.71875, "learning_rate": 4.72780785759267e-05, "loss": 0.7958, "step": 8602 }, { "epoch": 0.15251465720097315, "grad_norm": 2.859375, "learning_rate": 4.727681116045063e-05, "loss": 0.8139, "step": 8604 }, { "epoch": 0.15255010923658474, "grad_norm": 2.703125, "learning_rate": 4.727554346696459e-05, "loss": 0.8109, "step": 8606 }, { "epoch": 0.1525855612721963, "grad_norm": 2.8125, "learning_rate": 4.727427549548441e-05, "loss": 0.8242, "step": 8608 }, { "epoch": 0.15262101330780786, "grad_norm": 2.609375, "learning_rate": 4.727300724602591e-05, "loss": 0.8205, "step": 8610 }, { "epoch": 0.15265646534341945, "grad_norm": 2.640625, "learning_rate": 4.727173871860492e-05, "loss": 0.8197, "step": 8612 }, { "epoch": 0.152691917379031, "grad_norm": 2.71875, "learning_rate": 4.727046991323726e-05, "loss": 0.7871, "step": 8614 }, { "epoch": 0.15272736941464257, "grad_norm": 2.75, "learning_rate": 4.7269200829938784e-05, "loss": 0.8023, "step": 8616 }, { "epoch": 0.15276282145025416, "grad_norm": 2.765625, "learning_rate": 4.7267931468725326e-05, "loss": 0.8246, "step": 8618 }, { "epoch": 0.15279827348586572, "grad_norm": 2.796875, "learning_rate": 4.726666182961271e-05, "loss": 0.8302, "step": 8620 }, { "epoch": 0.15283372552147728, "grad_norm": 2.75, "learning_rate": 4.7265391912616796e-05, "loss": 0.8324, "step": 8622 }, { "epoch": 0.15286917755708884, "grad_norm": 2.78125, "learning_rate": 4.726412171775343e-05, "loss": 0.8154, "step": 8624 }, { "epoch": 0.15290462959270043, "grad_norm": 2.6875, "learning_rate": 4.7262851245038456e-05, "loss": 0.8289, "step": 8626 }, { "epoch": 0.152940081628312, "grad_norm": 2.578125, "learning_rate": 4.7261580494487745e-05, "loss": 0.8119, "step": 8628 }, { "epoch": 0.15297553366392355, "grad_norm": 2.78125, "learning_rate": 4.726030946611714e-05, "loss": 0.7956, "step": 8630 }, { "epoch": 0.15301098569953514, "grad_norm": 2.9375, "learning_rate": 4.7259038159942514e-05, "loss": 0.7943, "step": 8632 }, { "epoch": 0.1530464377351467, "grad_norm": 2.921875, "learning_rate": 4.725776657597972e-05, "loss": 0.8546, "step": 8634 }, { "epoch": 0.15308188977075826, "grad_norm": 2.890625, "learning_rate": 4.725649471424464e-05, "loss": 0.8512, "step": 8636 }, { "epoch": 0.15311734180636985, "grad_norm": 2.484375, "learning_rate": 4.7255222574753144e-05, "loss": 0.8184, "step": 8638 }, { "epoch": 0.1531527938419814, "grad_norm": 2.75, "learning_rate": 4.7253950157521106e-05, "loss": 0.8186, "step": 8640 }, { "epoch": 0.15318824587759297, "grad_norm": 2.875, "learning_rate": 4.72526774625644e-05, "loss": 0.811, "step": 8642 }, { "epoch": 0.15322369791320456, "grad_norm": 2.84375, "learning_rate": 4.725140448989892e-05, "loss": 0.8125, "step": 8644 }, { "epoch": 0.15325914994881612, "grad_norm": 2.4375, "learning_rate": 4.725013123954054e-05, "loss": 0.8177, "step": 8646 }, { "epoch": 0.15329460198442768, "grad_norm": 2.625, "learning_rate": 4.724885771150516e-05, "loss": 0.8118, "step": 8648 }, { "epoch": 0.15333005402003927, "grad_norm": 2.703125, "learning_rate": 4.724758390580867e-05, "loss": 0.7981, "step": 8650 }, { "epoch": 0.15336550605565083, "grad_norm": 2.625, "learning_rate": 4.724630982246696e-05, "loss": 0.844, "step": 8652 }, { "epoch": 0.1534009580912624, "grad_norm": 2.921875, "learning_rate": 4.724503546149595e-05, "loss": 0.8135, "step": 8654 }, { "epoch": 0.15343641012687398, "grad_norm": 3.171875, "learning_rate": 4.724376082291152e-05, "loss": 0.8015, "step": 8656 }, { "epoch": 0.15347186216248554, "grad_norm": 2.65625, "learning_rate": 4.724248590672959e-05, "loss": 0.8211, "step": 8658 }, { "epoch": 0.1535073141980971, "grad_norm": 2.515625, "learning_rate": 4.7241210712966075e-05, "loss": 0.7986, "step": 8660 }, { "epoch": 0.1535427662337087, "grad_norm": 2.765625, "learning_rate": 4.723993524163688e-05, "loss": 0.8151, "step": 8662 }, { "epoch": 0.15357821826932025, "grad_norm": 2.765625, "learning_rate": 4.723865949275792e-05, "loss": 0.8132, "step": 8664 }, { "epoch": 0.1536136703049318, "grad_norm": 2.6875, "learning_rate": 4.723738346634513e-05, "loss": 0.797, "step": 8666 }, { "epoch": 0.1536491223405434, "grad_norm": 2.671875, "learning_rate": 4.723610716241442e-05, "loss": 0.8153, "step": 8668 }, { "epoch": 0.15368457437615496, "grad_norm": 2.65625, "learning_rate": 4.723483058098173e-05, "loss": 0.8162, "step": 8670 }, { "epoch": 0.15372002641176652, "grad_norm": 2.828125, "learning_rate": 4.723355372206297e-05, "loss": 0.8034, "step": 8672 }, { "epoch": 0.1537554784473781, "grad_norm": 2.546875, "learning_rate": 4.723227658567411e-05, "loss": 0.8138, "step": 8674 }, { "epoch": 0.15379093048298967, "grad_norm": 2.828125, "learning_rate": 4.723099917183106e-05, "loss": 0.8076, "step": 8676 }, { "epoch": 0.15382638251860123, "grad_norm": 3.0, "learning_rate": 4.7229721480549774e-05, "loss": 0.8268, "step": 8678 }, { "epoch": 0.15386183455421282, "grad_norm": 2.828125, "learning_rate": 4.722844351184619e-05, "loss": 0.8274, "step": 8680 }, { "epoch": 0.15389728658982438, "grad_norm": 2.53125, "learning_rate": 4.722716526573626e-05, "loss": 0.7815, "step": 8682 }, { "epoch": 0.15393273862543594, "grad_norm": 2.875, "learning_rate": 4.722588674223594e-05, "loss": 0.8101, "step": 8684 }, { "epoch": 0.15396819066104753, "grad_norm": 2.953125, "learning_rate": 4.722460794136117e-05, "loss": 0.8077, "step": 8686 }, { "epoch": 0.1540036426966591, "grad_norm": 2.546875, "learning_rate": 4.7223328863127944e-05, "loss": 0.8179, "step": 8688 }, { "epoch": 0.15403909473227065, "grad_norm": 3.046875, "learning_rate": 4.722204950755219e-05, "loss": 0.8085, "step": 8690 }, { "epoch": 0.15407454676788224, "grad_norm": 2.859375, "learning_rate": 4.722076987464989e-05, "loss": 0.8197, "step": 8692 }, { "epoch": 0.1541099988034938, "grad_norm": 2.609375, "learning_rate": 4.721948996443701e-05, "loss": 0.7829, "step": 8694 }, { "epoch": 0.15414545083910536, "grad_norm": 2.40625, "learning_rate": 4.7218209776929525e-05, "loss": 0.818, "step": 8696 }, { "epoch": 0.15418090287471695, "grad_norm": 2.65625, "learning_rate": 4.7216929312143396e-05, "loss": 0.8165, "step": 8698 }, { "epoch": 0.1542163549103285, "grad_norm": 2.65625, "learning_rate": 4.721564857009463e-05, "loss": 0.7966, "step": 8700 }, { "epoch": 0.15425180694594007, "grad_norm": 2.796875, "learning_rate": 4.7214367550799196e-05, "loss": 0.8127, "step": 8702 }, { "epoch": 0.15428725898155166, "grad_norm": 2.453125, "learning_rate": 4.721308625427309e-05, "loss": 0.8077, "step": 8704 }, { "epoch": 0.15432271101716322, "grad_norm": 2.875, "learning_rate": 4.7211804680532276e-05, "loss": 0.81, "step": 8706 }, { "epoch": 0.15435816305277478, "grad_norm": 2.828125, "learning_rate": 4.7210522829592774e-05, "loss": 0.8512, "step": 8708 }, { "epoch": 0.15439361508838637, "grad_norm": 2.625, "learning_rate": 4.7209240701470584e-05, "loss": 0.8265, "step": 8710 }, { "epoch": 0.15442906712399793, "grad_norm": 2.609375, "learning_rate": 4.7207958296181676e-05, "loss": 0.8358, "step": 8712 }, { "epoch": 0.1544645191596095, "grad_norm": 2.5, "learning_rate": 4.7206675613742084e-05, "loss": 0.7722, "step": 8714 }, { "epoch": 0.15449997119522108, "grad_norm": 2.78125, "learning_rate": 4.7205392654167806e-05, "loss": 0.8044, "step": 8716 }, { "epoch": 0.15453542323083264, "grad_norm": 2.578125, "learning_rate": 4.7204109417474854e-05, "loss": 0.8226, "step": 8718 }, { "epoch": 0.1545708752664442, "grad_norm": 2.78125, "learning_rate": 4.7202825903679234e-05, "loss": 0.8345, "step": 8720 }, { "epoch": 0.15460632730205578, "grad_norm": 2.65625, "learning_rate": 4.720154211279698e-05, "loss": 0.8133, "step": 8722 }, { "epoch": 0.15464177933766735, "grad_norm": 2.625, "learning_rate": 4.72002580448441e-05, "loss": 0.7914, "step": 8724 }, { "epoch": 0.1546772313732789, "grad_norm": 2.71875, "learning_rate": 4.719897369983663e-05, "loss": 0.785, "step": 8726 }, { "epoch": 0.1547126834088905, "grad_norm": 2.59375, "learning_rate": 4.7197689077790585e-05, "loss": 0.789, "step": 8728 }, { "epoch": 0.15474813544450206, "grad_norm": 2.78125, "learning_rate": 4.719640417872201e-05, "loss": 0.8307, "step": 8730 }, { "epoch": 0.15478358748011362, "grad_norm": 2.90625, "learning_rate": 4.719511900264693e-05, "loss": 0.8563, "step": 8732 }, { "epoch": 0.1548190395157252, "grad_norm": 3.125, "learning_rate": 4.719383354958138e-05, "loss": 0.8509, "step": 8734 }, { "epoch": 0.15485449155133676, "grad_norm": 2.734375, "learning_rate": 4.7192547819541423e-05, "loss": 0.8402, "step": 8736 }, { "epoch": 0.15488994358694833, "grad_norm": 3.0625, "learning_rate": 4.7191261812543084e-05, "loss": 0.8446, "step": 8738 }, { "epoch": 0.1549253956225599, "grad_norm": 3.0, "learning_rate": 4.718997552860243e-05, "loss": 0.8627, "step": 8740 }, { "epoch": 0.15496084765817147, "grad_norm": 2.546875, "learning_rate": 4.7188688967735486e-05, "loss": 0.8258, "step": 8742 }, { "epoch": 0.15499629969378304, "grad_norm": 2.921875, "learning_rate": 4.7187402129958334e-05, "loss": 0.8241, "step": 8744 }, { "epoch": 0.15503175172939462, "grad_norm": 2.6875, "learning_rate": 4.718611501528703e-05, "loss": 0.834, "step": 8746 }, { "epoch": 0.15506720376500618, "grad_norm": 2.75, "learning_rate": 4.7184827623737623e-05, "loss": 0.8331, "step": 8748 }, { "epoch": 0.15510265580061774, "grad_norm": 2.703125, "learning_rate": 4.71835399553262e-05, "loss": 0.804, "step": 8750 }, { "epoch": 0.15513810783622933, "grad_norm": 2.703125, "learning_rate": 4.718225201006881e-05, "loss": 0.8106, "step": 8752 }, { "epoch": 0.1551735598718409, "grad_norm": 2.921875, "learning_rate": 4.718096378798153e-05, "loss": 0.839, "step": 8754 }, { "epoch": 0.15520901190745245, "grad_norm": 2.734375, "learning_rate": 4.717967528908045e-05, "loss": 0.8135, "step": 8756 }, { "epoch": 0.15524446394306404, "grad_norm": 3.0, "learning_rate": 4.717838651338163e-05, "loss": 0.7937, "step": 8758 }, { "epoch": 0.1552799159786756, "grad_norm": 2.578125, "learning_rate": 4.717709746090118e-05, "loss": 0.8099, "step": 8760 }, { "epoch": 0.15531536801428716, "grad_norm": 3.125, "learning_rate": 4.717580813165517e-05, "loss": 0.7994, "step": 8762 }, { "epoch": 0.15535082004989875, "grad_norm": 2.890625, "learning_rate": 4.717451852565969e-05, "loss": 0.7946, "step": 8764 }, { "epoch": 0.1553862720855103, "grad_norm": 2.9375, "learning_rate": 4.7173228642930846e-05, "loss": 0.8289, "step": 8766 }, { "epoch": 0.15542172412112187, "grad_norm": 2.625, "learning_rate": 4.717193848348471e-05, "loss": 0.8171, "step": 8768 }, { "epoch": 0.15545717615673346, "grad_norm": 2.546875, "learning_rate": 4.7170648047337415e-05, "loss": 0.8237, "step": 8770 }, { "epoch": 0.15549262819234502, "grad_norm": 2.703125, "learning_rate": 4.7169357334505046e-05, "loss": 0.8451, "step": 8772 }, { "epoch": 0.15552808022795658, "grad_norm": 2.546875, "learning_rate": 4.7168066345003716e-05, "loss": 0.822, "step": 8774 }, { "epoch": 0.15556353226356817, "grad_norm": 2.625, "learning_rate": 4.716677507884953e-05, "loss": 0.8134, "step": 8776 }, { "epoch": 0.15559898429917973, "grad_norm": 2.640625, "learning_rate": 4.7165483536058605e-05, "loss": 0.8004, "step": 8778 }, { "epoch": 0.1556344363347913, "grad_norm": 2.890625, "learning_rate": 4.716419171664708e-05, "loss": 0.8202, "step": 8780 }, { "epoch": 0.15566988837040288, "grad_norm": 2.625, "learning_rate": 4.716289962063104e-05, "loss": 0.8158, "step": 8782 }, { "epoch": 0.15570534040601444, "grad_norm": 2.578125, "learning_rate": 4.716160724802664e-05, "loss": 0.797, "step": 8784 }, { "epoch": 0.155740792441626, "grad_norm": 2.609375, "learning_rate": 4.716031459884999e-05, "loss": 0.7925, "step": 8786 }, { "epoch": 0.1557762444772376, "grad_norm": 2.8125, "learning_rate": 4.715902167311723e-05, "loss": 0.8325, "step": 8788 }, { "epoch": 0.15581169651284915, "grad_norm": 2.65625, "learning_rate": 4.71577284708445e-05, "loss": 0.8167, "step": 8790 }, { "epoch": 0.1558471485484607, "grad_norm": 2.78125, "learning_rate": 4.7156434992047937e-05, "loss": 0.8647, "step": 8792 }, { "epoch": 0.1558826005840723, "grad_norm": 2.671875, "learning_rate": 4.715514123674367e-05, "loss": 0.8149, "step": 8794 }, { "epoch": 0.15591805261968386, "grad_norm": 2.78125, "learning_rate": 4.7153847204947866e-05, "loss": 0.7531, "step": 8796 }, { "epoch": 0.15595350465529542, "grad_norm": 2.90625, "learning_rate": 4.7152552896676656e-05, "loss": 0.8169, "step": 8798 }, { "epoch": 0.15598895669090698, "grad_norm": 2.671875, "learning_rate": 4.71512583119462e-05, "loss": 0.8065, "step": 8800 }, { "epoch": 0.15602440872651857, "grad_norm": 2.5625, "learning_rate": 4.714996345077265e-05, "loss": 0.8222, "step": 8802 }, { "epoch": 0.15605986076213013, "grad_norm": 2.65625, "learning_rate": 4.714866831317218e-05, "loss": 0.8257, "step": 8804 }, { "epoch": 0.1560953127977417, "grad_norm": 2.875, "learning_rate": 4.714737289916093e-05, "loss": 0.8318, "step": 8806 }, { "epoch": 0.15613076483335328, "grad_norm": 2.703125, "learning_rate": 4.714607720875509e-05, "loss": 0.7956, "step": 8808 }, { "epoch": 0.15616621686896484, "grad_norm": 2.890625, "learning_rate": 4.7144781241970815e-05, "loss": 0.868, "step": 8810 }, { "epoch": 0.1562016689045764, "grad_norm": 2.625, "learning_rate": 4.7143484998824284e-05, "loss": 0.8525, "step": 8812 }, { "epoch": 0.156237120940188, "grad_norm": 2.5, "learning_rate": 4.7142188479331674e-05, "loss": 0.8132, "step": 8814 }, { "epoch": 0.15627257297579955, "grad_norm": 2.46875, "learning_rate": 4.714089168350916e-05, "loss": 0.8109, "step": 8816 }, { "epoch": 0.1563080250114111, "grad_norm": 2.546875, "learning_rate": 4.713959461137293e-05, "loss": 0.8043, "step": 8818 }, { "epoch": 0.1563434770470227, "grad_norm": 2.828125, "learning_rate": 4.7138297262939173e-05, "loss": 0.8104, "step": 8820 }, { "epoch": 0.15637892908263426, "grad_norm": 2.640625, "learning_rate": 4.7136999638224076e-05, "loss": 0.8069, "step": 8822 }, { "epoch": 0.15641438111824582, "grad_norm": 2.984375, "learning_rate": 4.713570173724383e-05, "loss": 0.8431, "step": 8824 }, { "epoch": 0.1564498331538574, "grad_norm": 2.765625, "learning_rate": 4.713440356001464e-05, "loss": 0.8172, "step": 8826 }, { "epoch": 0.15648528518946897, "grad_norm": 2.484375, "learning_rate": 4.713310510655271e-05, "loss": 0.8077, "step": 8828 }, { "epoch": 0.15652073722508053, "grad_norm": 2.875, "learning_rate": 4.713180637687423e-05, "loss": 0.8036, "step": 8830 }, { "epoch": 0.15655618926069212, "grad_norm": 2.9375, "learning_rate": 4.713050737099542e-05, "loss": 0.8204, "step": 8832 }, { "epoch": 0.15659164129630368, "grad_norm": 2.671875, "learning_rate": 4.712920808893249e-05, "loss": 0.8062, "step": 8834 }, { "epoch": 0.15662709333191524, "grad_norm": 2.78125, "learning_rate": 4.712790853070165e-05, "loss": 0.8581, "step": 8836 }, { "epoch": 0.15666254536752683, "grad_norm": 2.8125, "learning_rate": 4.712660869631912e-05, "loss": 0.8173, "step": 8838 }, { "epoch": 0.1566979974031384, "grad_norm": 2.671875, "learning_rate": 4.712530858580111e-05, "loss": 0.7951, "step": 8840 }, { "epoch": 0.15673344943874995, "grad_norm": 2.5625, "learning_rate": 4.712400819916387e-05, "loss": 0.7993, "step": 8842 }, { "epoch": 0.15676890147436154, "grad_norm": 2.875, "learning_rate": 4.7122707536423615e-05, "loss": 0.787, "step": 8844 }, { "epoch": 0.1568043535099731, "grad_norm": 2.921875, "learning_rate": 4.712140659759658e-05, "loss": 0.7964, "step": 8846 }, { "epoch": 0.15683980554558466, "grad_norm": 2.5, "learning_rate": 4.7120105382698996e-05, "loss": 0.8216, "step": 8848 }, { "epoch": 0.15687525758119625, "grad_norm": 2.890625, "learning_rate": 4.71188038917471e-05, "loss": 0.8313, "step": 8850 }, { "epoch": 0.1569107096168078, "grad_norm": 2.84375, "learning_rate": 4.711750212475714e-05, "loss": 0.8019, "step": 8852 }, { "epoch": 0.15694616165241937, "grad_norm": 2.703125, "learning_rate": 4.711620008174536e-05, "loss": 0.8438, "step": 8854 }, { "epoch": 0.15698161368803096, "grad_norm": 2.9375, "learning_rate": 4.711489776272802e-05, "loss": 0.8425, "step": 8856 }, { "epoch": 0.15701706572364252, "grad_norm": 2.8125, "learning_rate": 4.711359516772135e-05, "loss": 0.8696, "step": 8858 }, { "epoch": 0.15705251775925408, "grad_norm": 2.9375, "learning_rate": 4.711229229674162e-05, "loss": 0.816, "step": 8860 }, { "epoch": 0.15708796979486567, "grad_norm": 2.796875, "learning_rate": 4.7110989149805095e-05, "loss": 0.8127, "step": 8862 }, { "epoch": 0.15712342183047723, "grad_norm": 2.734375, "learning_rate": 4.710968572692802e-05, "loss": 0.7671, "step": 8864 }, { "epoch": 0.1571588738660888, "grad_norm": 2.96875, "learning_rate": 4.710838202812668e-05, "loss": 0.8565, "step": 8866 }, { "epoch": 0.15719432590170038, "grad_norm": 2.703125, "learning_rate": 4.7107078053417335e-05, "loss": 0.8202, "step": 8868 }, { "epoch": 0.15722977793731194, "grad_norm": 2.6875, "learning_rate": 4.710577380281626e-05, "loss": 0.851, "step": 8870 }, { "epoch": 0.1572652299729235, "grad_norm": 2.75, "learning_rate": 4.710446927633973e-05, "loss": 0.763, "step": 8872 }, { "epoch": 0.1573006820085351, "grad_norm": 2.828125, "learning_rate": 4.7103164474004037e-05, "loss": 0.7902, "step": 8874 }, { "epoch": 0.15733613404414665, "grad_norm": 2.78125, "learning_rate": 4.710185939582544e-05, "loss": 0.8135, "step": 8876 }, { "epoch": 0.1573715860797582, "grad_norm": 2.78125, "learning_rate": 4.7100554041820255e-05, "loss": 0.8237, "step": 8878 }, { "epoch": 0.1574070381153698, "grad_norm": 2.484375, "learning_rate": 4.709924841200475e-05, "loss": 0.7817, "step": 8880 }, { "epoch": 0.15744249015098136, "grad_norm": 2.734375, "learning_rate": 4.709794250639523e-05, "loss": 0.7556, "step": 8882 }, { "epoch": 0.15747794218659292, "grad_norm": 2.6875, "learning_rate": 4.709663632500799e-05, "loss": 0.8198, "step": 8884 }, { "epoch": 0.1575133942222045, "grad_norm": 2.5625, "learning_rate": 4.7095329867859335e-05, "loss": 0.7842, "step": 8886 }, { "epoch": 0.15754884625781607, "grad_norm": 2.6875, "learning_rate": 4.709402313496556e-05, "loss": 0.8254, "step": 8888 }, { "epoch": 0.15758429829342763, "grad_norm": 2.609375, "learning_rate": 4.709271612634298e-05, "loss": 0.8064, "step": 8890 }, { "epoch": 0.15761975032903922, "grad_norm": 2.640625, "learning_rate": 4.7091408842007904e-05, "loss": 0.7782, "step": 8892 }, { "epoch": 0.15765520236465078, "grad_norm": 2.890625, "learning_rate": 4.709010128197665e-05, "loss": 0.8461, "step": 8894 }, { "epoch": 0.15769065440026234, "grad_norm": 3.171875, "learning_rate": 4.708879344626553e-05, "loss": 0.8172, "step": 8896 }, { "epoch": 0.15772610643587393, "grad_norm": 2.828125, "learning_rate": 4.7087485334890866e-05, "loss": 0.7925, "step": 8898 }, { "epoch": 0.15776155847148549, "grad_norm": 2.609375, "learning_rate": 4.708617694786899e-05, "loss": 0.839, "step": 8900 }, { "epoch": 0.15779701050709705, "grad_norm": 3.0625, "learning_rate": 4.7084868285216234e-05, "loss": 0.818, "step": 8902 }, { "epoch": 0.15783246254270863, "grad_norm": 2.671875, "learning_rate": 4.70835593469489e-05, "loss": 0.8456, "step": 8904 }, { "epoch": 0.1578679145783202, "grad_norm": 2.828125, "learning_rate": 4.708225013308336e-05, "loss": 0.8227, "step": 8906 }, { "epoch": 0.15790336661393176, "grad_norm": 2.984375, "learning_rate": 4.708094064363594e-05, "loss": 0.83, "step": 8908 }, { "epoch": 0.15793881864954334, "grad_norm": 2.703125, "learning_rate": 4.707963087862297e-05, "loss": 0.8352, "step": 8910 }, { "epoch": 0.1579742706851549, "grad_norm": 2.9375, "learning_rate": 4.7078320838060816e-05, "loss": 0.8151, "step": 8912 }, { "epoch": 0.15800972272076647, "grad_norm": 2.796875, "learning_rate": 4.7077010521965816e-05, "loss": 0.8006, "step": 8914 }, { "epoch": 0.15804517475637805, "grad_norm": 2.609375, "learning_rate": 4.707569993035431e-05, "loss": 0.8574, "step": 8916 }, { "epoch": 0.15808062679198961, "grad_norm": 2.75, "learning_rate": 4.707438906324267e-05, "loss": 0.7731, "step": 8918 }, { "epoch": 0.15811607882760118, "grad_norm": 2.96875, "learning_rate": 4.707307792064727e-05, "loss": 0.8476, "step": 8920 }, { "epoch": 0.15815153086321276, "grad_norm": 2.609375, "learning_rate": 4.707176650258444e-05, "loss": 0.8064, "step": 8922 }, { "epoch": 0.15818698289882432, "grad_norm": 2.640625, "learning_rate": 4.707045480907056e-05, "loss": 0.8639, "step": 8924 }, { "epoch": 0.15822243493443588, "grad_norm": 2.703125, "learning_rate": 4.706914284012201e-05, "loss": 0.8088, "step": 8926 }, { "epoch": 0.15825788697004747, "grad_norm": 2.71875, "learning_rate": 4.706783059575515e-05, "loss": 0.7744, "step": 8928 }, { "epoch": 0.15829333900565903, "grad_norm": 2.6875, "learning_rate": 4.706651807598635e-05, "loss": 0.8192, "step": 8930 }, { "epoch": 0.1583287910412706, "grad_norm": 2.890625, "learning_rate": 4.706520528083202e-05, "loss": 0.8281, "step": 8932 }, { "epoch": 0.15836424307688218, "grad_norm": 2.703125, "learning_rate": 4.706389221030851e-05, "loss": 0.7792, "step": 8934 }, { "epoch": 0.15839969511249374, "grad_norm": 2.4375, "learning_rate": 4.706257886443222e-05, "loss": 0.7977, "step": 8936 }, { "epoch": 0.1584351471481053, "grad_norm": 2.734375, "learning_rate": 4.706126524321954e-05, "loss": 0.8396, "step": 8938 }, { "epoch": 0.1584705991837169, "grad_norm": 2.6875, "learning_rate": 4.705995134668688e-05, "loss": 0.8214, "step": 8940 }, { "epoch": 0.15850605121932845, "grad_norm": 2.609375, "learning_rate": 4.7058637174850604e-05, "loss": 0.823, "step": 8942 }, { "epoch": 0.15854150325494, "grad_norm": 2.796875, "learning_rate": 4.7057322727727145e-05, "loss": 0.8337, "step": 8944 }, { "epoch": 0.1585769552905516, "grad_norm": 2.6875, "learning_rate": 4.7056008005332886e-05, "loss": 0.8593, "step": 8946 }, { "epoch": 0.15861240732616316, "grad_norm": 3.359375, "learning_rate": 4.7054693007684245e-05, "loss": 0.8306, "step": 8948 }, { "epoch": 0.15864785936177472, "grad_norm": 2.71875, "learning_rate": 4.705337773479762e-05, "loss": 0.8124, "step": 8950 }, { "epoch": 0.1586833113973863, "grad_norm": 2.515625, "learning_rate": 4.7052062186689435e-05, "loss": 0.8111, "step": 8952 }, { "epoch": 0.15871876343299787, "grad_norm": 2.859375, "learning_rate": 4.705074636337612e-05, "loss": 0.808, "step": 8954 }, { "epoch": 0.15875421546860943, "grad_norm": 2.90625, "learning_rate": 4.704943026487407e-05, "loss": 0.8285, "step": 8956 }, { "epoch": 0.15878966750422102, "grad_norm": 2.6875, "learning_rate": 4.704811389119973e-05, "loss": 0.8562, "step": 8958 }, { "epoch": 0.15882511953983258, "grad_norm": 2.515625, "learning_rate": 4.704679724236952e-05, "loss": 0.8075, "step": 8960 }, { "epoch": 0.15886057157544414, "grad_norm": 2.75, "learning_rate": 4.704548031839987e-05, "loss": 0.8646, "step": 8962 }, { "epoch": 0.15889602361105573, "grad_norm": 2.578125, "learning_rate": 4.704416311930722e-05, "loss": 0.8135, "step": 8964 }, { "epoch": 0.1589314756466673, "grad_norm": 2.84375, "learning_rate": 4.7042845645108e-05, "loss": 0.8081, "step": 8966 }, { "epoch": 0.15896692768227885, "grad_norm": 3.15625, "learning_rate": 4.7041527895818664e-05, "loss": 0.7951, "step": 8968 }, { "epoch": 0.1590023797178904, "grad_norm": 2.609375, "learning_rate": 4.704020987145565e-05, "loss": 0.7943, "step": 8970 }, { "epoch": 0.159037831753502, "grad_norm": 2.515625, "learning_rate": 4.70388915720354e-05, "loss": 0.7993, "step": 8972 }, { "epoch": 0.15907328378911356, "grad_norm": 2.65625, "learning_rate": 4.703757299757439e-05, "loss": 0.8204, "step": 8974 }, { "epoch": 0.15910873582472512, "grad_norm": 2.625, "learning_rate": 4.703625414808904e-05, "loss": 0.8297, "step": 8976 }, { "epoch": 0.1591441878603367, "grad_norm": 2.9375, "learning_rate": 4.703493502359584e-05, "loss": 0.8265, "step": 8978 }, { "epoch": 0.15917963989594827, "grad_norm": 2.75, "learning_rate": 4.703361562411124e-05, "loss": 0.8388, "step": 8980 }, { "epoch": 0.15921509193155983, "grad_norm": 3.046875, "learning_rate": 4.7032295949651693e-05, "loss": 0.8035, "step": 8982 }, { "epoch": 0.15925054396717142, "grad_norm": 2.84375, "learning_rate": 4.70309760002337e-05, "loss": 0.8611, "step": 8984 }, { "epoch": 0.15928599600278298, "grad_norm": 3.015625, "learning_rate": 4.702965577587371e-05, "loss": 0.8407, "step": 8986 }, { "epoch": 0.15932144803839454, "grad_norm": 2.71875, "learning_rate": 4.7028335276588195e-05, "loss": 0.7786, "step": 8988 }, { "epoch": 0.15935690007400613, "grad_norm": 2.859375, "learning_rate": 4.702701450239365e-05, "loss": 0.8373, "step": 8990 }, { "epoch": 0.1593923521096177, "grad_norm": 2.765625, "learning_rate": 4.7025693453306555e-05, "loss": 0.8483, "step": 8992 }, { "epoch": 0.15942780414522925, "grad_norm": 2.734375, "learning_rate": 4.702437212934339e-05, "loss": 0.8224, "step": 8994 }, { "epoch": 0.15946325618084084, "grad_norm": 2.921875, "learning_rate": 4.702305053052065e-05, "loss": 0.8397, "step": 8996 }, { "epoch": 0.1594987082164524, "grad_norm": 2.984375, "learning_rate": 4.702172865685483e-05, "loss": 0.8114, "step": 8998 }, { "epoch": 0.15953416025206396, "grad_norm": 2.859375, "learning_rate": 4.702040650836241e-05, "loss": 0.802, "step": 9000 }, { "epoch": 0.15956961228767555, "grad_norm": 2.90625, "learning_rate": 4.701908408505992e-05, "loss": 0.8164, "step": 9002 }, { "epoch": 0.1596050643232871, "grad_norm": 2.515625, "learning_rate": 4.701776138696383e-05, "loss": 0.7726, "step": 9004 }, { "epoch": 0.15964051635889867, "grad_norm": 2.984375, "learning_rate": 4.7016438414090674e-05, "loss": 0.8649, "step": 9006 }, { "epoch": 0.15967596839451026, "grad_norm": 2.6875, "learning_rate": 4.7015115166456954e-05, "loss": 0.7944, "step": 9008 }, { "epoch": 0.15971142043012182, "grad_norm": 2.765625, "learning_rate": 4.701379164407917e-05, "loss": 0.8098, "step": 9010 }, { "epoch": 0.15974687246573338, "grad_norm": 2.8125, "learning_rate": 4.701246784697386e-05, "loss": 0.8219, "step": 9012 }, { "epoch": 0.15978232450134497, "grad_norm": 2.84375, "learning_rate": 4.701114377515754e-05, "loss": 0.8315, "step": 9014 }, { "epoch": 0.15981777653695653, "grad_norm": 2.390625, "learning_rate": 4.7009819428646726e-05, "loss": 0.7966, "step": 9016 }, { "epoch": 0.1598532285725681, "grad_norm": 2.578125, "learning_rate": 4.7008494807457954e-05, "loss": 0.7995, "step": 9018 }, { "epoch": 0.15988868060817968, "grad_norm": 3.078125, "learning_rate": 4.700716991160775e-05, "loss": 0.8324, "step": 9020 }, { "epoch": 0.15992413264379124, "grad_norm": 2.78125, "learning_rate": 4.7005844741112646e-05, "loss": 0.841, "step": 9022 }, { "epoch": 0.1599595846794028, "grad_norm": 2.625, "learning_rate": 4.700451929598918e-05, "loss": 0.8093, "step": 9024 }, { "epoch": 0.1599950367150144, "grad_norm": 3.125, "learning_rate": 4.70031935762539e-05, "loss": 0.8544, "step": 9026 }, { "epoch": 0.16003048875062595, "grad_norm": 2.8125, "learning_rate": 4.7001867581923355e-05, "loss": 0.8258, "step": 9028 }, { "epoch": 0.1600659407862375, "grad_norm": 2.59375, "learning_rate": 4.700054131301407e-05, "loss": 0.7826, "step": 9030 }, { "epoch": 0.1601013928218491, "grad_norm": 2.734375, "learning_rate": 4.699921476954262e-05, "loss": 0.8419, "step": 9032 }, { "epoch": 0.16013684485746066, "grad_norm": 2.5625, "learning_rate": 4.699788795152555e-05, "loss": 0.7868, "step": 9034 }, { "epoch": 0.16017229689307222, "grad_norm": 2.5625, "learning_rate": 4.699656085897942e-05, "loss": 0.8306, "step": 9036 }, { "epoch": 0.1602077489286838, "grad_norm": 2.953125, "learning_rate": 4.69952334919208e-05, "loss": 0.8282, "step": 9038 }, { "epoch": 0.16024320096429537, "grad_norm": 2.625, "learning_rate": 4.6993905850366237e-05, "loss": 0.825, "step": 9040 }, { "epoch": 0.16027865299990693, "grad_norm": 2.8125, "learning_rate": 4.6992577934332315e-05, "loss": 0.814, "step": 9042 }, { "epoch": 0.16031410503551852, "grad_norm": 2.640625, "learning_rate": 4.6991249743835595e-05, "loss": 0.8185, "step": 9044 }, { "epoch": 0.16034955707113008, "grad_norm": 2.75, "learning_rate": 4.6989921278892665e-05, "loss": 0.8195, "step": 9046 }, { "epoch": 0.16038500910674164, "grad_norm": 3.015625, "learning_rate": 4.698859253952009e-05, "loss": 0.8143, "step": 9048 }, { "epoch": 0.16042046114235323, "grad_norm": 2.71875, "learning_rate": 4.6987263525734474e-05, "loss": 0.7709, "step": 9050 }, { "epoch": 0.1604559131779648, "grad_norm": 2.6875, "learning_rate": 4.698593423755238e-05, "loss": 0.8342, "step": 9052 }, { "epoch": 0.16049136521357635, "grad_norm": 2.65625, "learning_rate": 4.6984604674990407e-05, "loss": 0.8358, "step": 9054 }, { "epoch": 0.16052681724918794, "grad_norm": 2.9375, "learning_rate": 4.698327483806515e-05, "loss": 0.7821, "step": 9056 }, { "epoch": 0.1605622692847995, "grad_norm": 3.28125, "learning_rate": 4.698194472679319e-05, "loss": 0.8044, "step": 9058 }, { "epoch": 0.16059772132041106, "grad_norm": 3.046875, "learning_rate": 4.698061434119115e-05, "loss": 0.8232, "step": 9060 }, { "epoch": 0.16063317335602265, "grad_norm": 2.953125, "learning_rate": 4.697928368127562e-05, "loss": 0.8248, "step": 9062 }, { "epoch": 0.1606686253916342, "grad_norm": 2.84375, "learning_rate": 4.6977952747063204e-05, "loss": 0.8056, "step": 9064 }, { "epoch": 0.16070407742724577, "grad_norm": 2.78125, "learning_rate": 4.697662153857052e-05, "loss": 0.7992, "step": 9066 }, { "epoch": 0.16073952946285736, "grad_norm": 2.90625, "learning_rate": 4.697529005581417e-05, "loss": 0.8145, "step": 9068 }, { "epoch": 0.16077498149846892, "grad_norm": 2.984375, "learning_rate": 4.697395829881078e-05, "loss": 0.8375, "step": 9070 }, { "epoch": 0.16081043353408048, "grad_norm": 3.0625, "learning_rate": 4.697262626757697e-05, "loss": 0.8664, "step": 9072 }, { "epoch": 0.16084588556969207, "grad_norm": 2.734375, "learning_rate": 4.697129396212936e-05, "loss": 0.8609, "step": 9074 }, { "epoch": 0.16088133760530363, "grad_norm": 2.609375, "learning_rate": 4.696996138248457e-05, "loss": 0.7714, "step": 9076 }, { "epoch": 0.1609167896409152, "grad_norm": 2.671875, "learning_rate": 4.696862852865925e-05, "loss": 0.7998, "step": 9078 }, { "epoch": 0.16095224167652677, "grad_norm": 2.453125, "learning_rate": 4.6967295400670016e-05, "loss": 0.8001, "step": 9080 }, { "epoch": 0.16098769371213834, "grad_norm": 2.640625, "learning_rate": 4.696596199853351e-05, "loss": 0.8074, "step": 9082 }, { "epoch": 0.1610231457477499, "grad_norm": 2.71875, "learning_rate": 4.6964628322266374e-05, "loss": 0.842, "step": 9084 }, { "epoch": 0.16105859778336148, "grad_norm": 2.765625, "learning_rate": 4.696329437188525e-05, "loss": 0.7974, "step": 9086 }, { "epoch": 0.16109404981897305, "grad_norm": 2.828125, "learning_rate": 4.696196014740679e-05, "loss": 0.7829, "step": 9088 }, { "epoch": 0.1611295018545846, "grad_norm": 2.4375, "learning_rate": 4.696062564884764e-05, "loss": 0.8241, "step": 9090 }, { "epoch": 0.1611649538901962, "grad_norm": 2.8125, "learning_rate": 4.695929087622446e-05, "loss": 0.8322, "step": 9092 }, { "epoch": 0.16120040592580775, "grad_norm": 2.75, "learning_rate": 4.6957955829553904e-05, "loss": 0.779, "step": 9094 }, { "epoch": 0.16123585796141932, "grad_norm": 2.625, "learning_rate": 4.695662050885262e-05, "loss": 0.7793, "step": 9096 }, { "epoch": 0.1612713099970309, "grad_norm": 2.9375, "learning_rate": 4.69552849141373e-05, "loss": 0.8895, "step": 9098 }, { "epoch": 0.16130676203264246, "grad_norm": 2.625, "learning_rate": 4.6953949045424587e-05, "loss": 0.8151, "step": 9100 }, { "epoch": 0.16134221406825403, "grad_norm": 2.703125, "learning_rate": 4.6952612902731165e-05, "loss": 0.8219, "step": 9102 }, { "epoch": 0.1613776661038656, "grad_norm": 2.421875, "learning_rate": 4.6951276486073706e-05, "loss": 0.7823, "step": 9104 }, { "epoch": 0.16141311813947717, "grad_norm": 2.96875, "learning_rate": 4.69499397954689e-05, "loss": 0.845, "step": 9106 }, { "epoch": 0.16144857017508873, "grad_norm": 2.765625, "learning_rate": 4.6948602830933404e-05, "loss": 0.838, "step": 9108 }, { "epoch": 0.16148402221070032, "grad_norm": 2.8125, "learning_rate": 4.694726559248392e-05, "loss": 0.7893, "step": 9110 }, { "epoch": 0.16151947424631188, "grad_norm": 2.953125, "learning_rate": 4.6945928080137134e-05, "loss": 0.8171, "step": 9112 }, { "epoch": 0.16155492628192344, "grad_norm": 2.5625, "learning_rate": 4.694459029390973e-05, "loss": 0.7906, "step": 9114 }, { "epoch": 0.16159037831753503, "grad_norm": 2.765625, "learning_rate": 4.694325223381842e-05, "loss": 0.8033, "step": 9116 }, { "epoch": 0.1616258303531466, "grad_norm": 3.265625, "learning_rate": 4.694191389987988e-05, "loss": 0.8416, "step": 9118 }, { "epoch": 0.16166128238875815, "grad_norm": 3.0, "learning_rate": 4.694057529211084e-05, "loss": 0.8006, "step": 9120 }, { "epoch": 0.16169673442436974, "grad_norm": 2.8125, "learning_rate": 4.693923641052798e-05, "loss": 0.8163, "step": 9122 }, { "epoch": 0.1617321864599813, "grad_norm": 2.703125, "learning_rate": 4.693789725514802e-05, "loss": 0.8369, "step": 9124 }, { "epoch": 0.16176763849559286, "grad_norm": 2.921875, "learning_rate": 4.693655782598768e-05, "loss": 0.8142, "step": 9126 }, { "epoch": 0.16180309053120445, "grad_norm": 2.734375, "learning_rate": 4.693521812306366e-05, "loss": 0.8145, "step": 9128 }, { "epoch": 0.161838542566816, "grad_norm": 2.78125, "learning_rate": 4.6933878146392685e-05, "loss": 0.8574, "step": 9130 }, { "epoch": 0.16187399460242757, "grad_norm": 2.78125, "learning_rate": 4.693253789599148e-05, "loss": 0.8351, "step": 9132 }, { "epoch": 0.16190944663803916, "grad_norm": 2.796875, "learning_rate": 4.693119737187677e-05, "loss": 0.8218, "step": 9134 }, { "epoch": 0.16194489867365072, "grad_norm": 2.640625, "learning_rate": 4.692985657406529e-05, "loss": 0.8088, "step": 9136 }, { "epoch": 0.16198035070926228, "grad_norm": 2.484375, "learning_rate": 4.692851550257377e-05, "loss": 0.7974, "step": 9138 }, { "epoch": 0.16201580274487384, "grad_norm": 2.734375, "learning_rate": 4.6927174157418934e-05, "loss": 0.8186, "step": 9140 }, { "epoch": 0.16205125478048543, "grad_norm": 2.703125, "learning_rate": 4.6925832538617536e-05, "loss": 0.8193, "step": 9142 }, { "epoch": 0.162086706816097, "grad_norm": 2.8125, "learning_rate": 4.692449064618631e-05, "loss": 0.7591, "step": 9144 }, { "epoch": 0.16212215885170855, "grad_norm": 2.609375, "learning_rate": 4.692314848014202e-05, "loss": 0.817, "step": 9146 }, { "epoch": 0.16215761088732014, "grad_norm": 2.703125, "learning_rate": 4.6921806040501394e-05, "loss": 0.8258, "step": 9148 }, { "epoch": 0.1621930629229317, "grad_norm": 2.921875, "learning_rate": 4.6920463327281196e-05, "loss": 0.8466, "step": 9150 }, { "epoch": 0.16222851495854326, "grad_norm": 2.78125, "learning_rate": 4.691912034049818e-05, "loss": 0.8232, "step": 9152 }, { "epoch": 0.16226396699415485, "grad_norm": 3.390625, "learning_rate": 4.691777708016911e-05, "loss": 0.8537, "step": 9154 }, { "epoch": 0.1622994190297664, "grad_norm": 2.734375, "learning_rate": 4.6916433546310746e-05, "loss": 0.8061, "step": 9156 }, { "epoch": 0.16233487106537797, "grad_norm": 2.8125, "learning_rate": 4.691508973893985e-05, "loss": 0.8205, "step": 9158 }, { "epoch": 0.16237032310098956, "grad_norm": 2.53125, "learning_rate": 4.691374565807321e-05, "loss": 0.8232, "step": 9160 }, { "epoch": 0.16240577513660112, "grad_norm": 2.53125, "learning_rate": 4.691240130372758e-05, "loss": 0.7885, "step": 9162 }, { "epoch": 0.16244122717221268, "grad_norm": 2.53125, "learning_rate": 4.6911056675919754e-05, "loss": 0.8036, "step": 9164 }, { "epoch": 0.16247667920782427, "grad_norm": 2.546875, "learning_rate": 4.69097117746665e-05, "loss": 0.807, "step": 9166 }, { "epoch": 0.16251213124343583, "grad_norm": 2.828125, "learning_rate": 4.69083665999846e-05, "loss": 0.7837, "step": 9168 }, { "epoch": 0.1625475832790474, "grad_norm": 2.71875, "learning_rate": 4.690702115189086e-05, "loss": 0.8121, "step": 9170 }, { "epoch": 0.16258303531465898, "grad_norm": 2.84375, "learning_rate": 4.690567543040205e-05, "loss": 0.8367, "step": 9172 }, { "epoch": 0.16261848735027054, "grad_norm": 2.84375, "learning_rate": 4.690432943553498e-05, "loss": 0.8392, "step": 9174 }, { "epoch": 0.1626539393858821, "grad_norm": 3.1875, "learning_rate": 4.690298316730644e-05, "loss": 0.8201, "step": 9176 }, { "epoch": 0.1626893914214937, "grad_norm": 2.453125, "learning_rate": 4.690163662573323e-05, "loss": 0.7778, "step": 9178 }, { "epoch": 0.16272484345710525, "grad_norm": 2.578125, "learning_rate": 4.690028981083215e-05, "loss": 0.7806, "step": 9180 }, { "epoch": 0.1627602954927168, "grad_norm": 2.984375, "learning_rate": 4.6898942722620024e-05, "loss": 0.8281, "step": 9182 }, { "epoch": 0.1627957475283284, "grad_norm": 2.390625, "learning_rate": 4.689759536111364e-05, "loss": 0.78, "step": 9184 }, { "epoch": 0.16283119956393996, "grad_norm": 3.109375, "learning_rate": 4.6896247726329846e-05, "loss": 0.7946, "step": 9186 }, { "epoch": 0.16286665159955152, "grad_norm": 2.875, "learning_rate": 4.689489981828543e-05, "loss": 0.8081, "step": 9188 }, { "epoch": 0.1629021036351631, "grad_norm": 2.9375, "learning_rate": 4.6893551636997223e-05, "loss": 0.8461, "step": 9190 }, { "epoch": 0.16293755567077467, "grad_norm": 2.59375, "learning_rate": 4.689220318248207e-05, "loss": 0.7735, "step": 9192 }, { "epoch": 0.16297300770638623, "grad_norm": 2.578125, "learning_rate": 4.689085445475676e-05, "loss": 0.7966, "step": 9194 }, { "epoch": 0.16300845974199782, "grad_norm": 2.875, "learning_rate": 4.688950545383815e-05, "loss": 0.7951, "step": 9196 }, { "epoch": 0.16304391177760938, "grad_norm": 2.953125, "learning_rate": 4.688815617974307e-05, "loss": 0.8245, "step": 9198 }, { "epoch": 0.16307936381322094, "grad_norm": 3.015625, "learning_rate": 4.688680663248837e-05, "loss": 0.8025, "step": 9200 }, { "epoch": 0.16311481584883253, "grad_norm": 2.921875, "learning_rate": 4.688545681209087e-05, "loss": 0.8242, "step": 9202 }, { "epoch": 0.1631502678844441, "grad_norm": 2.921875, "learning_rate": 4.6884106718567435e-05, "loss": 0.7914, "step": 9204 }, { "epoch": 0.16318571992005565, "grad_norm": 2.734375, "learning_rate": 4.688275635193491e-05, "loss": 0.8505, "step": 9206 }, { "epoch": 0.16322117195566724, "grad_norm": 2.859375, "learning_rate": 4.688140571221014e-05, "loss": 0.8436, "step": 9208 }, { "epoch": 0.1632566239912788, "grad_norm": 2.6875, "learning_rate": 4.6880054799409976e-05, "loss": 0.8364, "step": 9210 }, { "epoch": 0.16329207602689036, "grad_norm": 2.84375, "learning_rate": 4.687870361355129e-05, "loss": 0.8385, "step": 9212 }, { "epoch": 0.16332752806250195, "grad_norm": 2.828125, "learning_rate": 4.6877352154650945e-05, "loss": 0.8277, "step": 9214 }, { "epoch": 0.1633629800981135, "grad_norm": 2.859375, "learning_rate": 4.6876000422725795e-05, "loss": 0.8034, "step": 9216 }, { "epoch": 0.16339843213372507, "grad_norm": 2.828125, "learning_rate": 4.6874648417792724e-05, "loss": 0.7925, "step": 9218 }, { "epoch": 0.16343388416933666, "grad_norm": 2.71875, "learning_rate": 4.687329613986859e-05, "loss": 0.7897, "step": 9220 }, { "epoch": 0.16346933620494822, "grad_norm": 2.484375, "learning_rate": 4.687194358897028e-05, "loss": 0.812, "step": 9222 }, { "epoch": 0.16350478824055978, "grad_norm": 2.75, "learning_rate": 4.687059076511467e-05, "loss": 0.7761, "step": 9224 }, { "epoch": 0.16354024027617137, "grad_norm": 2.78125, "learning_rate": 4.686923766831864e-05, "loss": 0.7916, "step": 9226 }, { "epoch": 0.16357569231178293, "grad_norm": 2.96875, "learning_rate": 4.686788429859907e-05, "loss": 0.7843, "step": 9228 }, { "epoch": 0.1636111443473945, "grad_norm": 3.015625, "learning_rate": 4.686653065597287e-05, "loss": 0.8011, "step": 9230 }, { "epoch": 0.16364659638300608, "grad_norm": 2.78125, "learning_rate": 4.686517674045693e-05, "loss": 0.8351, "step": 9232 }, { "epoch": 0.16368204841861764, "grad_norm": 2.90625, "learning_rate": 4.686382255206813e-05, "loss": 0.7966, "step": 9234 }, { "epoch": 0.1637175004542292, "grad_norm": 2.609375, "learning_rate": 4.686246809082337e-05, "loss": 0.8251, "step": 9236 }, { "epoch": 0.1637529524898408, "grad_norm": 2.921875, "learning_rate": 4.6861113356739574e-05, "loss": 0.807, "step": 9238 }, { "epoch": 0.16378840452545235, "grad_norm": 3.0, "learning_rate": 4.6859758349833626e-05, "loss": 0.8265, "step": 9240 }, { "epoch": 0.1638238565610639, "grad_norm": 2.921875, "learning_rate": 4.6858403070122456e-05, "loss": 0.8123, "step": 9242 }, { "epoch": 0.1638593085966755, "grad_norm": 2.671875, "learning_rate": 4.685704751762296e-05, "loss": 0.8313, "step": 9244 }, { "epoch": 0.16389476063228706, "grad_norm": 2.671875, "learning_rate": 4.6855691692352074e-05, "loss": 0.7875, "step": 9246 }, { "epoch": 0.16393021266789862, "grad_norm": 3.28125, "learning_rate": 4.6854335594326704e-05, "loss": 0.8297, "step": 9248 }, { "epoch": 0.1639656647035102, "grad_norm": 2.703125, "learning_rate": 4.685297922356378e-05, "loss": 0.8047, "step": 9250 }, { "epoch": 0.16400111673912177, "grad_norm": 2.609375, "learning_rate": 4.685162258008022e-05, "loss": 0.8121, "step": 9252 }, { "epoch": 0.16403656877473333, "grad_norm": 3.078125, "learning_rate": 4.6850265663892964e-05, "loss": 0.8477, "step": 9254 }, { "epoch": 0.16407202081034492, "grad_norm": 2.625, "learning_rate": 4.684890847501894e-05, "loss": 0.8041, "step": 9256 }, { "epoch": 0.16410747284595648, "grad_norm": 2.6875, "learning_rate": 4.68475510134751e-05, "loss": 0.7815, "step": 9258 }, { "epoch": 0.16414292488156804, "grad_norm": 2.484375, "learning_rate": 4.684619327927836e-05, "loss": 0.8229, "step": 9260 }, { "epoch": 0.16417837691717962, "grad_norm": 2.96875, "learning_rate": 4.6844835272445686e-05, "loss": 0.8021, "step": 9262 }, { "epoch": 0.16421382895279119, "grad_norm": 2.734375, "learning_rate": 4.684347699299402e-05, "loss": 0.8477, "step": 9264 }, { "epoch": 0.16424928098840275, "grad_norm": 2.578125, "learning_rate": 4.6842118440940306e-05, "loss": 0.7934, "step": 9266 }, { "epoch": 0.16428473302401433, "grad_norm": 2.484375, "learning_rate": 4.684075961630151e-05, "loss": 0.7917, "step": 9268 }, { "epoch": 0.1643201850596259, "grad_norm": 2.609375, "learning_rate": 4.683940051909458e-05, "loss": 0.7748, "step": 9270 }, { "epoch": 0.16435563709523746, "grad_norm": 2.875, "learning_rate": 4.6838041149336476e-05, "loss": 0.8254, "step": 9272 }, { "epoch": 0.16439108913084904, "grad_norm": 2.734375, "learning_rate": 4.683668150704417e-05, "loss": 0.8116, "step": 9274 }, { "epoch": 0.1644265411664606, "grad_norm": 2.671875, "learning_rate": 4.683532159223463e-05, "loss": 0.8229, "step": 9276 }, { "epoch": 0.16446199320207217, "grad_norm": 2.703125, "learning_rate": 4.683396140492481e-05, "loss": 0.7687, "step": 9278 }, { "epoch": 0.16449744523768375, "grad_norm": 2.71875, "learning_rate": 4.683260094513171e-05, "loss": 0.7867, "step": 9280 }, { "epoch": 0.16453289727329531, "grad_norm": 2.890625, "learning_rate": 4.6831240212872305e-05, "loss": 0.8179, "step": 9282 }, { "epoch": 0.16456834930890687, "grad_norm": 2.921875, "learning_rate": 4.6829879208163564e-05, "loss": 0.7967, "step": 9284 }, { "epoch": 0.16460380134451846, "grad_norm": 2.546875, "learning_rate": 4.682851793102248e-05, "loss": 0.7793, "step": 9286 }, { "epoch": 0.16463925338013002, "grad_norm": 2.6875, "learning_rate": 4.682715638146603e-05, "loss": 0.8315, "step": 9288 }, { "epoch": 0.16467470541574158, "grad_norm": 2.6875, "learning_rate": 4.682579455951122e-05, "loss": 0.8027, "step": 9290 }, { "epoch": 0.16471015745135317, "grad_norm": 2.84375, "learning_rate": 4.682443246517503e-05, "loss": 0.8081, "step": 9292 }, { "epoch": 0.16474560948696473, "grad_norm": 2.625, "learning_rate": 4.682307009847448e-05, "loss": 0.8358, "step": 9294 }, { "epoch": 0.1647810615225763, "grad_norm": 2.625, "learning_rate": 4.6821707459426556e-05, "loss": 0.8409, "step": 9296 }, { "epoch": 0.16481651355818788, "grad_norm": 2.890625, "learning_rate": 4.682034454804827e-05, "loss": 0.841, "step": 9298 }, { "epoch": 0.16485196559379944, "grad_norm": 2.5625, "learning_rate": 4.681898136435663e-05, "loss": 0.736, "step": 9300 }, { "epoch": 0.164887417629411, "grad_norm": 2.703125, "learning_rate": 4.6817617908368646e-05, "loss": 0.8464, "step": 9302 }, { "epoch": 0.1649228696650226, "grad_norm": 2.890625, "learning_rate": 4.681625418010134e-05, "loss": 0.8265, "step": 9304 }, { "epoch": 0.16495832170063415, "grad_norm": 2.875, "learning_rate": 4.6814890179571714e-05, "loss": 0.7931, "step": 9306 }, { "epoch": 0.1649937737362457, "grad_norm": 3.0, "learning_rate": 4.681352590679681e-05, "loss": 0.8446, "step": 9308 }, { "epoch": 0.16502922577185727, "grad_norm": 2.8125, "learning_rate": 4.681216136179365e-05, "loss": 0.7961, "step": 9310 }, { "epoch": 0.16506467780746886, "grad_norm": 2.71875, "learning_rate": 4.681079654457925e-05, "loss": 0.795, "step": 9312 }, { "epoch": 0.16510012984308042, "grad_norm": 2.78125, "learning_rate": 4.680943145517066e-05, "loss": 0.7826, "step": 9314 }, { "epoch": 0.16513558187869198, "grad_norm": 2.671875, "learning_rate": 4.68080660935849e-05, "loss": 0.7961, "step": 9316 }, { "epoch": 0.16517103391430357, "grad_norm": 2.625, "learning_rate": 4.680670045983903e-05, "loss": 0.8179, "step": 9318 }, { "epoch": 0.16520648594991513, "grad_norm": 2.375, "learning_rate": 4.6805334553950064e-05, "loss": 0.7996, "step": 9320 }, { "epoch": 0.1652419379855267, "grad_norm": 2.765625, "learning_rate": 4.6803968375935076e-05, "loss": 0.7934, "step": 9322 }, { "epoch": 0.16527739002113828, "grad_norm": 2.9375, "learning_rate": 4.68026019258111e-05, "loss": 0.8005, "step": 9324 }, { "epoch": 0.16531284205674984, "grad_norm": 2.640625, "learning_rate": 4.6801235203595195e-05, "loss": 0.8145, "step": 9326 }, { "epoch": 0.1653482940923614, "grad_norm": 2.84375, "learning_rate": 4.679986820930441e-05, "loss": 0.8435, "step": 9328 }, { "epoch": 0.165383746127973, "grad_norm": 2.8125, "learning_rate": 4.679850094295581e-05, "loss": 0.7971, "step": 9330 }, { "epoch": 0.16541919816358455, "grad_norm": 2.890625, "learning_rate": 4.6797133404566466e-05, "loss": 0.7846, "step": 9332 }, { "epoch": 0.1654546501991961, "grad_norm": 2.875, "learning_rate": 4.679576559415344e-05, "loss": 0.8079, "step": 9334 }, { "epoch": 0.1654901022348077, "grad_norm": 3.03125, "learning_rate": 4.679439751173379e-05, "loss": 0.8371, "step": 9336 }, { "epoch": 0.16552555427041926, "grad_norm": 2.71875, "learning_rate": 4.67930291573246e-05, "loss": 0.8212, "step": 9338 }, { "epoch": 0.16556100630603082, "grad_norm": 2.78125, "learning_rate": 4.679166053094295e-05, "loss": 0.7708, "step": 9340 }, { "epoch": 0.1655964583416424, "grad_norm": 2.578125, "learning_rate": 4.679029163260591e-05, "loss": 0.8194, "step": 9342 }, { "epoch": 0.16563191037725397, "grad_norm": 2.46875, "learning_rate": 4.6788922462330575e-05, "loss": 0.7825, "step": 9344 }, { "epoch": 0.16566736241286553, "grad_norm": 2.8125, "learning_rate": 4.6787553020134025e-05, "loss": 0.8312, "step": 9346 }, { "epoch": 0.16570281444847712, "grad_norm": 2.625, "learning_rate": 4.6786183306033346e-05, "loss": 0.849, "step": 9348 }, { "epoch": 0.16573826648408868, "grad_norm": 2.984375, "learning_rate": 4.678481332004564e-05, "loss": 0.8005, "step": 9350 }, { "epoch": 0.16577371851970024, "grad_norm": 3.171875, "learning_rate": 4.6783443062188e-05, "loss": 0.8119, "step": 9352 }, { "epoch": 0.16580917055531183, "grad_norm": 2.765625, "learning_rate": 4.678207253247753e-05, "loss": 0.8433, "step": 9354 }, { "epoch": 0.1658446225909234, "grad_norm": 2.640625, "learning_rate": 4.6780701730931334e-05, "loss": 0.8299, "step": 9356 }, { "epoch": 0.16588007462653495, "grad_norm": 2.703125, "learning_rate": 4.6779330657566513e-05, "loss": 0.8287, "step": 9358 }, { "epoch": 0.16591552666214654, "grad_norm": 2.875, "learning_rate": 4.677795931240018e-05, "loss": 0.8628, "step": 9360 }, { "epoch": 0.1659509786977581, "grad_norm": 2.828125, "learning_rate": 4.6776587695449455e-05, "loss": 0.8101, "step": 9362 }, { "epoch": 0.16598643073336966, "grad_norm": 2.859375, "learning_rate": 4.677521580673145e-05, "loss": 0.7773, "step": 9364 }, { "epoch": 0.16602188276898125, "grad_norm": 2.5, "learning_rate": 4.677384364626328e-05, "loss": 0.7934, "step": 9366 }, { "epoch": 0.1660573348045928, "grad_norm": 2.71875, "learning_rate": 4.6772471214062086e-05, "loss": 0.8239, "step": 9368 }, { "epoch": 0.16609278684020437, "grad_norm": 3.125, "learning_rate": 4.6771098510144984e-05, "loss": 0.7893, "step": 9370 }, { "epoch": 0.16612823887581596, "grad_norm": 2.765625, "learning_rate": 4.676972553452911e-05, "loss": 0.8154, "step": 9372 }, { "epoch": 0.16616369091142752, "grad_norm": 2.71875, "learning_rate": 4.676835228723159e-05, "loss": 0.8276, "step": 9374 }, { "epoch": 0.16619914294703908, "grad_norm": 2.96875, "learning_rate": 4.676697876826957e-05, "loss": 0.8499, "step": 9376 }, { "epoch": 0.16623459498265067, "grad_norm": 2.890625, "learning_rate": 4.676560497766019e-05, "loss": 0.8199, "step": 9378 }, { "epoch": 0.16627004701826223, "grad_norm": 2.8125, "learning_rate": 4.6764230915420596e-05, "loss": 0.8217, "step": 9380 }, { "epoch": 0.1663054990538738, "grad_norm": 2.703125, "learning_rate": 4.676285658156793e-05, "loss": 0.8098, "step": 9382 }, { "epoch": 0.16634095108948538, "grad_norm": 2.65625, "learning_rate": 4.6761481976119336e-05, "loss": 0.8065, "step": 9384 }, { "epoch": 0.16637640312509694, "grad_norm": 2.765625, "learning_rate": 4.6760107099091985e-05, "loss": 0.8115, "step": 9386 }, { "epoch": 0.1664118551607085, "grad_norm": 2.84375, "learning_rate": 4.675873195050304e-05, "loss": 0.7882, "step": 9388 }, { "epoch": 0.1664473071963201, "grad_norm": 2.625, "learning_rate": 4.6757356530369646e-05, "loss": 0.7722, "step": 9390 }, { "epoch": 0.16648275923193165, "grad_norm": 2.640625, "learning_rate": 4.6755980838708974e-05, "loss": 0.7995, "step": 9392 }, { "epoch": 0.1665182112675432, "grad_norm": 2.90625, "learning_rate": 4.6754604875538187e-05, "loss": 0.7809, "step": 9394 }, { "epoch": 0.1665536633031548, "grad_norm": 2.6875, "learning_rate": 4.6753228640874474e-05, "loss": 0.7824, "step": 9396 }, { "epoch": 0.16658911533876636, "grad_norm": 3.03125, "learning_rate": 4.6751852134734994e-05, "loss": 0.8413, "step": 9398 }, { "epoch": 0.16662456737437792, "grad_norm": 2.828125, "learning_rate": 4.6750475357136925e-05, "loss": 0.8434, "step": 9400 }, { "epoch": 0.1666600194099895, "grad_norm": 2.59375, "learning_rate": 4.6749098308097464e-05, "loss": 0.7929, "step": 9402 }, { "epoch": 0.16669547144560107, "grad_norm": 2.703125, "learning_rate": 4.674772098763378e-05, "loss": 0.8189, "step": 9404 }, { "epoch": 0.16673092348121263, "grad_norm": 3.0625, "learning_rate": 4.674634339576306e-05, "loss": 0.8326, "step": 9406 }, { "epoch": 0.16676637551682422, "grad_norm": 2.90625, "learning_rate": 4.6744965532502514e-05, "loss": 0.8473, "step": 9408 }, { "epoch": 0.16680182755243578, "grad_norm": 2.96875, "learning_rate": 4.674358739786933e-05, "loss": 0.8468, "step": 9410 }, { "epoch": 0.16683727958804734, "grad_norm": 2.75, "learning_rate": 4.6742208991880694e-05, "loss": 0.8275, "step": 9412 }, { "epoch": 0.16687273162365893, "grad_norm": 2.734375, "learning_rate": 4.6740830314553823e-05, "loss": 0.7788, "step": 9414 }, { "epoch": 0.1669081836592705, "grad_norm": 2.515625, "learning_rate": 4.6739451365905915e-05, "loss": 0.797, "step": 9416 }, { "epoch": 0.16694363569488205, "grad_norm": 3.015625, "learning_rate": 4.673807214595419e-05, "loss": 0.8269, "step": 9418 }, { "epoch": 0.16697908773049364, "grad_norm": 2.890625, "learning_rate": 4.6736692654715845e-05, "loss": 0.8637, "step": 9420 }, { "epoch": 0.1670145397661052, "grad_norm": 2.703125, "learning_rate": 4.673531289220811e-05, "loss": 0.79, "step": 9422 }, { "epoch": 0.16704999180171676, "grad_norm": 2.828125, "learning_rate": 4.6733932858448184e-05, "loss": 0.784, "step": 9424 }, { "epoch": 0.16708544383732835, "grad_norm": 2.890625, "learning_rate": 4.6732552553453316e-05, "loss": 0.7979, "step": 9426 }, { "epoch": 0.1671208958729399, "grad_norm": 2.6875, "learning_rate": 4.6731171977240713e-05, "loss": 0.8525, "step": 9428 }, { "epoch": 0.16715634790855147, "grad_norm": 2.8125, "learning_rate": 4.672979112982761e-05, "loss": 0.7991, "step": 9430 }, { "epoch": 0.16719179994416306, "grad_norm": 3.015625, "learning_rate": 4.6728410011231235e-05, "loss": 0.8529, "step": 9432 }, { "epoch": 0.16722725197977462, "grad_norm": 2.921875, "learning_rate": 4.672702862146884e-05, "loss": 0.8547, "step": 9434 }, { "epoch": 0.16726270401538618, "grad_norm": 2.796875, "learning_rate": 4.672564696055765e-05, "loss": 0.8292, "step": 9436 }, { "epoch": 0.16729815605099776, "grad_norm": 2.875, "learning_rate": 4.672426502851491e-05, "loss": 0.7923, "step": 9438 }, { "epoch": 0.16733360808660933, "grad_norm": 2.53125, "learning_rate": 4.672288282535786e-05, "loss": 0.7652, "step": 9440 }, { "epoch": 0.1673690601222209, "grad_norm": 2.71875, "learning_rate": 4.6721500351103766e-05, "loss": 0.8313, "step": 9442 }, { "epoch": 0.16740451215783247, "grad_norm": 2.953125, "learning_rate": 4.672011760576987e-05, "loss": 0.8155, "step": 9444 }, { "epoch": 0.16743996419344404, "grad_norm": 2.703125, "learning_rate": 4.671873458937342e-05, "loss": 0.8007, "step": 9446 }, { "epoch": 0.1674754162290556, "grad_norm": 2.984375, "learning_rate": 4.6717351301931704e-05, "loss": 0.8482, "step": 9448 }, { "epoch": 0.16751086826466718, "grad_norm": 3.0625, "learning_rate": 4.671596774346196e-05, "loss": 0.8175, "step": 9450 }, { "epoch": 0.16754632030027874, "grad_norm": 2.765625, "learning_rate": 4.671458391398146e-05, "loss": 0.8452, "step": 9452 }, { "epoch": 0.1675817723358903, "grad_norm": 2.875, "learning_rate": 4.6713199813507474e-05, "loss": 0.8062, "step": 9454 }, { "epoch": 0.1676172243715019, "grad_norm": 3.03125, "learning_rate": 4.6711815442057276e-05, "loss": 0.8506, "step": 9456 }, { "epoch": 0.16765267640711345, "grad_norm": 2.59375, "learning_rate": 4.671043079964815e-05, "loss": 0.7725, "step": 9458 }, { "epoch": 0.16768812844272502, "grad_norm": 2.53125, "learning_rate": 4.670904588629736e-05, "loss": 0.8275, "step": 9460 }, { "epoch": 0.1677235804783366, "grad_norm": 2.90625, "learning_rate": 4.6707660702022205e-05, "loss": 0.8068, "step": 9462 }, { "epoch": 0.16775903251394816, "grad_norm": 2.765625, "learning_rate": 4.670627524683997e-05, "loss": 0.8035, "step": 9464 }, { "epoch": 0.16779448454955972, "grad_norm": 2.640625, "learning_rate": 4.6704889520767935e-05, "loss": 0.7704, "step": 9466 }, { "epoch": 0.1678299365851713, "grad_norm": 2.46875, "learning_rate": 4.67035035238234e-05, "loss": 0.8211, "step": 9468 }, { "epoch": 0.16786538862078287, "grad_norm": 2.921875, "learning_rate": 4.6702117256023665e-05, "loss": 0.846, "step": 9470 }, { "epoch": 0.16790084065639443, "grad_norm": 2.546875, "learning_rate": 4.6700730717386024e-05, "loss": 0.8129, "step": 9472 }, { "epoch": 0.16793629269200602, "grad_norm": 2.875, "learning_rate": 4.6699343907927785e-05, "loss": 0.8938, "step": 9474 }, { "epoch": 0.16797174472761758, "grad_norm": 2.34375, "learning_rate": 4.669795682766625e-05, "loss": 0.7938, "step": 9476 }, { "epoch": 0.16800719676322914, "grad_norm": 3.125, "learning_rate": 4.6696569476618736e-05, "loss": 0.8289, "step": 9478 }, { "epoch": 0.1680426487988407, "grad_norm": 2.578125, "learning_rate": 4.669518185480255e-05, "loss": 0.8243, "step": 9480 }, { "epoch": 0.1680781008344523, "grad_norm": 2.6875, "learning_rate": 4.669379396223502e-05, "loss": 0.8155, "step": 9482 }, { "epoch": 0.16811355287006385, "grad_norm": 2.578125, "learning_rate": 4.669240579893346e-05, "loss": 0.7802, "step": 9484 }, { "epoch": 0.16814900490567541, "grad_norm": 2.75, "learning_rate": 4.669101736491519e-05, "loss": 0.8266, "step": 9486 }, { "epoch": 0.168184456941287, "grad_norm": 2.40625, "learning_rate": 4.6689628660197534e-05, "loss": 0.7776, "step": 9488 }, { "epoch": 0.16821990897689856, "grad_norm": 2.671875, "learning_rate": 4.668823968479784e-05, "loss": 0.8245, "step": 9490 }, { "epoch": 0.16825536101251012, "grad_norm": 2.578125, "learning_rate": 4.668685043873343e-05, "loss": 0.7765, "step": 9492 }, { "epoch": 0.1682908130481217, "grad_norm": 2.84375, "learning_rate": 4.6685460922021644e-05, "loss": 0.8347, "step": 9494 }, { "epoch": 0.16832626508373327, "grad_norm": 2.90625, "learning_rate": 4.668407113467983e-05, "loss": 0.8044, "step": 9496 }, { "epoch": 0.16836171711934483, "grad_norm": 2.59375, "learning_rate": 4.668268107672531e-05, "loss": 0.7938, "step": 9498 }, { "epoch": 0.16839716915495642, "grad_norm": 2.71875, "learning_rate": 4.6681290748175457e-05, "loss": 0.8243, "step": 9500 }, { "epoch": 0.16843262119056798, "grad_norm": 2.984375, "learning_rate": 4.6679900149047604e-05, "loss": 0.8175, "step": 9502 }, { "epoch": 0.16846807322617954, "grad_norm": 2.5, "learning_rate": 4.667850927935912e-05, "loss": 0.8063, "step": 9504 }, { "epoch": 0.16850352526179113, "grad_norm": 2.765625, "learning_rate": 4.6677118139127354e-05, "loss": 0.7868, "step": 9506 }, { "epoch": 0.1685389772974027, "grad_norm": 2.703125, "learning_rate": 4.6675726728369664e-05, "loss": 0.8022, "step": 9508 }, { "epoch": 0.16857442933301425, "grad_norm": 2.890625, "learning_rate": 4.667433504710342e-05, "loss": 0.8211, "step": 9510 }, { "epoch": 0.16860988136862584, "grad_norm": 2.71875, "learning_rate": 4.667294309534599e-05, "loss": 0.8062, "step": 9512 }, { "epoch": 0.1686453334042374, "grad_norm": 2.640625, "learning_rate": 4.6671550873114744e-05, "loss": 0.7539, "step": 9514 }, { "epoch": 0.16868078543984896, "grad_norm": 2.78125, "learning_rate": 4.667015838042705e-05, "loss": 0.832, "step": 9516 }, { "epoch": 0.16871623747546055, "grad_norm": 2.390625, "learning_rate": 4.6668765617300306e-05, "loss": 0.773, "step": 9518 }, { "epoch": 0.1687516895110721, "grad_norm": 2.8125, "learning_rate": 4.6667372583751875e-05, "loss": 0.832, "step": 9520 }, { "epoch": 0.16878714154668367, "grad_norm": 2.6875, "learning_rate": 4.6665979279799146e-05, "loss": 0.8179, "step": 9522 }, { "epoch": 0.16882259358229526, "grad_norm": 2.65625, "learning_rate": 4.666458570545951e-05, "loss": 0.7374, "step": 9524 }, { "epoch": 0.16885804561790682, "grad_norm": 2.796875, "learning_rate": 4.6663191860750354e-05, "loss": 0.834, "step": 9526 }, { "epoch": 0.16889349765351838, "grad_norm": 2.875, "learning_rate": 4.666179774568909e-05, "loss": 0.8472, "step": 9528 }, { "epoch": 0.16892894968912997, "grad_norm": 2.859375, "learning_rate": 4.666040336029308e-05, "loss": 0.7924, "step": 9530 }, { "epoch": 0.16896440172474153, "grad_norm": 2.953125, "learning_rate": 4.6659008704579756e-05, "loss": 0.8083, "step": 9532 }, { "epoch": 0.1689998537603531, "grad_norm": 2.65625, "learning_rate": 4.665761377856652e-05, "loss": 0.8192, "step": 9534 }, { "epoch": 0.16903530579596468, "grad_norm": 2.671875, "learning_rate": 4.6656218582270775e-05, "loss": 0.8148, "step": 9536 }, { "epoch": 0.16907075783157624, "grad_norm": 2.859375, "learning_rate": 4.665482311570992e-05, "loss": 0.8115, "step": 9538 }, { "epoch": 0.1691062098671878, "grad_norm": 3.078125, "learning_rate": 4.6653427378901395e-05, "loss": 0.8163, "step": 9540 }, { "epoch": 0.1691416619027994, "grad_norm": 2.671875, "learning_rate": 4.665203137186261e-05, "loss": 0.8194, "step": 9542 }, { "epoch": 0.16917711393841095, "grad_norm": 2.515625, "learning_rate": 4.665063509461097e-05, "loss": 0.7891, "step": 9544 }, { "epoch": 0.1692125659740225, "grad_norm": 2.796875, "learning_rate": 4.664923854716392e-05, "loss": 0.7962, "step": 9546 }, { "epoch": 0.1692480180096341, "grad_norm": 2.671875, "learning_rate": 4.664784172953888e-05, "loss": 0.7783, "step": 9548 }, { "epoch": 0.16928347004524566, "grad_norm": 2.921875, "learning_rate": 4.664644464175328e-05, "loss": 0.8456, "step": 9550 }, { "epoch": 0.16931892208085722, "grad_norm": 2.65625, "learning_rate": 4.664504728382457e-05, "loss": 0.7876, "step": 9552 }, { "epoch": 0.1693543741164688, "grad_norm": 2.609375, "learning_rate": 4.664364965577018e-05, "loss": 0.8043, "step": 9554 }, { "epoch": 0.16938982615208037, "grad_norm": 2.65625, "learning_rate": 4.664225175760754e-05, "loss": 0.7579, "step": 9556 }, { "epoch": 0.16942527818769193, "grad_norm": 2.8125, "learning_rate": 4.6640853589354114e-05, "loss": 0.8425, "step": 9558 }, { "epoch": 0.16946073022330352, "grad_norm": 2.390625, "learning_rate": 4.663945515102733e-05, "loss": 0.755, "step": 9560 }, { "epoch": 0.16949618225891508, "grad_norm": 2.546875, "learning_rate": 4.663805644264467e-05, "loss": 0.815, "step": 9562 }, { "epoch": 0.16953163429452664, "grad_norm": 2.53125, "learning_rate": 4.663665746422356e-05, "loss": 0.7912, "step": 9564 }, { "epoch": 0.16956708633013823, "grad_norm": 2.671875, "learning_rate": 4.6635258215781476e-05, "loss": 0.8059, "step": 9566 }, { "epoch": 0.1696025383657498, "grad_norm": 2.8125, "learning_rate": 4.663385869733587e-05, "loss": 0.8317, "step": 9568 }, { "epoch": 0.16963799040136135, "grad_norm": 2.859375, "learning_rate": 4.663245890890423e-05, "loss": 0.7885, "step": 9570 }, { "epoch": 0.16967344243697294, "grad_norm": 2.71875, "learning_rate": 4.663105885050399e-05, "loss": 0.7997, "step": 9572 }, { "epoch": 0.1697088944725845, "grad_norm": 2.53125, "learning_rate": 4.662965852215265e-05, "loss": 0.8182, "step": 9574 }, { "epoch": 0.16974434650819606, "grad_norm": 2.625, "learning_rate": 4.6628257923867676e-05, "loss": 0.7913, "step": 9576 }, { "epoch": 0.16977979854380765, "grad_norm": 2.625, "learning_rate": 4.6626857055666546e-05, "loss": 0.7741, "step": 9578 }, { "epoch": 0.1698152505794192, "grad_norm": 2.5625, "learning_rate": 4.662545591756675e-05, "loss": 0.7387, "step": 9580 }, { "epoch": 0.16985070261503077, "grad_norm": 2.8125, "learning_rate": 4.6624054509585755e-05, "loss": 0.8389, "step": 9582 }, { "epoch": 0.16988615465064236, "grad_norm": 3.0, "learning_rate": 4.6622652831741074e-05, "loss": 0.8325, "step": 9584 }, { "epoch": 0.16992160668625392, "grad_norm": 2.90625, "learning_rate": 4.6621250884050195e-05, "loss": 0.7699, "step": 9586 }, { "epoch": 0.16995705872186548, "grad_norm": 2.6875, "learning_rate": 4.6619848666530594e-05, "loss": 0.8166, "step": 9588 }, { "epoch": 0.16999251075747707, "grad_norm": 2.953125, "learning_rate": 4.6618446179199795e-05, "loss": 0.832, "step": 9590 }, { "epoch": 0.17002796279308863, "grad_norm": 2.6875, "learning_rate": 4.661704342207529e-05, "loss": 0.7962, "step": 9592 }, { "epoch": 0.1700634148287002, "grad_norm": 2.828125, "learning_rate": 4.661564039517458e-05, "loss": 0.7887, "step": 9594 }, { "epoch": 0.17009886686431178, "grad_norm": 2.703125, "learning_rate": 4.661423709851518e-05, "loss": 0.817, "step": 9596 }, { "epoch": 0.17013431889992334, "grad_norm": 2.90625, "learning_rate": 4.661283353211461e-05, "loss": 0.8294, "step": 9598 }, { "epoch": 0.1701697709355349, "grad_norm": 2.5, "learning_rate": 4.661142969599037e-05, "loss": 0.7648, "step": 9600 }, { "epoch": 0.17020522297114649, "grad_norm": 2.71875, "learning_rate": 4.661002559016e-05, "loss": 0.8282, "step": 9602 }, { "epoch": 0.17024067500675805, "grad_norm": 2.765625, "learning_rate": 4.6608621214641e-05, "loss": 0.8384, "step": 9604 }, { "epoch": 0.1702761270423696, "grad_norm": 2.875, "learning_rate": 4.660721656945092e-05, "loss": 0.7908, "step": 9606 }, { "epoch": 0.1703115790779812, "grad_norm": 2.484375, "learning_rate": 4.6605811654607265e-05, "loss": 0.8134, "step": 9608 }, { "epoch": 0.17034703111359276, "grad_norm": 3.0, "learning_rate": 4.660440647012759e-05, "loss": 0.7794, "step": 9610 }, { "epoch": 0.17038248314920432, "grad_norm": 2.609375, "learning_rate": 4.6603001016029425e-05, "loss": 0.8215, "step": 9612 }, { "epoch": 0.1704179351848159, "grad_norm": 2.84375, "learning_rate": 4.66015952923303e-05, "loss": 0.8334, "step": 9614 }, { "epoch": 0.17045338722042747, "grad_norm": 2.609375, "learning_rate": 4.660018929904776e-05, "loss": 0.7975, "step": 9616 }, { "epoch": 0.17048883925603903, "grad_norm": 2.828125, "learning_rate": 4.659878303619937e-05, "loss": 0.8501, "step": 9618 }, { "epoch": 0.17052429129165061, "grad_norm": 2.5625, "learning_rate": 4.659737650380265e-05, "loss": 0.7872, "step": 9620 }, { "epoch": 0.17055974332726218, "grad_norm": 2.578125, "learning_rate": 4.6595969701875184e-05, "loss": 0.8015, "step": 9622 }, { "epoch": 0.17059519536287374, "grad_norm": 2.859375, "learning_rate": 4.6594562630434505e-05, "loss": 0.7942, "step": 9624 }, { "epoch": 0.17063064739848532, "grad_norm": 2.71875, "learning_rate": 4.659315528949819e-05, "loss": 0.7957, "step": 9626 }, { "epoch": 0.17066609943409689, "grad_norm": 2.390625, "learning_rate": 4.659174767908379e-05, "loss": 0.8032, "step": 9628 }, { "epoch": 0.17070155146970845, "grad_norm": 2.828125, "learning_rate": 4.659033979920888e-05, "loss": 0.8284, "step": 9630 }, { "epoch": 0.17073700350532003, "grad_norm": 2.78125, "learning_rate": 4.658893164989102e-05, "loss": 0.8575, "step": 9632 }, { "epoch": 0.1707724555409316, "grad_norm": 2.640625, "learning_rate": 4.658752323114779e-05, "loss": 0.818, "step": 9634 }, { "epoch": 0.17080790757654316, "grad_norm": 2.921875, "learning_rate": 4.6586114542996776e-05, "loss": 0.8272, "step": 9636 }, { "epoch": 0.17084335961215474, "grad_norm": 2.734375, "learning_rate": 4.6584705585455534e-05, "loss": 0.8401, "step": 9638 }, { "epoch": 0.1708788116477663, "grad_norm": 2.625, "learning_rate": 4.6583296358541675e-05, "loss": 0.8057, "step": 9640 }, { "epoch": 0.17091426368337787, "grad_norm": 2.46875, "learning_rate": 4.658188686227277e-05, "loss": 0.776, "step": 9642 }, { "epoch": 0.17094971571898945, "grad_norm": 2.8125, "learning_rate": 4.658047709666641e-05, "loss": 0.8276, "step": 9644 }, { "epoch": 0.17098516775460101, "grad_norm": 2.828125, "learning_rate": 4.657906706174019e-05, "loss": 0.7901, "step": 9646 }, { "epoch": 0.17102061979021257, "grad_norm": 2.90625, "learning_rate": 4.65776567575117e-05, "loss": 0.8314, "step": 9648 }, { "epoch": 0.17105607182582414, "grad_norm": 2.484375, "learning_rate": 4.6576246183998564e-05, "loss": 0.7915, "step": 9650 }, { "epoch": 0.17109152386143572, "grad_norm": 2.84375, "learning_rate": 4.657483534121836e-05, "loss": 0.7905, "step": 9652 }, { "epoch": 0.17112697589704728, "grad_norm": 2.671875, "learning_rate": 4.65734242291887e-05, "loss": 0.7404, "step": 9654 }, { "epoch": 0.17116242793265884, "grad_norm": 2.5625, "learning_rate": 4.657201284792721e-05, "loss": 0.829, "step": 9656 }, { "epoch": 0.17119787996827043, "grad_norm": 2.375, "learning_rate": 4.657060119745149e-05, "loss": 0.7781, "step": 9658 }, { "epoch": 0.171233332003882, "grad_norm": 2.5, "learning_rate": 4.6569189277779154e-05, "loss": 0.785, "step": 9660 }, { "epoch": 0.17126878403949355, "grad_norm": 2.84375, "learning_rate": 4.6567777088927836e-05, "loss": 0.7846, "step": 9662 }, { "epoch": 0.17130423607510514, "grad_norm": 2.640625, "learning_rate": 4.656636463091515e-05, "loss": 0.8225, "step": 9664 }, { "epoch": 0.1713396881107167, "grad_norm": 2.796875, "learning_rate": 4.656495190375872e-05, "loss": 0.7614, "step": 9666 }, { "epoch": 0.17137514014632826, "grad_norm": 2.734375, "learning_rate": 4.656353890747619e-05, "loss": 0.8443, "step": 9668 }, { "epoch": 0.17141059218193985, "grad_norm": 3.0625, "learning_rate": 4.656212564208518e-05, "loss": 0.8243, "step": 9670 }, { "epoch": 0.1714460442175514, "grad_norm": 2.640625, "learning_rate": 4.6560712107603334e-05, "loss": 0.7915, "step": 9672 }, { "epoch": 0.17148149625316297, "grad_norm": 2.8125, "learning_rate": 4.655929830404829e-05, "loss": 0.7426, "step": 9674 }, { "epoch": 0.17151694828877456, "grad_norm": 2.953125, "learning_rate": 4.65578842314377e-05, "loss": 0.7934, "step": 9676 }, { "epoch": 0.17155240032438612, "grad_norm": 2.9375, "learning_rate": 4.65564698897892e-05, "loss": 0.8462, "step": 9678 }, { "epoch": 0.17158785235999768, "grad_norm": 2.890625, "learning_rate": 4.6555055279120444e-05, "loss": 0.8183, "step": 9680 }, { "epoch": 0.17162330439560927, "grad_norm": 2.859375, "learning_rate": 4.655364039944909e-05, "loss": 0.821, "step": 9682 }, { "epoch": 0.17165875643122083, "grad_norm": 2.78125, "learning_rate": 4.6552225250792794e-05, "loss": 0.7928, "step": 9684 }, { "epoch": 0.1716942084668324, "grad_norm": 3.140625, "learning_rate": 4.655080983316922e-05, "loss": 0.845, "step": 9686 }, { "epoch": 0.17172966050244398, "grad_norm": 2.75, "learning_rate": 4.654939414659602e-05, "loss": 0.8374, "step": 9688 }, { "epoch": 0.17176511253805554, "grad_norm": 2.609375, "learning_rate": 4.654797819109087e-05, "loss": 0.8192, "step": 9690 }, { "epoch": 0.1718005645736671, "grad_norm": 2.75, "learning_rate": 4.654656196667145e-05, "loss": 0.7864, "step": 9692 }, { "epoch": 0.1718360166092787, "grad_norm": 2.625, "learning_rate": 4.654514547335541e-05, "loss": 0.8356, "step": 9694 }, { "epoch": 0.17187146864489025, "grad_norm": 2.71875, "learning_rate": 4.6543728711160456e-05, "loss": 0.819, "step": 9696 }, { "epoch": 0.1719069206805018, "grad_norm": 2.859375, "learning_rate": 4.654231168010425e-05, "loss": 0.7734, "step": 9698 }, { "epoch": 0.1719423727161134, "grad_norm": 2.71875, "learning_rate": 4.654089438020448e-05, "loss": 0.8568, "step": 9700 }, { "epoch": 0.17197782475172496, "grad_norm": 3.0, "learning_rate": 4.653947681147883e-05, "loss": 0.8154, "step": 9702 }, { "epoch": 0.17201327678733652, "grad_norm": 2.421875, "learning_rate": 4.6538058973945004e-05, "loss": 0.7839, "step": 9704 }, { "epoch": 0.1720487288229481, "grad_norm": 2.859375, "learning_rate": 4.6536640867620686e-05, "loss": 0.8061, "step": 9706 }, { "epoch": 0.17208418085855967, "grad_norm": 2.640625, "learning_rate": 4.653522249252357e-05, "loss": 0.8139, "step": 9708 }, { "epoch": 0.17211963289417123, "grad_norm": 2.734375, "learning_rate": 4.6533803848671366e-05, "loss": 0.8192, "step": 9710 }, { "epoch": 0.17215508492978282, "grad_norm": 2.453125, "learning_rate": 4.6532384936081777e-05, "loss": 0.8006, "step": 9712 }, { "epoch": 0.17219053696539438, "grad_norm": 2.84375, "learning_rate": 4.65309657547725e-05, "loss": 0.8358, "step": 9714 }, { "epoch": 0.17222598900100594, "grad_norm": 2.75, "learning_rate": 4.652954630476127e-05, "loss": 0.8254, "step": 9716 }, { "epoch": 0.17226144103661753, "grad_norm": 2.828125, "learning_rate": 4.652812658606578e-05, "loss": 0.832, "step": 9718 }, { "epoch": 0.1722968930722291, "grad_norm": 2.515625, "learning_rate": 4.652670659870375e-05, "loss": 0.7853, "step": 9720 }, { "epoch": 0.17233234510784065, "grad_norm": 3.125, "learning_rate": 4.652528634269291e-05, "loss": 0.8334, "step": 9722 }, { "epoch": 0.17236779714345224, "grad_norm": 2.890625, "learning_rate": 4.6523865818050984e-05, "loss": 0.8368, "step": 9724 }, { "epoch": 0.1724032491790638, "grad_norm": 2.8125, "learning_rate": 4.6522445024795694e-05, "loss": 0.8178, "step": 9726 }, { "epoch": 0.17243870121467536, "grad_norm": 2.875, "learning_rate": 4.6521023962944765e-05, "loss": 0.819, "step": 9728 }, { "epoch": 0.17247415325028695, "grad_norm": 2.6875, "learning_rate": 4.651960263251594e-05, "loss": 0.8075, "step": 9730 }, { "epoch": 0.1725096052858985, "grad_norm": 2.640625, "learning_rate": 4.6518181033526966e-05, "loss": 0.8005, "step": 9732 }, { "epoch": 0.17254505732151007, "grad_norm": 2.875, "learning_rate": 4.6516759165995563e-05, "loss": 0.8483, "step": 9734 }, { "epoch": 0.17258050935712166, "grad_norm": 2.90625, "learning_rate": 4.651533702993949e-05, "loss": 0.8286, "step": 9736 }, { "epoch": 0.17261596139273322, "grad_norm": 2.84375, "learning_rate": 4.65139146253765e-05, "loss": 0.7821, "step": 9738 }, { "epoch": 0.17265141342834478, "grad_norm": 2.859375, "learning_rate": 4.6512491952324334e-05, "loss": 0.851, "step": 9740 }, { "epoch": 0.17268686546395637, "grad_norm": 2.6875, "learning_rate": 4.6511069010800745e-05, "loss": 0.7941, "step": 9742 }, { "epoch": 0.17272231749956793, "grad_norm": 2.609375, "learning_rate": 4.6509645800823494e-05, "loss": 0.7729, "step": 9744 }, { "epoch": 0.1727577695351795, "grad_norm": 2.53125, "learning_rate": 4.650822232241034e-05, "loss": 0.8009, "step": 9746 }, { "epoch": 0.17279322157079108, "grad_norm": 2.484375, "learning_rate": 4.650679857557906e-05, "loss": 0.7401, "step": 9748 }, { "epoch": 0.17282867360640264, "grad_norm": 2.71875, "learning_rate": 4.6505374560347415e-05, "loss": 0.7513, "step": 9750 }, { "epoch": 0.1728641256420142, "grad_norm": 2.765625, "learning_rate": 4.650395027673317e-05, "loss": 0.8464, "step": 9752 }, { "epoch": 0.1728995776776258, "grad_norm": 2.734375, "learning_rate": 4.650252572475411e-05, "loss": 0.8083, "step": 9754 }, { "epoch": 0.17293502971323735, "grad_norm": 3.0, "learning_rate": 4.6501100904427996e-05, "loss": 0.7998, "step": 9756 }, { "epoch": 0.1729704817488489, "grad_norm": 2.453125, "learning_rate": 4.649967581577263e-05, "loss": 0.7697, "step": 9758 }, { "epoch": 0.1730059337844605, "grad_norm": 2.90625, "learning_rate": 4.649825045880579e-05, "loss": 0.7944, "step": 9760 }, { "epoch": 0.17304138582007206, "grad_norm": 2.875, "learning_rate": 4.649682483354525e-05, "loss": 0.8214, "step": 9762 }, { "epoch": 0.17307683785568362, "grad_norm": 2.71875, "learning_rate": 4.649539894000883e-05, "loss": 0.7868, "step": 9764 }, { "epoch": 0.1731122898912952, "grad_norm": 2.546875, "learning_rate": 4.6493972778214294e-05, "loss": 0.7774, "step": 9766 }, { "epoch": 0.17314774192690677, "grad_norm": 2.96875, "learning_rate": 4.649254634817946e-05, "loss": 0.8818, "step": 9768 }, { "epoch": 0.17318319396251833, "grad_norm": 2.5, "learning_rate": 4.6491119649922124e-05, "loss": 0.7984, "step": 9770 }, { "epoch": 0.17321864599812992, "grad_norm": 2.78125, "learning_rate": 4.64896926834601e-05, "loss": 0.7955, "step": 9772 }, { "epoch": 0.17325409803374148, "grad_norm": 2.6875, "learning_rate": 4.648826544881117e-05, "loss": 0.8068, "step": 9774 }, { "epoch": 0.17328955006935304, "grad_norm": 2.4375, "learning_rate": 4.648683794599318e-05, "loss": 0.8155, "step": 9776 }, { "epoch": 0.17332500210496463, "grad_norm": 2.828125, "learning_rate": 4.648541017502392e-05, "loss": 0.8327, "step": 9778 }, { "epoch": 0.1733604541405762, "grad_norm": 2.78125, "learning_rate": 4.648398213592121e-05, "loss": 0.8585, "step": 9780 }, { "epoch": 0.17339590617618775, "grad_norm": 2.65625, "learning_rate": 4.648255382870288e-05, "loss": 0.8518, "step": 9782 }, { "epoch": 0.17343135821179934, "grad_norm": 2.765625, "learning_rate": 4.648112525338676e-05, "loss": 0.7884, "step": 9784 }, { "epoch": 0.1734668102474109, "grad_norm": 3.1875, "learning_rate": 4.647969640999066e-05, "loss": 0.8202, "step": 9786 }, { "epoch": 0.17350226228302246, "grad_norm": 2.875, "learning_rate": 4.6478267298532434e-05, "loss": 0.8754, "step": 9788 }, { "epoch": 0.17353771431863405, "grad_norm": 2.859375, "learning_rate": 4.6476837919029904e-05, "loss": 0.8141, "step": 9790 }, { "epoch": 0.1735731663542456, "grad_norm": 2.765625, "learning_rate": 4.647540827150091e-05, "loss": 0.826, "step": 9792 }, { "epoch": 0.17360861838985717, "grad_norm": 2.6875, "learning_rate": 4.647397835596329e-05, "loss": 0.804, "step": 9794 }, { "epoch": 0.17364407042546876, "grad_norm": 2.703125, "learning_rate": 4.647254817243489e-05, "loss": 0.8226, "step": 9796 }, { "epoch": 0.17367952246108032, "grad_norm": 2.703125, "learning_rate": 4.647111772093356e-05, "loss": 0.8509, "step": 9798 }, { "epoch": 0.17371497449669188, "grad_norm": 2.484375, "learning_rate": 4.646968700147717e-05, "loss": 0.7859, "step": 9800 }, { "epoch": 0.17375042653230346, "grad_norm": 2.78125, "learning_rate": 4.6468256014083546e-05, "loss": 0.7955, "step": 9802 }, { "epoch": 0.17378587856791503, "grad_norm": 2.828125, "learning_rate": 4.6466824758770555e-05, "loss": 0.8422, "step": 9804 }, { "epoch": 0.17382133060352659, "grad_norm": 2.53125, "learning_rate": 4.6465393235556066e-05, "loss": 0.8353, "step": 9806 }, { "epoch": 0.17385678263913817, "grad_norm": 3.015625, "learning_rate": 4.6463961444457934e-05, "loss": 0.8482, "step": 9808 }, { "epoch": 0.17389223467474973, "grad_norm": 2.828125, "learning_rate": 4.646252938549405e-05, "loss": 0.798, "step": 9810 }, { "epoch": 0.1739276867103613, "grad_norm": 2.53125, "learning_rate": 4.646109705868226e-05, "loss": 0.8061, "step": 9812 }, { "epoch": 0.17396313874597288, "grad_norm": 2.515625, "learning_rate": 4.645966446404044e-05, "loss": 0.756, "step": 9814 }, { "epoch": 0.17399859078158444, "grad_norm": 2.953125, "learning_rate": 4.645823160158649e-05, "loss": 0.8085, "step": 9816 }, { "epoch": 0.174034042817196, "grad_norm": 2.90625, "learning_rate": 4.645679847133827e-05, "loss": 0.83, "step": 9818 }, { "epoch": 0.17406949485280757, "grad_norm": 2.734375, "learning_rate": 4.645536507331368e-05, "loss": 0.7976, "step": 9820 }, { "epoch": 0.17410494688841915, "grad_norm": 3.140625, "learning_rate": 4.64539314075306e-05, "loss": 0.8144, "step": 9822 }, { "epoch": 0.17414039892403071, "grad_norm": 2.828125, "learning_rate": 4.645249747400693e-05, "loss": 0.7811, "step": 9824 }, { "epoch": 0.17417585095964228, "grad_norm": 2.59375, "learning_rate": 4.645106327276056e-05, "loss": 0.7529, "step": 9826 }, { "epoch": 0.17421130299525386, "grad_norm": 2.6875, "learning_rate": 4.6449628803809384e-05, "loss": 0.7915, "step": 9828 }, { "epoch": 0.17424675503086542, "grad_norm": 3.15625, "learning_rate": 4.644819406717131e-05, "loss": 0.7945, "step": 9830 }, { "epoch": 0.17428220706647699, "grad_norm": 2.828125, "learning_rate": 4.6446759062864236e-05, "loss": 0.8408, "step": 9832 }, { "epoch": 0.17431765910208857, "grad_norm": 2.640625, "learning_rate": 4.644532379090608e-05, "loss": 0.7478, "step": 9834 }, { "epoch": 0.17435311113770013, "grad_norm": 3.09375, "learning_rate": 4.644388825131475e-05, "loss": 0.7955, "step": 9836 }, { "epoch": 0.1743885631733117, "grad_norm": 2.609375, "learning_rate": 4.6442452444108166e-05, "loss": 0.8422, "step": 9838 }, { "epoch": 0.17442401520892328, "grad_norm": 2.625, "learning_rate": 4.644101636930423e-05, "loss": 0.7869, "step": 9840 }, { "epoch": 0.17445946724453484, "grad_norm": 2.6875, "learning_rate": 4.643958002692088e-05, "loss": 0.8166, "step": 9842 }, { "epoch": 0.1744949192801464, "grad_norm": 2.625, "learning_rate": 4.643814341697604e-05, "loss": 0.7817, "step": 9844 }, { "epoch": 0.174530371315758, "grad_norm": 2.90625, "learning_rate": 4.6436706539487636e-05, "loss": 0.8151, "step": 9846 }, { "epoch": 0.17456582335136955, "grad_norm": 2.859375, "learning_rate": 4.64352693944736e-05, "loss": 0.7965, "step": 9848 }, { "epoch": 0.17460127538698111, "grad_norm": 2.5625, "learning_rate": 4.643383198195186e-05, "loss": 0.7691, "step": 9850 }, { "epoch": 0.1746367274225927, "grad_norm": 2.703125, "learning_rate": 4.643239430194036e-05, "loss": 0.779, "step": 9852 }, { "epoch": 0.17467217945820426, "grad_norm": 2.765625, "learning_rate": 4.6430956354457054e-05, "loss": 0.7911, "step": 9854 }, { "epoch": 0.17470763149381582, "grad_norm": 2.609375, "learning_rate": 4.642951813951987e-05, "loss": 0.7949, "step": 9856 }, { "epoch": 0.1747430835294274, "grad_norm": 2.578125, "learning_rate": 4.642807965714676e-05, "loss": 0.8238, "step": 9858 }, { "epoch": 0.17477853556503897, "grad_norm": 2.578125, "learning_rate": 4.642664090735569e-05, "loss": 0.7866, "step": 9860 }, { "epoch": 0.17481398760065053, "grad_norm": 2.671875, "learning_rate": 4.64252018901646e-05, "loss": 0.813, "step": 9862 }, { "epoch": 0.17484943963626212, "grad_norm": 2.765625, "learning_rate": 4.642376260559145e-05, "loss": 0.7915, "step": 9864 }, { "epoch": 0.17488489167187368, "grad_norm": 2.734375, "learning_rate": 4.6422323053654205e-05, "loss": 0.8049, "step": 9866 }, { "epoch": 0.17492034370748524, "grad_norm": 2.546875, "learning_rate": 4.6420883234370826e-05, "loss": 0.8036, "step": 9868 }, { "epoch": 0.17495579574309683, "grad_norm": 3.078125, "learning_rate": 4.641944314775929e-05, "loss": 0.824, "step": 9870 }, { "epoch": 0.1749912477787084, "grad_norm": 2.9375, "learning_rate": 4.6418002793837564e-05, "loss": 0.8099, "step": 9872 }, { "epoch": 0.17502669981431995, "grad_norm": 2.734375, "learning_rate": 4.641656217262362e-05, "loss": 0.8045, "step": 9874 }, { "epoch": 0.17506215184993154, "grad_norm": 2.609375, "learning_rate": 4.6415121284135453e-05, "loss": 0.8309, "step": 9876 }, { "epoch": 0.1750976038855431, "grad_norm": 2.4375, "learning_rate": 4.641368012839102e-05, "loss": 0.8056, "step": 9878 }, { "epoch": 0.17513305592115466, "grad_norm": 2.671875, "learning_rate": 4.641223870540833e-05, "loss": 0.7938, "step": 9880 }, { "epoch": 0.17516850795676625, "grad_norm": 2.515625, "learning_rate": 4.641079701520535e-05, "loss": 0.7869, "step": 9882 }, { "epoch": 0.1752039599923778, "grad_norm": 2.5625, "learning_rate": 4.64093550578001e-05, "loss": 0.7944, "step": 9884 }, { "epoch": 0.17523941202798937, "grad_norm": 2.71875, "learning_rate": 4.640791283321054e-05, "loss": 0.7952, "step": 9886 }, { "epoch": 0.17527486406360096, "grad_norm": 2.6875, "learning_rate": 4.64064703414547e-05, "loss": 0.8147, "step": 9888 }, { "epoch": 0.17531031609921252, "grad_norm": 3.0, "learning_rate": 4.6405027582550556e-05, "loss": 0.8132, "step": 9890 }, { "epoch": 0.17534576813482408, "grad_norm": 2.609375, "learning_rate": 4.640358455651613e-05, "loss": 0.8292, "step": 9892 }, { "epoch": 0.17538122017043567, "grad_norm": 2.78125, "learning_rate": 4.640214126336943e-05, "loss": 0.7597, "step": 9894 }, { "epoch": 0.17541667220604723, "grad_norm": 2.734375, "learning_rate": 4.640069770312846e-05, "loss": 0.7683, "step": 9896 }, { "epoch": 0.1754521242416588, "grad_norm": 2.9375, "learning_rate": 4.639925387581125e-05, "loss": 0.7725, "step": 9898 }, { "epoch": 0.17548757627727038, "grad_norm": 2.890625, "learning_rate": 4.6397809781435805e-05, "loss": 0.7968, "step": 9900 }, { "epoch": 0.17552302831288194, "grad_norm": 2.453125, "learning_rate": 4.639636542002015e-05, "loss": 0.7619, "step": 9902 }, { "epoch": 0.1755584803484935, "grad_norm": 2.609375, "learning_rate": 4.639492079158231e-05, "loss": 0.8393, "step": 9904 }, { "epoch": 0.1755939323841051, "grad_norm": 3.109375, "learning_rate": 4.6393475896140306e-05, "loss": 0.8553, "step": 9906 }, { "epoch": 0.17562938441971665, "grad_norm": 2.75, "learning_rate": 4.639203073371219e-05, "loss": 0.8141, "step": 9908 }, { "epoch": 0.1756648364553282, "grad_norm": 2.484375, "learning_rate": 4.639058530431598e-05, "loss": 0.799, "step": 9910 }, { "epoch": 0.1757002884909398, "grad_norm": 2.875, "learning_rate": 4.638913960796973e-05, "loss": 0.8397, "step": 9912 }, { "epoch": 0.17573574052655136, "grad_norm": 2.765625, "learning_rate": 4.6387693644691464e-05, "loss": 0.7984, "step": 9914 }, { "epoch": 0.17577119256216292, "grad_norm": 2.375, "learning_rate": 4.638624741449924e-05, "loss": 0.7958, "step": 9916 }, { "epoch": 0.1758066445977745, "grad_norm": 2.796875, "learning_rate": 4.63848009174111e-05, "loss": 0.8291, "step": 9918 }, { "epoch": 0.17584209663338607, "grad_norm": 2.53125, "learning_rate": 4.63833541534451e-05, "loss": 0.8011, "step": 9920 }, { "epoch": 0.17587754866899763, "grad_norm": 2.640625, "learning_rate": 4.63819071226193e-05, "loss": 0.7897, "step": 9922 }, { "epoch": 0.17591300070460922, "grad_norm": 2.734375, "learning_rate": 4.638045982495174e-05, "loss": 0.8142, "step": 9924 }, { "epoch": 0.17594845274022078, "grad_norm": 3.0625, "learning_rate": 4.637901226046051e-05, "loss": 0.785, "step": 9926 }, { "epoch": 0.17598390477583234, "grad_norm": 2.609375, "learning_rate": 4.6377564429163645e-05, "loss": 0.7962, "step": 9928 }, { "epoch": 0.17601935681144393, "grad_norm": 2.515625, "learning_rate": 4.6376116331079235e-05, "loss": 0.7905, "step": 9930 }, { "epoch": 0.1760548088470555, "grad_norm": 2.515625, "learning_rate": 4.637466796622535e-05, "loss": 0.7657, "step": 9932 }, { "epoch": 0.17609026088266705, "grad_norm": 2.859375, "learning_rate": 4.637321933462006e-05, "loss": 0.795, "step": 9934 }, { "epoch": 0.17612571291827864, "grad_norm": 2.625, "learning_rate": 4.6371770436281436e-05, "loss": 0.8107, "step": 9936 }, { "epoch": 0.1761611649538902, "grad_norm": 2.46875, "learning_rate": 4.637032127122757e-05, "loss": 0.777, "step": 9938 }, { "epoch": 0.17619661698950176, "grad_norm": 2.40625, "learning_rate": 4.636887183947655e-05, "loss": 0.8037, "step": 9940 }, { "epoch": 0.17623206902511335, "grad_norm": 2.703125, "learning_rate": 4.6367422141046455e-05, "loss": 0.8038, "step": 9942 }, { "epoch": 0.1762675210607249, "grad_norm": 2.859375, "learning_rate": 4.6365972175955394e-05, "loss": 0.8317, "step": 9944 }, { "epoch": 0.17630297309633647, "grad_norm": 2.59375, "learning_rate": 4.636452194422144e-05, "loss": 0.7815, "step": 9946 }, { "epoch": 0.17633842513194806, "grad_norm": 2.6875, "learning_rate": 4.6363071445862704e-05, "loss": 0.8249, "step": 9948 }, { "epoch": 0.17637387716755962, "grad_norm": 2.421875, "learning_rate": 4.636162068089729e-05, "loss": 0.7989, "step": 9950 }, { "epoch": 0.17640932920317118, "grad_norm": 2.671875, "learning_rate": 4.636016964934329e-05, "loss": 0.8027, "step": 9952 }, { "epoch": 0.17644478123878277, "grad_norm": 2.953125, "learning_rate": 4.635871835121883e-05, "loss": 0.8011, "step": 9954 }, { "epoch": 0.17648023327439433, "grad_norm": 2.6875, "learning_rate": 4.6357266786542006e-05, "loss": 0.7947, "step": 9956 }, { "epoch": 0.1765156853100059, "grad_norm": 2.859375, "learning_rate": 4.6355814955330954e-05, "loss": 0.8287, "step": 9958 }, { "epoch": 0.17655113734561748, "grad_norm": 2.59375, "learning_rate": 4.635436285760377e-05, "loss": 0.7452, "step": 9960 }, { "epoch": 0.17658658938122904, "grad_norm": 2.59375, "learning_rate": 4.635291049337859e-05, "loss": 0.784, "step": 9962 }, { "epoch": 0.1766220414168406, "grad_norm": 2.859375, "learning_rate": 4.635145786267353e-05, "loss": 0.7922, "step": 9964 }, { "epoch": 0.17665749345245219, "grad_norm": 3.03125, "learning_rate": 4.635000496550672e-05, "loss": 0.8115, "step": 9966 }, { "epoch": 0.17669294548806375, "grad_norm": 2.84375, "learning_rate": 4.63485518018963e-05, "loss": 0.7737, "step": 9968 }, { "epoch": 0.1767283975236753, "grad_norm": 2.71875, "learning_rate": 4.6347098371860396e-05, "loss": 0.8111, "step": 9970 }, { "epoch": 0.1767638495592869, "grad_norm": 2.625, "learning_rate": 4.634564467541715e-05, "loss": 0.7941, "step": 9972 }, { "epoch": 0.17679930159489846, "grad_norm": 2.65625, "learning_rate": 4.634419071258472e-05, "loss": 0.8295, "step": 9974 }, { "epoch": 0.17683475363051002, "grad_norm": 2.734375, "learning_rate": 4.634273648338122e-05, "loss": 0.8232, "step": 9976 }, { "epoch": 0.1768702056661216, "grad_norm": 2.921875, "learning_rate": 4.6341281987824817e-05, "loss": 0.7908, "step": 9978 }, { "epoch": 0.17690565770173317, "grad_norm": 2.5625, "learning_rate": 4.6339827225933665e-05, "loss": 0.8186, "step": 9980 }, { "epoch": 0.17694110973734473, "grad_norm": 2.71875, "learning_rate": 4.633837219772591e-05, "loss": 0.8195, "step": 9982 }, { "epoch": 0.17697656177295631, "grad_norm": 2.609375, "learning_rate": 4.633691690321971e-05, "loss": 0.8031, "step": 9984 }, { "epoch": 0.17701201380856788, "grad_norm": 2.921875, "learning_rate": 4.633546134243324e-05, "loss": 0.8055, "step": 9986 }, { "epoch": 0.17704746584417944, "grad_norm": 2.765625, "learning_rate": 4.633400551538465e-05, "loss": 0.798, "step": 9988 }, { "epoch": 0.177082917879791, "grad_norm": 2.609375, "learning_rate": 4.633254942209212e-05, "loss": 0.8628, "step": 9990 }, { "epoch": 0.17711836991540258, "grad_norm": 2.734375, "learning_rate": 4.633109306257381e-05, "loss": 0.8108, "step": 9992 }, { "epoch": 0.17715382195101415, "grad_norm": 2.765625, "learning_rate": 4.632963643684791e-05, "loss": 0.791, "step": 9994 }, { "epoch": 0.1771892739866257, "grad_norm": 2.671875, "learning_rate": 4.632817954493258e-05, "loss": 0.8108, "step": 9996 }, { "epoch": 0.1772247260222373, "grad_norm": 2.921875, "learning_rate": 4.632672238684602e-05, "loss": 0.8331, "step": 9998 }, { "epoch": 0.17726017805784886, "grad_norm": 2.640625, "learning_rate": 4.6325264962606395e-05, "loss": 0.7822, "step": 10000 }, { "epoch": 0.17729563009346042, "grad_norm": 2.421875, "learning_rate": 4.6323807272231915e-05, "loss": 0.7935, "step": 10002 }, { "epoch": 0.177331082129072, "grad_norm": 2.65625, "learning_rate": 4.6322349315740756e-05, "loss": 0.7826, "step": 10004 }, { "epoch": 0.17736653416468356, "grad_norm": 2.578125, "learning_rate": 4.632089109315113e-05, "loss": 0.8238, "step": 10006 }, { "epoch": 0.17740198620029513, "grad_norm": 2.640625, "learning_rate": 4.631943260448122e-05, "loss": 0.8166, "step": 10008 }, { "epoch": 0.1774374382359067, "grad_norm": 2.671875, "learning_rate": 4.631797384974922e-05, "loss": 0.7851, "step": 10010 }, { "epoch": 0.17747289027151827, "grad_norm": 2.953125, "learning_rate": 4.631651482897336e-05, "loss": 0.7606, "step": 10012 }, { "epoch": 0.17750834230712983, "grad_norm": 2.71875, "learning_rate": 4.631505554217183e-05, "loss": 0.8473, "step": 10014 }, { "epoch": 0.17754379434274142, "grad_norm": 2.90625, "learning_rate": 4.6313595989362844e-05, "loss": 0.8125, "step": 10016 }, { "epoch": 0.17757924637835298, "grad_norm": 2.671875, "learning_rate": 4.631213617056462e-05, "loss": 0.7986, "step": 10018 }, { "epoch": 0.17761469841396454, "grad_norm": 2.90625, "learning_rate": 4.6310676085795376e-05, "loss": 0.8402, "step": 10020 }, { "epoch": 0.17765015044957613, "grad_norm": 2.953125, "learning_rate": 4.630921573507333e-05, "loss": 0.8056, "step": 10022 }, { "epoch": 0.1776856024851877, "grad_norm": 2.796875, "learning_rate": 4.630775511841672e-05, "loss": 0.7992, "step": 10024 }, { "epoch": 0.17772105452079925, "grad_norm": 2.859375, "learning_rate": 4.630629423584376e-05, "loss": 0.8099, "step": 10026 }, { "epoch": 0.17775650655641084, "grad_norm": 2.75, "learning_rate": 4.6304833087372676e-05, "loss": 0.8018, "step": 10028 }, { "epoch": 0.1777919585920224, "grad_norm": 2.75, "learning_rate": 4.6303371673021726e-05, "loss": 0.8144, "step": 10030 }, { "epoch": 0.17782741062763396, "grad_norm": 2.6875, "learning_rate": 4.630190999280912e-05, "loss": 0.8091, "step": 10032 }, { "epoch": 0.17786286266324555, "grad_norm": 3.015625, "learning_rate": 4.630044804675313e-05, "loss": 0.8219, "step": 10034 }, { "epoch": 0.1778983146988571, "grad_norm": 2.59375, "learning_rate": 4.629898583487198e-05, "loss": 0.8331, "step": 10036 }, { "epoch": 0.17793376673446867, "grad_norm": 2.609375, "learning_rate": 4.629752335718391e-05, "loss": 0.8037, "step": 10038 }, { "epoch": 0.17796921877008026, "grad_norm": 2.78125, "learning_rate": 4.62960606137072e-05, "loss": 0.8163, "step": 10040 }, { "epoch": 0.17800467080569182, "grad_norm": 2.65625, "learning_rate": 4.6294597604460086e-05, "loss": 0.7786, "step": 10042 }, { "epoch": 0.17804012284130338, "grad_norm": 2.890625, "learning_rate": 4.629313432946083e-05, "loss": 0.8375, "step": 10044 }, { "epoch": 0.17807557487691497, "grad_norm": 2.75, "learning_rate": 4.629167078872769e-05, "loss": 0.8391, "step": 10046 }, { "epoch": 0.17811102691252653, "grad_norm": 2.84375, "learning_rate": 4.629020698227893e-05, "loss": 0.7991, "step": 10048 }, { "epoch": 0.1781464789481381, "grad_norm": 2.46875, "learning_rate": 4.6288742910132834e-05, "loss": 0.7886, "step": 10050 }, { "epoch": 0.17818193098374968, "grad_norm": 2.671875, "learning_rate": 4.628727857230765e-05, "loss": 0.8491, "step": 10052 }, { "epoch": 0.17821738301936124, "grad_norm": 2.484375, "learning_rate": 4.628581396882166e-05, "loss": 0.7983, "step": 10054 }, { "epoch": 0.1782528350549728, "grad_norm": 2.78125, "learning_rate": 4.628434909969315e-05, "loss": 0.8217, "step": 10056 }, { "epoch": 0.1782882870905844, "grad_norm": 2.625, "learning_rate": 4.62828839649404e-05, "loss": 0.8234, "step": 10058 }, { "epoch": 0.17832373912619595, "grad_norm": 2.578125, "learning_rate": 4.628141856458168e-05, "loss": 0.8098, "step": 10060 }, { "epoch": 0.1783591911618075, "grad_norm": 2.609375, "learning_rate": 4.6279952898635305e-05, "loss": 0.7831, "step": 10062 }, { "epoch": 0.1783946431974191, "grad_norm": 2.703125, "learning_rate": 4.627848696711954e-05, "loss": 0.8053, "step": 10064 }, { "epoch": 0.17843009523303066, "grad_norm": 2.796875, "learning_rate": 4.6277020770052695e-05, "loss": 0.8616, "step": 10066 }, { "epoch": 0.17846554726864222, "grad_norm": 2.71875, "learning_rate": 4.627555430745305e-05, "loss": 0.8083, "step": 10068 }, { "epoch": 0.1785009993042538, "grad_norm": 2.75, "learning_rate": 4.6274087579338934e-05, "loss": 0.7734, "step": 10070 }, { "epoch": 0.17853645133986537, "grad_norm": 2.671875, "learning_rate": 4.6272620585728626e-05, "loss": 0.7799, "step": 10072 }, { "epoch": 0.17857190337547693, "grad_norm": 2.578125, "learning_rate": 4.627115332664045e-05, "loss": 0.7787, "step": 10074 }, { "epoch": 0.17860735541108852, "grad_norm": 2.515625, "learning_rate": 4.626968580209271e-05, "loss": 0.7949, "step": 10076 }, { "epoch": 0.17864280744670008, "grad_norm": 2.609375, "learning_rate": 4.6268218012103716e-05, "loss": 0.8278, "step": 10078 }, { "epoch": 0.17867825948231164, "grad_norm": 3.0, "learning_rate": 4.6266749956691794e-05, "loss": 0.8582, "step": 10080 }, { "epoch": 0.17871371151792323, "grad_norm": 2.78125, "learning_rate": 4.626528163587527e-05, "loss": 0.8595, "step": 10082 }, { "epoch": 0.1787491635535348, "grad_norm": 2.671875, "learning_rate": 4.626381304967244e-05, "loss": 0.8001, "step": 10084 }, { "epoch": 0.17878461558914635, "grad_norm": 2.65625, "learning_rate": 4.626234419810167e-05, "loss": 0.7742, "step": 10086 }, { "epoch": 0.17882006762475794, "grad_norm": 2.65625, "learning_rate": 4.626087508118127e-05, "loss": 0.7925, "step": 10088 }, { "epoch": 0.1788555196603695, "grad_norm": 2.578125, "learning_rate": 4.625940569892958e-05, "loss": 0.8243, "step": 10090 }, { "epoch": 0.17889097169598106, "grad_norm": 2.921875, "learning_rate": 4.6257936051364927e-05, "loss": 0.8089, "step": 10092 }, { "epoch": 0.17892642373159265, "grad_norm": 2.640625, "learning_rate": 4.625646613850566e-05, "loss": 0.815, "step": 10094 }, { "epoch": 0.1789618757672042, "grad_norm": 3.0, "learning_rate": 4.6254995960370126e-05, "loss": 0.8038, "step": 10096 }, { "epoch": 0.17899732780281577, "grad_norm": 2.765625, "learning_rate": 4.625352551697667e-05, "loss": 0.7894, "step": 10098 }, { "epoch": 0.17903277983842736, "grad_norm": 2.875, "learning_rate": 4.6252054808343645e-05, "loss": 0.7992, "step": 10100 }, { "epoch": 0.17906823187403892, "grad_norm": 2.84375, "learning_rate": 4.62505838344894e-05, "loss": 0.7953, "step": 10102 }, { "epoch": 0.17910368390965048, "grad_norm": 2.5625, "learning_rate": 4.62491125954323e-05, "loss": 0.8013, "step": 10104 }, { "epoch": 0.17913913594526207, "grad_norm": 2.890625, "learning_rate": 4.624764109119069e-05, "loss": 0.8029, "step": 10106 }, { "epoch": 0.17917458798087363, "grad_norm": 2.875, "learning_rate": 4.624616932178295e-05, "loss": 0.8491, "step": 10108 }, { "epoch": 0.1792100400164852, "grad_norm": 2.53125, "learning_rate": 4.624469728722744e-05, "loss": 0.7669, "step": 10110 }, { "epoch": 0.17924549205209678, "grad_norm": 2.640625, "learning_rate": 4.624322498754253e-05, "loss": 0.7937, "step": 10112 }, { "epoch": 0.17928094408770834, "grad_norm": 2.8125, "learning_rate": 4.624175242274661e-05, "loss": 0.8431, "step": 10114 }, { "epoch": 0.1793163961233199, "grad_norm": 2.625, "learning_rate": 4.624027959285804e-05, "loss": 0.8262, "step": 10116 }, { "epoch": 0.1793518481589315, "grad_norm": 2.734375, "learning_rate": 4.6238806497895194e-05, "loss": 0.8436, "step": 10118 }, { "epoch": 0.17938730019454305, "grad_norm": 2.9375, "learning_rate": 4.623733313787647e-05, "loss": 0.8014, "step": 10120 }, { "epoch": 0.1794227522301546, "grad_norm": 2.609375, "learning_rate": 4.623585951282026e-05, "loss": 0.7842, "step": 10122 }, { "epoch": 0.1794582042657662, "grad_norm": 2.609375, "learning_rate": 4.623438562274494e-05, "loss": 0.7823, "step": 10124 }, { "epoch": 0.17949365630137776, "grad_norm": 2.9375, "learning_rate": 4.623291146766892e-05, "loss": 0.8083, "step": 10126 }, { "epoch": 0.17952910833698932, "grad_norm": 2.828125, "learning_rate": 4.623143704761057e-05, "loss": 0.7822, "step": 10128 }, { "epoch": 0.1795645603726009, "grad_norm": 2.78125, "learning_rate": 4.622996236258832e-05, "loss": 0.8341, "step": 10130 }, { "epoch": 0.17960001240821247, "grad_norm": 2.484375, "learning_rate": 4.622848741262056e-05, "loss": 0.8124, "step": 10132 }, { "epoch": 0.17963546444382403, "grad_norm": 2.71875, "learning_rate": 4.6227012197725695e-05, "loss": 0.8148, "step": 10134 }, { "epoch": 0.17967091647943562, "grad_norm": 2.5625, "learning_rate": 4.622553671792213e-05, "loss": 0.7565, "step": 10136 }, { "epoch": 0.17970636851504718, "grad_norm": 2.578125, "learning_rate": 4.6224060973228314e-05, "loss": 0.7274, "step": 10138 }, { "epoch": 0.17974182055065874, "grad_norm": 2.96875, "learning_rate": 4.622258496366262e-05, "loss": 0.8538, "step": 10140 }, { "epoch": 0.17977727258627033, "grad_norm": 2.96875, "learning_rate": 4.622110868924349e-05, "loss": 0.8225, "step": 10142 }, { "epoch": 0.1798127246218819, "grad_norm": 2.59375, "learning_rate": 4.6219632149989336e-05, "loss": 0.7981, "step": 10144 }, { "epoch": 0.17984817665749345, "grad_norm": 2.65625, "learning_rate": 4.62181553459186e-05, "loss": 0.8034, "step": 10146 }, { "epoch": 0.17988362869310504, "grad_norm": 2.828125, "learning_rate": 4.6216678277049705e-05, "loss": 0.8419, "step": 10148 }, { "epoch": 0.1799190807287166, "grad_norm": 2.609375, "learning_rate": 4.621520094340108e-05, "loss": 0.7972, "step": 10150 }, { "epoch": 0.17995453276432816, "grad_norm": 2.578125, "learning_rate": 4.6213723344991163e-05, "loss": 0.7692, "step": 10152 }, { "epoch": 0.17998998479993975, "grad_norm": 2.578125, "learning_rate": 4.621224548183841e-05, "loss": 0.7733, "step": 10154 }, { "epoch": 0.1800254368355513, "grad_norm": 2.71875, "learning_rate": 4.621076735396124e-05, "loss": 0.7879, "step": 10156 }, { "epoch": 0.18006088887116287, "grad_norm": 2.875, "learning_rate": 4.620928896137812e-05, "loss": 0.7648, "step": 10158 }, { "epoch": 0.18009634090677445, "grad_norm": 3.015625, "learning_rate": 4.620781030410749e-05, "loss": 0.8248, "step": 10160 }, { "epoch": 0.18013179294238602, "grad_norm": 2.328125, "learning_rate": 4.62063313821678e-05, "loss": 0.7791, "step": 10162 }, { "epoch": 0.18016724497799758, "grad_norm": 2.859375, "learning_rate": 4.6204852195577506e-05, "loss": 0.8162, "step": 10164 }, { "epoch": 0.18020269701360914, "grad_norm": 3.03125, "learning_rate": 4.620337274435508e-05, "loss": 0.8146, "step": 10166 }, { "epoch": 0.18023814904922072, "grad_norm": 2.640625, "learning_rate": 4.6201893028518986e-05, "loss": 0.825, "step": 10168 }, { "epoch": 0.18027360108483229, "grad_norm": 2.75, "learning_rate": 4.620041304808767e-05, "loss": 0.7898, "step": 10170 }, { "epoch": 0.18030905312044385, "grad_norm": 2.65625, "learning_rate": 4.619893280307962e-05, "loss": 0.793, "step": 10172 }, { "epoch": 0.18034450515605543, "grad_norm": 2.875, "learning_rate": 4.619745229351331e-05, "loss": 0.7651, "step": 10174 }, { "epoch": 0.180379957191667, "grad_norm": 2.6875, "learning_rate": 4.61959715194072e-05, "loss": 0.7769, "step": 10176 }, { "epoch": 0.18041540922727856, "grad_norm": 2.671875, "learning_rate": 4.619449048077979e-05, "loss": 0.8221, "step": 10178 }, { "epoch": 0.18045086126289014, "grad_norm": 2.71875, "learning_rate": 4.619300917764955e-05, "loss": 0.7848, "step": 10180 }, { "epoch": 0.1804863132985017, "grad_norm": 2.828125, "learning_rate": 4.6191527610034965e-05, "loss": 0.7816, "step": 10182 }, { "epoch": 0.18052176533411327, "grad_norm": 2.75, "learning_rate": 4.619004577795453e-05, "loss": 0.7701, "step": 10184 }, { "epoch": 0.18055721736972485, "grad_norm": 2.640625, "learning_rate": 4.618856368142674e-05, "loss": 0.8016, "step": 10186 }, { "epoch": 0.18059266940533641, "grad_norm": 2.609375, "learning_rate": 4.6187081320470096e-05, "loss": 0.7553, "step": 10188 }, { "epoch": 0.18062812144094798, "grad_norm": 2.84375, "learning_rate": 4.6185598695103075e-05, "loss": 0.7912, "step": 10190 }, { "epoch": 0.18066357347655956, "grad_norm": 2.875, "learning_rate": 4.6184115805344206e-05, "loss": 0.8097, "step": 10192 }, { "epoch": 0.18069902551217112, "grad_norm": 2.734375, "learning_rate": 4.6182632651211976e-05, "loss": 0.7898, "step": 10194 }, { "epoch": 0.18073447754778268, "grad_norm": 2.765625, "learning_rate": 4.618114923272491e-05, "loss": 0.8266, "step": 10196 }, { "epoch": 0.18076992958339427, "grad_norm": 2.859375, "learning_rate": 4.6179665549901506e-05, "loss": 0.7944, "step": 10198 }, { "epoch": 0.18080538161900583, "grad_norm": 2.59375, "learning_rate": 4.617818160276029e-05, "loss": 0.7928, "step": 10200 }, { "epoch": 0.1808408336546174, "grad_norm": 2.859375, "learning_rate": 4.617669739131979e-05, "loss": 0.8188, "step": 10202 }, { "epoch": 0.18087628569022898, "grad_norm": 2.671875, "learning_rate": 4.617521291559851e-05, "loss": 0.7336, "step": 10204 }, { "epoch": 0.18091173772584054, "grad_norm": 2.53125, "learning_rate": 4.617372817561497e-05, "loss": 0.7776, "step": 10206 }, { "epoch": 0.1809471897614521, "grad_norm": 2.75, "learning_rate": 4.617224317138773e-05, "loss": 0.7534, "step": 10208 }, { "epoch": 0.1809826417970637, "grad_norm": 3.0, "learning_rate": 4.6170757902935296e-05, "loss": 0.7864, "step": 10210 }, { "epoch": 0.18101809383267525, "grad_norm": 2.609375, "learning_rate": 4.616927237027622e-05, "loss": 0.8448, "step": 10212 }, { "epoch": 0.1810535458682868, "grad_norm": 2.8125, "learning_rate": 4.616778657342903e-05, "loss": 0.7974, "step": 10214 }, { "epoch": 0.1810889979038984, "grad_norm": 2.625, "learning_rate": 4.616630051241227e-05, "loss": 0.8253, "step": 10216 }, { "epoch": 0.18112444993950996, "grad_norm": 2.9375, "learning_rate": 4.616481418724449e-05, "loss": 0.8596, "step": 10218 }, { "epoch": 0.18115990197512152, "grad_norm": 2.765625, "learning_rate": 4.616332759794424e-05, "loss": 0.8015, "step": 10220 }, { "epoch": 0.1811953540107331, "grad_norm": 3.1875, "learning_rate": 4.616184074453006e-05, "loss": 0.857, "step": 10222 }, { "epoch": 0.18123080604634467, "grad_norm": 2.671875, "learning_rate": 4.616035362702053e-05, "loss": 0.781, "step": 10224 }, { "epoch": 0.18126625808195623, "grad_norm": 2.84375, "learning_rate": 4.615886624543418e-05, "loss": 0.8163, "step": 10226 }, { "epoch": 0.18130171011756782, "grad_norm": 2.921875, "learning_rate": 4.615737859978959e-05, "loss": 0.8093, "step": 10228 }, { "epoch": 0.18133716215317938, "grad_norm": 2.671875, "learning_rate": 4.615589069010533e-05, "loss": 0.8303, "step": 10230 }, { "epoch": 0.18137261418879094, "grad_norm": 2.96875, "learning_rate": 4.615440251639995e-05, "loss": 0.8302, "step": 10232 }, { "epoch": 0.18140806622440253, "grad_norm": 2.328125, "learning_rate": 4.6152914078692046e-05, "loss": 0.8092, "step": 10234 }, { "epoch": 0.1814435182600141, "grad_norm": 2.53125, "learning_rate": 4.615142537700017e-05, "loss": 0.8085, "step": 10236 }, { "epoch": 0.18147897029562565, "grad_norm": 2.75, "learning_rate": 4.614993641134291e-05, "loss": 0.7855, "step": 10238 }, { "epoch": 0.18151442233123724, "grad_norm": 2.78125, "learning_rate": 4.614844718173885e-05, "loss": 0.8175, "step": 10240 }, { "epoch": 0.1815498743668488, "grad_norm": 2.953125, "learning_rate": 4.6146957688206585e-05, "loss": 0.8237, "step": 10242 }, { "epoch": 0.18158532640246036, "grad_norm": 2.90625, "learning_rate": 4.614546793076467e-05, "loss": 0.8189, "step": 10244 }, { "epoch": 0.18162077843807195, "grad_norm": 2.578125, "learning_rate": 4.614397790943174e-05, "loss": 0.7813, "step": 10246 }, { "epoch": 0.1816562304736835, "grad_norm": 2.71875, "learning_rate": 4.6142487624226364e-05, "loss": 0.8565, "step": 10248 }, { "epoch": 0.18169168250929507, "grad_norm": 2.78125, "learning_rate": 4.614099707516715e-05, "loss": 0.7952, "step": 10250 }, { "epoch": 0.18172713454490666, "grad_norm": 2.890625, "learning_rate": 4.6139506262272684e-05, "loss": 0.772, "step": 10252 }, { "epoch": 0.18176258658051822, "grad_norm": 2.8125, "learning_rate": 4.613801518556159e-05, "loss": 0.8336, "step": 10254 }, { "epoch": 0.18179803861612978, "grad_norm": 2.65625, "learning_rate": 4.613652384505247e-05, "loss": 0.8098, "step": 10256 }, { "epoch": 0.18183349065174137, "grad_norm": 3.09375, "learning_rate": 4.613503224076393e-05, "loss": 0.7743, "step": 10258 }, { "epoch": 0.18186894268735293, "grad_norm": 2.78125, "learning_rate": 4.613354037271459e-05, "loss": 0.8096, "step": 10260 }, { "epoch": 0.1819043947229645, "grad_norm": 2.828125, "learning_rate": 4.6132048240923075e-05, "loss": 0.8208, "step": 10262 }, { "epoch": 0.18193984675857608, "grad_norm": 2.9375, "learning_rate": 4.6130555845408e-05, "loss": 0.8287, "step": 10264 }, { "epoch": 0.18197529879418764, "grad_norm": 3.03125, "learning_rate": 4.612906318618798e-05, "loss": 0.8224, "step": 10266 }, { "epoch": 0.1820107508297992, "grad_norm": 2.828125, "learning_rate": 4.612757026328166e-05, "loss": 0.8103, "step": 10268 }, { "epoch": 0.1820462028654108, "grad_norm": 2.875, "learning_rate": 4.6126077076707665e-05, "loss": 0.8066, "step": 10270 }, { "epoch": 0.18208165490102235, "grad_norm": 2.4375, "learning_rate": 4.612458362648462e-05, "loss": 0.7646, "step": 10272 }, { "epoch": 0.1821171069366339, "grad_norm": 2.671875, "learning_rate": 4.612308991263118e-05, "loss": 0.8349, "step": 10274 }, { "epoch": 0.1821525589722455, "grad_norm": 2.78125, "learning_rate": 4.612159593516597e-05, "loss": 0.8153, "step": 10276 }, { "epoch": 0.18218801100785706, "grad_norm": 2.671875, "learning_rate": 4.612010169410764e-05, "loss": 0.7964, "step": 10278 }, { "epoch": 0.18222346304346862, "grad_norm": 2.765625, "learning_rate": 4.611860718947485e-05, "loss": 0.7736, "step": 10280 }, { "epoch": 0.1822589150790802, "grad_norm": 2.75, "learning_rate": 4.6117112421286235e-05, "loss": 0.819, "step": 10282 }, { "epoch": 0.18229436711469177, "grad_norm": 2.53125, "learning_rate": 4.611561738956046e-05, "loss": 0.8259, "step": 10284 }, { "epoch": 0.18232981915030333, "grad_norm": 3.203125, "learning_rate": 4.611412209431617e-05, "loss": 0.8199, "step": 10286 }, { "epoch": 0.18236527118591492, "grad_norm": 2.984375, "learning_rate": 4.6112626535572035e-05, "loss": 0.7539, "step": 10288 }, { "epoch": 0.18240072322152648, "grad_norm": 2.671875, "learning_rate": 4.611113071334673e-05, "loss": 0.7697, "step": 10290 }, { "epoch": 0.18243617525713804, "grad_norm": 3.03125, "learning_rate": 4.610963462765889e-05, "loss": 0.8111, "step": 10292 }, { "epoch": 0.18247162729274963, "grad_norm": 2.84375, "learning_rate": 4.6108138278527226e-05, "loss": 0.8284, "step": 10294 }, { "epoch": 0.1825070793283612, "grad_norm": 2.828125, "learning_rate": 4.610664166597039e-05, "loss": 0.8089, "step": 10296 }, { "epoch": 0.18254253136397275, "grad_norm": 2.96875, "learning_rate": 4.610514479000706e-05, "loss": 0.8126, "step": 10298 }, { "epoch": 0.18257798339958434, "grad_norm": 2.875, "learning_rate": 4.610364765065591e-05, "loss": 0.8213, "step": 10300 }, { "epoch": 0.1826134354351959, "grad_norm": 2.703125, "learning_rate": 4.610215024793564e-05, "loss": 0.7928, "step": 10302 }, { "epoch": 0.18264888747080746, "grad_norm": 2.640625, "learning_rate": 4.6100652581864925e-05, "loss": 0.7968, "step": 10304 }, { "epoch": 0.18268433950641905, "grad_norm": 2.625, "learning_rate": 4.6099154652462474e-05, "loss": 0.8257, "step": 10306 }, { "epoch": 0.1827197915420306, "grad_norm": 2.71875, "learning_rate": 4.609765645974695e-05, "loss": 0.8239, "step": 10308 }, { "epoch": 0.18275524357764217, "grad_norm": 2.859375, "learning_rate": 4.609615800373708e-05, "loss": 0.8294, "step": 10310 }, { "epoch": 0.18279069561325376, "grad_norm": 2.71875, "learning_rate": 4.609465928445155e-05, "loss": 0.7955, "step": 10312 }, { "epoch": 0.18282614764886532, "grad_norm": 2.765625, "learning_rate": 4.609316030190906e-05, "loss": 0.7991, "step": 10314 }, { "epoch": 0.18286159968447688, "grad_norm": 2.59375, "learning_rate": 4.609166105612833e-05, "loss": 0.7639, "step": 10316 }, { "epoch": 0.18289705172008847, "grad_norm": 2.859375, "learning_rate": 4.6090161547128065e-05, "loss": 0.8347, "step": 10318 }, { "epoch": 0.18293250375570003, "grad_norm": 2.65625, "learning_rate": 4.6088661774926975e-05, "loss": 0.7859, "step": 10320 }, { "epoch": 0.1829679557913116, "grad_norm": 2.78125, "learning_rate": 4.608716173954377e-05, "loss": 0.8036, "step": 10322 }, { "epoch": 0.18300340782692318, "grad_norm": 2.5, "learning_rate": 4.6085661440997185e-05, "loss": 0.7947, "step": 10324 }, { "epoch": 0.18303885986253474, "grad_norm": 2.859375, "learning_rate": 4.608416087930594e-05, "loss": 0.7943, "step": 10326 }, { "epoch": 0.1830743118981463, "grad_norm": 2.671875, "learning_rate": 4.608266005448876e-05, "loss": 0.8049, "step": 10328 }, { "epoch": 0.18310976393375789, "grad_norm": 2.59375, "learning_rate": 4.608115896656437e-05, "loss": 0.7881, "step": 10330 }, { "epoch": 0.18314521596936945, "grad_norm": 2.9375, "learning_rate": 4.6079657615551495e-05, "loss": 0.8153, "step": 10332 }, { "epoch": 0.183180668004981, "grad_norm": 2.5625, "learning_rate": 4.60781560014689e-05, "loss": 0.8135, "step": 10334 }, { "epoch": 0.18321612004059257, "grad_norm": 2.421875, "learning_rate": 4.607665412433531e-05, "loss": 0.7861, "step": 10336 }, { "epoch": 0.18325157207620416, "grad_norm": 2.65625, "learning_rate": 4.607515198416945e-05, "loss": 0.8023, "step": 10338 }, { "epoch": 0.18328702411181572, "grad_norm": 2.6875, "learning_rate": 4.6073649580990096e-05, "loss": 0.7736, "step": 10340 }, { "epoch": 0.18332247614742728, "grad_norm": 2.65625, "learning_rate": 4.607214691481598e-05, "loss": 0.825, "step": 10342 }, { "epoch": 0.18335792818303887, "grad_norm": 2.71875, "learning_rate": 4.6070643985665864e-05, "loss": 0.8063, "step": 10344 }, { "epoch": 0.18339338021865043, "grad_norm": 2.875, "learning_rate": 4.6069140793558495e-05, "loss": 0.806, "step": 10346 }, { "epoch": 0.183428832254262, "grad_norm": 2.5625, "learning_rate": 4.606763733851264e-05, "loss": 0.8334, "step": 10348 }, { "epoch": 0.18346428428987357, "grad_norm": 2.8125, "learning_rate": 4.606613362054706e-05, "loss": 0.8018, "step": 10350 }, { "epoch": 0.18349973632548514, "grad_norm": 3.0, "learning_rate": 4.6064629639680514e-05, "loss": 0.8372, "step": 10352 }, { "epoch": 0.1835351883610967, "grad_norm": 2.984375, "learning_rate": 4.606312539593178e-05, "loss": 0.8061, "step": 10354 }, { "epoch": 0.18357064039670828, "grad_norm": 2.75, "learning_rate": 4.606162088931963e-05, "loss": 0.814, "step": 10356 }, { "epoch": 0.18360609243231985, "grad_norm": 2.84375, "learning_rate": 4.606011611986283e-05, "loss": 0.8303, "step": 10358 }, { "epoch": 0.1836415444679314, "grad_norm": 2.828125, "learning_rate": 4.605861108758018e-05, "loss": 0.813, "step": 10360 }, { "epoch": 0.183676996503543, "grad_norm": 3.03125, "learning_rate": 4.6057105792490446e-05, "loss": 0.8015, "step": 10362 }, { "epoch": 0.18371244853915455, "grad_norm": 2.546875, "learning_rate": 4.605560023461242e-05, "loss": 0.7731, "step": 10364 }, { "epoch": 0.18374790057476612, "grad_norm": 2.765625, "learning_rate": 4.6054094413964876e-05, "loss": 0.768, "step": 10366 }, { "epoch": 0.1837833526103777, "grad_norm": 2.96875, "learning_rate": 4.605258833056663e-05, "loss": 0.7992, "step": 10368 }, { "epoch": 0.18381880464598926, "grad_norm": 2.796875, "learning_rate": 4.605108198443647e-05, "loss": 0.81, "step": 10370 }, { "epoch": 0.18385425668160083, "grad_norm": 2.625, "learning_rate": 4.604957537559318e-05, "loss": 0.8038, "step": 10372 }, { "epoch": 0.1838897087172124, "grad_norm": 2.890625, "learning_rate": 4.604806850405559e-05, "loss": 0.8335, "step": 10374 }, { "epoch": 0.18392516075282397, "grad_norm": 2.78125, "learning_rate": 4.604656136984247e-05, "loss": 0.8094, "step": 10376 }, { "epoch": 0.18396061278843553, "grad_norm": 2.578125, "learning_rate": 4.6045053972972654e-05, "loss": 0.8018, "step": 10378 }, { "epoch": 0.18399606482404712, "grad_norm": 2.6875, "learning_rate": 4.604354631346495e-05, "loss": 0.7497, "step": 10380 }, { "epoch": 0.18403151685965868, "grad_norm": 2.640625, "learning_rate": 4.6042038391338174e-05, "loss": 0.7949, "step": 10382 }, { "epoch": 0.18406696889527024, "grad_norm": 2.46875, "learning_rate": 4.6040530206611146e-05, "loss": 0.7979, "step": 10384 }, { "epoch": 0.18410242093088183, "grad_norm": 2.703125, "learning_rate": 4.603902175930267e-05, "loss": 0.8234, "step": 10386 }, { "epoch": 0.1841378729664934, "grad_norm": 2.890625, "learning_rate": 4.603751304943159e-05, "loss": 0.8141, "step": 10388 }, { "epoch": 0.18417332500210495, "grad_norm": 3.171875, "learning_rate": 4.603600407701673e-05, "loss": 0.779, "step": 10390 }, { "epoch": 0.18420877703771654, "grad_norm": 2.671875, "learning_rate": 4.603449484207692e-05, "loss": 0.7836, "step": 10392 }, { "epoch": 0.1842442290733281, "grad_norm": 2.75, "learning_rate": 4.6032985344631e-05, "loss": 0.8044, "step": 10394 }, { "epoch": 0.18427968110893966, "grad_norm": 2.5625, "learning_rate": 4.60314755846978e-05, "loss": 0.8166, "step": 10396 }, { "epoch": 0.18431513314455125, "grad_norm": 2.734375, "learning_rate": 4.602996556229616e-05, "loss": 0.8144, "step": 10398 }, { "epoch": 0.1843505851801628, "grad_norm": 2.765625, "learning_rate": 4.6028455277444936e-05, "loss": 0.7835, "step": 10400 }, { "epoch": 0.18438603721577437, "grad_norm": 2.828125, "learning_rate": 4.602694473016297e-05, "loss": 0.8057, "step": 10402 }, { "epoch": 0.18442148925138596, "grad_norm": 2.875, "learning_rate": 4.6025433920469117e-05, "loss": 0.7827, "step": 10404 }, { "epoch": 0.18445694128699752, "grad_norm": 2.6875, "learning_rate": 4.6023922848382215e-05, "loss": 0.7865, "step": 10406 }, { "epoch": 0.18449239332260908, "grad_norm": 2.96875, "learning_rate": 4.6022411513921146e-05, "loss": 0.8172, "step": 10408 }, { "epoch": 0.18452784535822067, "grad_norm": 2.90625, "learning_rate": 4.602089991710475e-05, "loss": 0.8207, "step": 10410 }, { "epoch": 0.18456329739383223, "grad_norm": 2.90625, "learning_rate": 4.6019388057951916e-05, "loss": 0.8287, "step": 10412 }, { "epoch": 0.1845987494294438, "grad_norm": 2.875, "learning_rate": 4.6017875936481494e-05, "loss": 0.801, "step": 10414 }, { "epoch": 0.18463420146505538, "grad_norm": 2.734375, "learning_rate": 4.601636355271235e-05, "loss": 0.8247, "step": 10416 }, { "epoch": 0.18466965350066694, "grad_norm": 2.765625, "learning_rate": 4.601485090666337e-05, "loss": 0.8076, "step": 10418 }, { "epoch": 0.1847051055362785, "grad_norm": 2.5, "learning_rate": 4.601333799835343e-05, "loss": 0.8205, "step": 10420 }, { "epoch": 0.1847405575718901, "grad_norm": 2.890625, "learning_rate": 4.60118248278014e-05, "loss": 0.7802, "step": 10422 }, { "epoch": 0.18477600960750165, "grad_norm": 2.984375, "learning_rate": 4.601031139502619e-05, "loss": 0.791, "step": 10424 }, { "epoch": 0.1848114616431132, "grad_norm": 2.53125, "learning_rate": 4.6008797700046647e-05, "loss": 0.8082, "step": 10426 }, { "epoch": 0.1848469136787248, "grad_norm": 2.578125, "learning_rate": 4.6007283742881704e-05, "loss": 0.7884, "step": 10428 }, { "epoch": 0.18488236571433636, "grad_norm": 2.75, "learning_rate": 4.6005769523550226e-05, "loss": 0.7965, "step": 10430 }, { "epoch": 0.18491781774994792, "grad_norm": 2.828125, "learning_rate": 4.600425504207112e-05, "loss": 0.7819, "step": 10432 }, { "epoch": 0.1849532697855595, "grad_norm": 2.640625, "learning_rate": 4.600274029846329e-05, "loss": 0.8161, "step": 10434 }, { "epoch": 0.18498872182117107, "grad_norm": 2.65625, "learning_rate": 4.600122529274563e-05, "loss": 0.7533, "step": 10436 }, { "epoch": 0.18502417385678263, "grad_norm": 2.765625, "learning_rate": 4.599971002493706e-05, "loss": 0.7857, "step": 10438 }, { "epoch": 0.18505962589239422, "grad_norm": 2.9375, "learning_rate": 4.599819449505647e-05, "loss": 0.741, "step": 10440 }, { "epoch": 0.18509507792800578, "grad_norm": 2.6875, "learning_rate": 4.5996678703122794e-05, "loss": 0.7814, "step": 10442 }, { "epoch": 0.18513052996361734, "grad_norm": 2.96875, "learning_rate": 4.5995162649154944e-05, "loss": 0.8245, "step": 10444 }, { "epoch": 0.18516598199922893, "grad_norm": 3.21875, "learning_rate": 4.5993646333171837e-05, "loss": 0.8011, "step": 10446 }, { "epoch": 0.1852014340348405, "grad_norm": 2.6875, "learning_rate": 4.599212975519239e-05, "loss": 0.7667, "step": 10448 }, { "epoch": 0.18523688607045205, "grad_norm": 2.84375, "learning_rate": 4.5990612915235545e-05, "loss": 0.8283, "step": 10450 }, { "epoch": 0.18527233810606364, "grad_norm": 2.859375, "learning_rate": 4.598909581332021e-05, "loss": 0.8087, "step": 10452 }, { "epoch": 0.1853077901416752, "grad_norm": 2.671875, "learning_rate": 4.598757844946534e-05, "loss": 0.8038, "step": 10454 }, { "epoch": 0.18534324217728676, "grad_norm": 2.828125, "learning_rate": 4.598606082368986e-05, "loss": 0.8039, "step": 10456 }, { "epoch": 0.18537869421289835, "grad_norm": 2.96875, "learning_rate": 4.5984542936012716e-05, "loss": 0.7779, "step": 10458 }, { "epoch": 0.1854141462485099, "grad_norm": 3.0, "learning_rate": 4.598302478645284e-05, "loss": 0.8191, "step": 10460 }, { "epoch": 0.18544959828412147, "grad_norm": 2.78125, "learning_rate": 4.5981506375029194e-05, "loss": 0.7917, "step": 10462 }, { "epoch": 0.18548505031973306, "grad_norm": 2.71875, "learning_rate": 4.597998770176071e-05, "loss": 0.8076, "step": 10464 }, { "epoch": 0.18552050235534462, "grad_norm": 2.390625, "learning_rate": 4.597846876666635e-05, "loss": 0.7947, "step": 10466 }, { "epoch": 0.18555595439095618, "grad_norm": 2.90625, "learning_rate": 4.597694956976508e-05, "loss": 0.8099, "step": 10468 }, { "epoch": 0.18559140642656777, "grad_norm": 3.046875, "learning_rate": 4.597543011107584e-05, "loss": 0.8352, "step": 10470 }, { "epoch": 0.18562685846217933, "grad_norm": 2.890625, "learning_rate": 4.59739103906176e-05, "loss": 0.7795, "step": 10472 }, { "epoch": 0.1856623104977909, "grad_norm": 2.71875, "learning_rate": 4.597239040840933e-05, "loss": 0.7894, "step": 10474 }, { "epoch": 0.18569776253340248, "grad_norm": 3.046875, "learning_rate": 4.5970870164469995e-05, "loss": 0.8411, "step": 10476 }, { "epoch": 0.18573321456901404, "grad_norm": 2.984375, "learning_rate": 4.5969349658818575e-05, "loss": 0.7486, "step": 10478 }, { "epoch": 0.1857686666046256, "grad_norm": 2.640625, "learning_rate": 4.596782889147403e-05, "loss": 0.8125, "step": 10480 }, { "epoch": 0.1858041186402372, "grad_norm": 2.671875, "learning_rate": 4.5966307862455344e-05, "loss": 0.8195, "step": 10482 }, { "epoch": 0.18583957067584875, "grad_norm": 2.84375, "learning_rate": 4.596478657178151e-05, "loss": 0.8261, "step": 10484 }, { "epoch": 0.1858750227114603, "grad_norm": 2.71875, "learning_rate": 4.5963265019471504e-05, "loss": 0.809, "step": 10486 }, { "epoch": 0.1859104747470719, "grad_norm": 2.9375, "learning_rate": 4.596174320554432e-05, "loss": 0.8152, "step": 10488 }, { "epoch": 0.18594592678268346, "grad_norm": 2.59375, "learning_rate": 4.5960221130018946e-05, "loss": 0.7875, "step": 10490 }, { "epoch": 0.18598137881829502, "grad_norm": 2.859375, "learning_rate": 4.5958698792914364e-05, "loss": 0.8312, "step": 10492 }, { "epoch": 0.1860168308539066, "grad_norm": 2.671875, "learning_rate": 4.595717619424961e-05, "loss": 0.746, "step": 10494 }, { "epoch": 0.18605228288951817, "grad_norm": 2.796875, "learning_rate": 4.595565333404365e-05, "loss": 0.8165, "step": 10496 }, { "epoch": 0.18608773492512973, "grad_norm": 2.625, "learning_rate": 4.59541302123155e-05, "loss": 0.7756, "step": 10498 }, { "epoch": 0.18612318696074132, "grad_norm": 2.3125, "learning_rate": 4.595260682908417e-05, "loss": 0.7439, "step": 10500 }, { "epoch": 0.18615863899635288, "grad_norm": 2.875, "learning_rate": 4.595108318436867e-05, "loss": 0.8294, "step": 10502 }, { "epoch": 0.18619409103196444, "grad_norm": 2.703125, "learning_rate": 4.594955927818802e-05, "loss": 0.7616, "step": 10504 }, { "epoch": 0.186229543067576, "grad_norm": 2.6875, "learning_rate": 4.5948035110561236e-05, "loss": 0.7847, "step": 10506 }, { "epoch": 0.1862649951031876, "grad_norm": 2.890625, "learning_rate": 4.5946510681507326e-05, "loss": 0.8121, "step": 10508 }, { "epoch": 0.18630044713879915, "grad_norm": 2.8125, "learning_rate": 4.5944985991045333e-05, "loss": 0.8213, "step": 10510 }, { "epoch": 0.1863358991744107, "grad_norm": 2.890625, "learning_rate": 4.594346103919428e-05, "loss": 0.8258, "step": 10512 }, { "epoch": 0.1863713512100223, "grad_norm": 2.859375, "learning_rate": 4.594193582597319e-05, "loss": 0.8081, "step": 10514 }, { "epoch": 0.18640680324563386, "grad_norm": 2.796875, "learning_rate": 4.594041035140111e-05, "loss": 0.7971, "step": 10516 }, { "epoch": 0.18644225528124542, "grad_norm": 2.5625, "learning_rate": 4.593888461549706e-05, "loss": 0.7671, "step": 10518 }, { "epoch": 0.186477707316857, "grad_norm": 2.796875, "learning_rate": 4.59373586182801e-05, "loss": 0.8175, "step": 10520 }, { "epoch": 0.18651315935246857, "grad_norm": 2.96875, "learning_rate": 4.593583235976926e-05, "loss": 0.7981, "step": 10522 }, { "epoch": 0.18654861138808013, "grad_norm": 2.953125, "learning_rate": 4.593430583998359e-05, "loss": 0.8102, "step": 10524 }, { "epoch": 0.18658406342369172, "grad_norm": 2.921875, "learning_rate": 4.5932779058942154e-05, "loss": 0.76, "step": 10526 }, { "epoch": 0.18661951545930328, "grad_norm": 2.421875, "learning_rate": 4.5931252016663985e-05, "loss": 0.8077, "step": 10528 }, { "epoch": 0.18665496749491484, "grad_norm": 2.953125, "learning_rate": 4.592972471316815e-05, "loss": 0.8312, "step": 10530 }, { "epoch": 0.18669041953052642, "grad_norm": 2.828125, "learning_rate": 4.5928197148473726e-05, "loss": 0.7986, "step": 10532 }, { "epoch": 0.18672587156613799, "grad_norm": 2.953125, "learning_rate": 4.5926669322599746e-05, "loss": 0.8203, "step": 10534 }, { "epoch": 0.18676132360174955, "grad_norm": 2.875, "learning_rate": 4.5925141235565294e-05, "loss": 0.776, "step": 10536 }, { "epoch": 0.18679677563736113, "grad_norm": 2.875, "learning_rate": 4.592361288738945e-05, "loss": 0.8089, "step": 10538 }, { "epoch": 0.1868322276729727, "grad_norm": 2.875, "learning_rate": 4.592208427809125e-05, "loss": 0.7796, "step": 10540 }, { "epoch": 0.18686767970858426, "grad_norm": 2.890625, "learning_rate": 4.592055540768981e-05, "loss": 0.7896, "step": 10542 }, { "epoch": 0.18690313174419584, "grad_norm": 2.40625, "learning_rate": 4.59190262762042e-05, "loss": 0.8076, "step": 10544 }, { "epoch": 0.1869385837798074, "grad_norm": 2.859375, "learning_rate": 4.591749688365349e-05, "loss": 0.8327, "step": 10546 }, { "epoch": 0.18697403581541897, "grad_norm": 2.671875, "learning_rate": 4.5915967230056786e-05, "loss": 0.766, "step": 10548 }, { "epoch": 0.18700948785103055, "grad_norm": 2.8125, "learning_rate": 4.591443731543316e-05, "loss": 0.8005, "step": 10550 }, { "epoch": 0.18704493988664211, "grad_norm": 2.359375, "learning_rate": 4.591290713980172e-05, "loss": 0.8138, "step": 10552 }, { "epoch": 0.18708039192225367, "grad_norm": 2.546875, "learning_rate": 4.591137670318155e-05, "loss": 0.8359, "step": 10554 }, { "epoch": 0.18711584395786526, "grad_norm": 2.46875, "learning_rate": 4.590984600559175e-05, "loss": 0.8147, "step": 10556 }, { "epoch": 0.18715129599347682, "grad_norm": 2.90625, "learning_rate": 4.590831504705143e-05, "loss": 0.8025, "step": 10558 }, { "epoch": 0.18718674802908838, "grad_norm": 2.609375, "learning_rate": 4.590678382757969e-05, "loss": 0.8017, "step": 10560 }, { "epoch": 0.18722220006469997, "grad_norm": 2.78125, "learning_rate": 4.590525234719565e-05, "loss": 0.7923, "step": 10562 }, { "epoch": 0.18725765210031153, "grad_norm": 2.671875, "learning_rate": 4.590372060591841e-05, "loss": 0.7839, "step": 10564 }, { "epoch": 0.1872931041359231, "grad_norm": 2.796875, "learning_rate": 4.5902188603767094e-05, "loss": 0.814, "step": 10566 }, { "epoch": 0.18732855617153468, "grad_norm": 2.671875, "learning_rate": 4.590065634076082e-05, "loss": 0.8513, "step": 10568 }, { "epoch": 0.18736400820714624, "grad_norm": 2.71875, "learning_rate": 4.58991238169187e-05, "loss": 0.7772, "step": 10570 }, { "epoch": 0.1873994602427578, "grad_norm": 3.0, "learning_rate": 4.589759103225987e-05, "loss": 0.8171, "step": 10572 }, { "epoch": 0.1874349122783694, "grad_norm": 2.703125, "learning_rate": 4.589605798680346e-05, "loss": 0.8193, "step": 10574 }, { "epoch": 0.18747036431398095, "grad_norm": 2.9375, "learning_rate": 4.5894524680568596e-05, "loss": 0.8185, "step": 10576 }, { "epoch": 0.1875058163495925, "grad_norm": 2.71875, "learning_rate": 4.589299111357441e-05, "loss": 0.788, "step": 10578 }, { "epoch": 0.1875412683852041, "grad_norm": 2.484375, "learning_rate": 4.589145728584006e-05, "loss": 0.7515, "step": 10580 }, { "epoch": 0.18757672042081566, "grad_norm": 2.984375, "learning_rate": 4.588992319738466e-05, "loss": 0.7798, "step": 10582 }, { "epoch": 0.18761217245642722, "grad_norm": 2.453125, "learning_rate": 4.588838884822738e-05, "loss": 0.7761, "step": 10584 }, { "epoch": 0.1876476244920388, "grad_norm": 3.0, "learning_rate": 4.5886854238387364e-05, "loss": 0.8103, "step": 10586 }, { "epoch": 0.18768307652765037, "grad_norm": 2.8125, "learning_rate": 4.588531936788375e-05, "loss": 0.8216, "step": 10588 }, { "epoch": 0.18771852856326193, "grad_norm": 2.671875, "learning_rate": 4.588378423673569e-05, "loss": 0.8116, "step": 10590 }, { "epoch": 0.18775398059887352, "grad_norm": 2.765625, "learning_rate": 4.588224884496237e-05, "loss": 0.8287, "step": 10592 }, { "epoch": 0.18778943263448508, "grad_norm": 2.59375, "learning_rate": 4.588071319258293e-05, "loss": 0.7659, "step": 10594 }, { "epoch": 0.18782488467009664, "grad_norm": 2.5, "learning_rate": 4.587917727961652e-05, "loss": 0.8064, "step": 10596 }, { "epoch": 0.18786033670570823, "grad_norm": 2.75, "learning_rate": 4.587764110608235e-05, "loss": 0.7971, "step": 10598 }, { "epoch": 0.1878957887413198, "grad_norm": 2.6875, "learning_rate": 4.5876104671999556e-05, "loss": 0.8327, "step": 10600 }, { "epoch": 0.18793124077693135, "grad_norm": 2.53125, "learning_rate": 4.5874567977387326e-05, "loss": 0.7434, "step": 10602 }, { "epoch": 0.18796669281254294, "grad_norm": 2.625, "learning_rate": 4.5873031022264834e-05, "loss": 0.8094, "step": 10604 }, { "epoch": 0.1880021448481545, "grad_norm": 2.8125, "learning_rate": 4.5871493806651265e-05, "loss": 0.7896, "step": 10606 }, { "epoch": 0.18803759688376606, "grad_norm": 2.59375, "learning_rate": 4.58699563305658e-05, "loss": 0.792, "step": 10608 }, { "epoch": 0.18807304891937765, "grad_norm": 2.75, "learning_rate": 4.586841859402763e-05, "loss": 0.8111, "step": 10610 }, { "epoch": 0.1881085009549892, "grad_norm": 2.984375, "learning_rate": 4.586688059705593e-05, "loss": 0.8222, "step": 10612 }, { "epoch": 0.18814395299060077, "grad_norm": 3.140625, "learning_rate": 4.586534233966992e-05, "loss": 0.8149, "step": 10614 }, { "epoch": 0.18817940502621236, "grad_norm": 2.625, "learning_rate": 4.5863803821888775e-05, "loss": 0.7729, "step": 10616 }, { "epoch": 0.18821485706182392, "grad_norm": 2.65625, "learning_rate": 4.586226504373171e-05, "loss": 0.8154, "step": 10618 }, { "epoch": 0.18825030909743548, "grad_norm": 2.6875, "learning_rate": 4.5860726005217924e-05, "loss": 0.8175, "step": 10620 }, { "epoch": 0.18828576113304707, "grad_norm": 2.828125, "learning_rate": 4.585918670636662e-05, "loss": 0.8531, "step": 10622 }, { "epoch": 0.18832121316865863, "grad_norm": 2.765625, "learning_rate": 4.5857647147197e-05, "loss": 0.7576, "step": 10624 }, { "epoch": 0.1883566652042702, "grad_norm": 2.578125, "learning_rate": 4.5856107327728305e-05, "loss": 0.7785, "step": 10626 }, { "epoch": 0.18839211723988178, "grad_norm": 2.8125, "learning_rate": 4.5854567247979727e-05, "loss": 0.7932, "step": 10628 }, { "epoch": 0.18842756927549334, "grad_norm": 2.890625, "learning_rate": 4.5853026907970484e-05, "loss": 0.7907, "step": 10630 }, { "epoch": 0.1884630213111049, "grad_norm": 2.671875, "learning_rate": 4.585148630771983e-05, "loss": 0.8394, "step": 10632 }, { "epoch": 0.1884984733467165, "grad_norm": 2.703125, "learning_rate": 4.584994544724695e-05, "loss": 0.8028, "step": 10634 }, { "epoch": 0.18853392538232805, "grad_norm": 2.578125, "learning_rate": 4.5848404326571104e-05, "loss": 0.8247, "step": 10636 }, { "epoch": 0.1885693774179396, "grad_norm": 3.171875, "learning_rate": 4.584686294571151e-05, "loss": 0.7953, "step": 10638 }, { "epoch": 0.1886048294535512, "grad_norm": 2.640625, "learning_rate": 4.584532130468741e-05, "loss": 0.8111, "step": 10640 }, { "epoch": 0.18864028148916276, "grad_norm": 2.578125, "learning_rate": 4.584377940351804e-05, "loss": 0.7983, "step": 10642 }, { "epoch": 0.18867573352477432, "grad_norm": 2.859375, "learning_rate": 4.584223724222265e-05, "loss": 0.8223, "step": 10644 }, { "epoch": 0.1887111855603859, "grad_norm": 2.4375, "learning_rate": 4.5840694820820476e-05, "loss": 0.8173, "step": 10646 }, { "epoch": 0.18874663759599747, "grad_norm": 2.625, "learning_rate": 4.583915213933077e-05, "loss": 0.7821, "step": 10648 }, { "epoch": 0.18878208963160903, "grad_norm": 2.703125, "learning_rate": 4.583760919777279e-05, "loss": 0.8053, "step": 10650 }, { "epoch": 0.18881754166722062, "grad_norm": 2.859375, "learning_rate": 4.583606599616579e-05, "loss": 0.7501, "step": 10652 }, { "epoch": 0.18885299370283218, "grad_norm": 2.5625, "learning_rate": 4.5834522534529015e-05, "loss": 0.771, "step": 10654 }, { "epoch": 0.18888844573844374, "grad_norm": 2.53125, "learning_rate": 4.5832978812881744e-05, "loss": 0.8112, "step": 10656 }, { "epoch": 0.18892389777405533, "grad_norm": 2.90625, "learning_rate": 4.583143483124324e-05, "loss": 0.8547, "step": 10658 }, { "epoch": 0.1889593498096669, "grad_norm": 2.46875, "learning_rate": 4.582989058963276e-05, "loss": 0.806, "step": 10660 }, { "epoch": 0.18899480184527845, "grad_norm": 2.875, "learning_rate": 4.5828346088069596e-05, "loss": 0.7513, "step": 10662 }, { "epoch": 0.18903025388089004, "grad_norm": 2.6875, "learning_rate": 4.5826801326573006e-05, "loss": 0.8386, "step": 10664 }, { "epoch": 0.1890657059165016, "grad_norm": 2.59375, "learning_rate": 4.582525630516227e-05, "loss": 0.7845, "step": 10666 }, { "epoch": 0.18910115795211316, "grad_norm": 2.734375, "learning_rate": 4.582371102385667e-05, "loss": 0.8075, "step": 10668 }, { "epoch": 0.18913660998772475, "grad_norm": 2.78125, "learning_rate": 4.5822165482675505e-05, "loss": 0.8587, "step": 10670 }, { "epoch": 0.1891720620233363, "grad_norm": 2.75, "learning_rate": 4.5820619681638046e-05, "loss": 0.7742, "step": 10672 }, { "epoch": 0.18920751405894787, "grad_norm": 2.625, "learning_rate": 4.581907362076359e-05, "loss": 0.839, "step": 10674 }, { "epoch": 0.18924296609455943, "grad_norm": 2.6875, "learning_rate": 4.581752730007143e-05, "loss": 0.8301, "step": 10676 }, { "epoch": 0.18927841813017102, "grad_norm": 2.671875, "learning_rate": 4.5815980719580864e-05, "loss": 0.7948, "step": 10678 }, { "epoch": 0.18931387016578258, "grad_norm": 2.59375, "learning_rate": 4.5814433879311194e-05, "loss": 0.8258, "step": 10680 }, { "epoch": 0.18934932220139414, "grad_norm": 3.03125, "learning_rate": 4.581288677928173e-05, "loss": 0.8405, "step": 10682 }, { "epoch": 0.18938477423700573, "grad_norm": 2.734375, "learning_rate": 4.581133941951177e-05, "loss": 0.8275, "step": 10684 }, { "epoch": 0.1894202262726173, "grad_norm": 2.828125, "learning_rate": 4.580979180002063e-05, "loss": 0.7894, "step": 10686 }, { "epoch": 0.18945567830822885, "grad_norm": 2.75, "learning_rate": 4.580824392082762e-05, "loss": 0.7929, "step": 10688 }, { "epoch": 0.18949113034384044, "grad_norm": 2.625, "learning_rate": 4.580669578195206e-05, "loss": 0.8554, "step": 10690 }, { "epoch": 0.189526582379452, "grad_norm": 2.75, "learning_rate": 4.580514738341328e-05, "loss": 0.7792, "step": 10692 }, { "epoch": 0.18956203441506356, "grad_norm": 2.734375, "learning_rate": 4.580359872523058e-05, "loss": 0.8322, "step": 10694 }, { "epoch": 0.18959748645067515, "grad_norm": 2.71875, "learning_rate": 4.580204980742331e-05, "loss": 0.8135, "step": 10696 }, { "epoch": 0.1896329384862867, "grad_norm": 2.6875, "learning_rate": 4.580050063001079e-05, "loss": 0.8146, "step": 10698 }, { "epoch": 0.18966839052189827, "grad_norm": 2.78125, "learning_rate": 4.579895119301235e-05, "loss": 0.8429, "step": 10700 }, { "epoch": 0.18970384255750986, "grad_norm": 2.703125, "learning_rate": 4.579740149644734e-05, "loss": 0.7867, "step": 10702 }, { "epoch": 0.18973929459312142, "grad_norm": 2.578125, "learning_rate": 4.579585154033509e-05, "loss": 0.8102, "step": 10704 }, { "epoch": 0.18977474662873298, "grad_norm": 2.4375, "learning_rate": 4.5794301324694934e-05, "loss": 0.8017, "step": 10706 }, { "epoch": 0.18981019866434456, "grad_norm": 2.625, "learning_rate": 4.579275084954623e-05, "loss": 0.7597, "step": 10708 }, { "epoch": 0.18984565069995613, "grad_norm": 2.65625, "learning_rate": 4.579120011490834e-05, "loss": 0.8081, "step": 10710 }, { "epoch": 0.1898811027355677, "grad_norm": 2.828125, "learning_rate": 4.5789649120800587e-05, "loss": 0.7912, "step": 10712 }, { "epoch": 0.18991655477117927, "grad_norm": 2.578125, "learning_rate": 4.5788097867242355e-05, "loss": 0.7823, "step": 10714 }, { "epoch": 0.18995200680679084, "grad_norm": 2.890625, "learning_rate": 4.578654635425298e-05, "loss": 0.8361, "step": 10716 }, { "epoch": 0.1899874588424024, "grad_norm": 2.609375, "learning_rate": 4.578499458185185e-05, "loss": 0.7681, "step": 10718 }, { "epoch": 0.19002291087801398, "grad_norm": 2.765625, "learning_rate": 4.578344255005831e-05, "loss": 0.8167, "step": 10720 }, { "epoch": 0.19005836291362554, "grad_norm": 2.640625, "learning_rate": 4.578189025889173e-05, "loss": 0.8504, "step": 10722 }, { "epoch": 0.1900938149492371, "grad_norm": 2.6875, "learning_rate": 4.578033770837149e-05, "loss": 0.7749, "step": 10724 }, { "epoch": 0.1901292669848487, "grad_norm": 2.84375, "learning_rate": 4.577878489851697e-05, "loss": 0.8276, "step": 10726 }, { "epoch": 0.19016471902046025, "grad_norm": 2.59375, "learning_rate": 4.577723182934754e-05, "loss": 0.8341, "step": 10728 }, { "epoch": 0.19020017105607182, "grad_norm": 2.75, "learning_rate": 4.577567850088258e-05, "loss": 0.7664, "step": 10730 }, { "epoch": 0.1902356230916834, "grad_norm": 2.8125, "learning_rate": 4.577412491314149e-05, "loss": 0.8106, "step": 10732 }, { "epoch": 0.19027107512729496, "grad_norm": 2.484375, "learning_rate": 4.577257106614364e-05, "loss": 0.8244, "step": 10734 }, { "epoch": 0.19030652716290652, "grad_norm": 2.921875, "learning_rate": 4.577101695990843e-05, "loss": 0.8214, "step": 10736 }, { "epoch": 0.1903419791985181, "grad_norm": 2.875, "learning_rate": 4.5769462594455256e-05, "loss": 0.8786, "step": 10738 }, { "epoch": 0.19037743123412967, "grad_norm": 2.546875, "learning_rate": 4.5767907969803514e-05, "loss": 0.8052, "step": 10740 }, { "epoch": 0.19041288326974123, "grad_norm": 2.71875, "learning_rate": 4.5766353085972605e-05, "loss": 0.8261, "step": 10742 }, { "epoch": 0.19044833530535282, "grad_norm": 2.828125, "learning_rate": 4.5764797942981944e-05, "loss": 0.7922, "step": 10744 }, { "epoch": 0.19048378734096438, "grad_norm": 2.828125, "learning_rate": 4.576324254085092e-05, "loss": 0.7848, "step": 10746 }, { "epoch": 0.19051923937657594, "grad_norm": 2.828125, "learning_rate": 4.576168687959895e-05, "loss": 0.8277, "step": 10748 }, { "epoch": 0.19055469141218753, "grad_norm": 2.625, "learning_rate": 4.5760130959245464e-05, "loss": 0.8246, "step": 10750 }, { "epoch": 0.1905901434477991, "grad_norm": 2.59375, "learning_rate": 4.575857477980986e-05, "loss": 0.7686, "step": 10752 }, { "epoch": 0.19062559548341065, "grad_norm": 2.828125, "learning_rate": 4.5757018341311565e-05, "loss": 0.7949, "step": 10754 }, { "epoch": 0.19066104751902224, "grad_norm": 2.671875, "learning_rate": 4.575546164377e-05, "loss": 0.8289, "step": 10756 }, { "epoch": 0.1906964995546338, "grad_norm": 2.71875, "learning_rate": 4.575390468720461e-05, "loss": 0.8116, "step": 10758 }, { "epoch": 0.19073195159024536, "grad_norm": 2.609375, "learning_rate": 4.5752347471634804e-05, "loss": 0.8185, "step": 10760 }, { "epoch": 0.19076740362585695, "grad_norm": 2.84375, "learning_rate": 4.5750789997080035e-05, "loss": 0.7798, "step": 10762 }, { "epoch": 0.1908028556614685, "grad_norm": 2.84375, "learning_rate": 4.5749232263559716e-05, "loss": 0.8127, "step": 10764 }, { "epoch": 0.19083830769708007, "grad_norm": 2.9375, "learning_rate": 4.5747674271093306e-05, "loss": 0.8089, "step": 10766 }, { "epoch": 0.19087375973269166, "grad_norm": 2.8125, "learning_rate": 4.5746116019700234e-05, "loss": 0.7595, "step": 10768 }, { "epoch": 0.19090921176830322, "grad_norm": 2.5625, "learning_rate": 4.574455750939997e-05, "loss": 0.7865, "step": 10770 }, { "epoch": 0.19094466380391478, "grad_norm": 2.8125, "learning_rate": 4.574299874021194e-05, "loss": 0.8002, "step": 10772 }, { "epoch": 0.19098011583952637, "grad_norm": 2.671875, "learning_rate": 4.574143971215561e-05, "loss": 0.7734, "step": 10774 }, { "epoch": 0.19101556787513793, "grad_norm": 2.703125, "learning_rate": 4.573988042525042e-05, "loss": 0.794, "step": 10776 }, { "epoch": 0.1910510199107495, "grad_norm": 2.921875, "learning_rate": 4.573832087951586e-05, "loss": 0.7835, "step": 10778 }, { "epoch": 0.19108647194636108, "grad_norm": 2.75, "learning_rate": 4.5736761074971366e-05, "loss": 0.814, "step": 10780 }, { "epoch": 0.19112192398197264, "grad_norm": 2.796875, "learning_rate": 4.573520101163641e-05, "loss": 0.8026, "step": 10782 }, { "epoch": 0.1911573760175842, "grad_norm": 2.859375, "learning_rate": 4.5733640689530465e-05, "loss": 0.7686, "step": 10784 }, { "epoch": 0.1911928280531958, "grad_norm": 2.75, "learning_rate": 4.5732080108673007e-05, "loss": 0.8028, "step": 10786 }, { "epoch": 0.19122828008880735, "grad_norm": 2.640625, "learning_rate": 4.573051926908351e-05, "loss": 0.8185, "step": 10788 }, { "epoch": 0.1912637321244189, "grad_norm": 3.25, "learning_rate": 4.5728958170781446e-05, "loss": 0.8286, "step": 10790 }, { "epoch": 0.1912991841600305, "grad_norm": 2.578125, "learning_rate": 4.57273968137863e-05, "loss": 0.8065, "step": 10792 }, { "epoch": 0.19133463619564206, "grad_norm": 2.640625, "learning_rate": 4.572583519811756e-05, "loss": 0.8441, "step": 10794 }, { "epoch": 0.19137008823125362, "grad_norm": 2.546875, "learning_rate": 4.572427332379472e-05, "loss": 0.7712, "step": 10796 }, { "epoch": 0.1914055402668652, "grad_norm": 2.65625, "learning_rate": 4.572271119083726e-05, "loss": 0.7992, "step": 10798 }, { "epoch": 0.19144099230247677, "grad_norm": 3.109375, "learning_rate": 4.5721148799264676e-05, "loss": 0.7992, "step": 10800 }, { "epoch": 0.19147644433808833, "grad_norm": 2.578125, "learning_rate": 4.571958614909648e-05, "loss": 0.7961, "step": 10802 }, { "epoch": 0.19151189637369992, "grad_norm": 2.8125, "learning_rate": 4.571802324035216e-05, "loss": 0.7667, "step": 10804 }, { "epoch": 0.19154734840931148, "grad_norm": 2.625, "learning_rate": 4.5716460073051224e-05, "loss": 0.788, "step": 10806 }, { "epoch": 0.19158280044492304, "grad_norm": 2.53125, "learning_rate": 4.571489664721318e-05, "loss": 0.7764, "step": 10808 }, { "epoch": 0.19161825248053463, "grad_norm": 2.796875, "learning_rate": 4.571333296285755e-05, "loss": 0.7794, "step": 10810 }, { "epoch": 0.1916537045161462, "grad_norm": 2.65625, "learning_rate": 4.571176902000383e-05, "loss": 0.7814, "step": 10812 }, { "epoch": 0.19168915655175775, "grad_norm": 2.921875, "learning_rate": 4.5710204818671546e-05, "loss": 0.825, "step": 10814 }, { "epoch": 0.19172460858736934, "grad_norm": 3.296875, "learning_rate": 4.570864035888022e-05, "loss": 0.8684, "step": 10816 }, { "epoch": 0.1917600606229809, "grad_norm": 3.015625, "learning_rate": 4.570707564064938e-05, "loss": 0.7767, "step": 10818 }, { "epoch": 0.19179551265859246, "grad_norm": 2.859375, "learning_rate": 4.570551066399854e-05, "loss": 0.8271, "step": 10820 }, { "epoch": 0.19183096469420405, "grad_norm": 2.4375, "learning_rate": 4.570394542894725e-05, "loss": 0.7728, "step": 10822 }, { "epoch": 0.1918664167298156, "grad_norm": 2.765625, "learning_rate": 4.5702379935515026e-05, "loss": 0.7949, "step": 10824 }, { "epoch": 0.19190186876542717, "grad_norm": 2.65625, "learning_rate": 4.570081418372142e-05, "loss": 0.8263, "step": 10826 }, { "epoch": 0.19193732080103876, "grad_norm": 2.953125, "learning_rate": 4.569924817358596e-05, "loss": 0.7999, "step": 10828 }, { "epoch": 0.19197277283665032, "grad_norm": 2.515625, "learning_rate": 4.5697681905128195e-05, "loss": 0.794, "step": 10830 }, { "epoch": 0.19200822487226188, "grad_norm": 2.515625, "learning_rate": 4.569611537836767e-05, "loss": 0.7843, "step": 10832 }, { "epoch": 0.19204367690787347, "grad_norm": 2.84375, "learning_rate": 4.569454859332394e-05, "loss": 0.806, "step": 10834 }, { "epoch": 0.19207912894348503, "grad_norm": 2.6875, "learning_rate": 4.569298155001655e-05, "loss": 0.8376, "step": 10836 }, { "epoch": 0.1921145809790966, "grad_norm": 2.734375, "learning_rate": 4.569141424846506e-05, "loss": 0.8096, "step": 10838 }, { "epoch": 0.19215003301470818, "grad_norm": 2.625, "learning_rate": 4.568984668868903e-05, "loss": 0.8156, "step": 10840 }, { "epoch": 0.19218548505031974, "grad_norm": 2.828125, "learning_rate": 4.568827887070802e-05, "loss": 0.7778, "step": 10842 }, { "epoch": 0.1922209370859313, "grad_norm": 2.796875, "learning_rate": 4.5686710794541595e-05, "loss": 0.808, "step": 10844 }, { "epoch": 0.19225638912154286, "grad_norm": 3.1875, "learning_rate": 4.568514246020934e-05, "loss": 0.7928, "step": 10846 }, { "epoch": 0.19229184115715445, "grad_norm": 2.828125, "learning_rate": 4.568357386773081e-05, "loss": 0.7913, "step": 10848 }, { "epoch": 0.192327293192766, "grad_norm": 2.671875, "learning_rate": 4.5682005017125584e-05, "loss": 0.7929, "step": 10850 }, { "epoch": 0.19236274522837757, "grad_norm": 2.796875, "learning_rate": 4.568043590841325e-05, "loss": 0.809, "step": 10852 }, { "epoch": 0.19239819726398916, "grad_norm": 2.9375, "learning_rate": 4.567886654161338e-05, "loss": 0.7859, "step": 10854 }, { "epoch": 0.19243364929960072, "grad_norm": 2.5625, "learning_rate": 4.567729691674556e-05, "loss": 0.7684, "step": 10856 }, { "epoch": 0.19246910133521228, "grad_norm": 2.8125, "learning_rate": 4.5675727033829386e-05, "loss": 0.8059, "step": 10858 }, { "epoch": 0.19250455337082387, "grad_norm": 2.8125, "learning_rate": 4.567415689288444e-05, "loss": 0.8213, "step": 10860 }, { "epoch": 0.19254000540643543, "grad_norm": 2.6875, "learning_rate": 4.5672586493930325e-05, "loss": 0.7972, "step": 10862 }, { "epoch": 0.192575457442047, "grad_norm": 2.625, "learning_rate": 4.567101583698663e-05, "loss": 0.8258, "step": 10864 }, { "epoch": 0.19261090947765858, "grad_norm": 2.921875, "learning_rate": 4.5669444922072965e-05, "loss": 0.8301, "step": 10866 }, { "epoch": 0.19264636151327014, "grad_norm": 2.671875, "learning_rate": 4.5667873749208946e-05, "loss": 0.8251, "step": 10868 }, { "epoch": 0.1926818135488817, "grad_norm": 2.5625, "learning_rate": 4.566630231841416e-05, "loss": 0.8066, "step": 10870 }, { "epoch": 0.19271726558449329, "grad_norm": 2.546875, "learning_rate": 4.566473062970821e-05, "loss": 0.7789, "step": 10872 }, { "epoch": 0.19275271762010485, "grad_norm": 2.6875, "learning_rate": 4.566315868311074e-05, "loss": 0.8069, "step": 10874 }, { "epoch": 0.1927881696557164, "grad_norm": 2.765625, "learning_rate": 4.5661586478641356e-05, "loss": 0.7928, "step": 10876 }, { "epoch": 0.192823621691328, "grad_norm": 2.515625, "learning_rate": 4.5660014016319674e-05, "loss": 0.7653, "step": 10878 }, { "epoch": 0.19285907372693956, "grad_norm": 2.921875, "learning_rate": 4.5658441296165316e-05, "loss": 0.8492, "step": 10880 }, { "epoch": 0.19289452576255112, "grad_norm": 2.75, "learning_rate": 4.5656868318197914e-05, "loss": 0.7798, "step": 10882 }, { "epoch": 0.1929299777981627, "grad_norm": 2.6875, "learning_rate": 4.56552950824371e-05, "loss": 0.7742, "step": 10884 }, { "epoch": 0.19296542983377427, "grad_norm": 2.703125, "learning_rate": 4.5653721588902506e-05, "loss": 0.779, "step": 10886 }, { "epoch": 0.19300088186938583, "grad_norm": 2.734375, "learning_rate": 4.565214783761377e-05, "loss": 0.8233, "step": 10888 }, { "epoch": 0.19303633390499741, "grad_norm": 2.8125, "learning_rate": 4.5650573828590525e-05, "loss": 0.8177, "step": 10890 }, { "epoch": 0.19307178594060898, "grad_norm": 2.625, "learning_rate": 4.5648999561852424e-05, "loss": 0.8479, "step": 10892 }, { "epoch": 0.19310723797622054, "grad_norm": 2.96875, "learning_rate": 4.564742503741911e-05, "loss": 0.8367, "step": 10894 }, { "epoch": 0.19314269001183212, "grad_norm": 2.890625, "learning_rate": 4.564585025531023e-05, "loss": 0.7768, "step": 10896 }, { "epoch": 0.19317814204744369, "grad_norm": 2.75, "learning_rate": 4.564427521554544e-05, "loss": 0.8172, "step": 10898 }, { "epoch": 0.19321359408305525, "grad_norm": 2.671875, "learning_rate": 4.564269991814439e-05, "loss": 0.8101, "step": 10900 }, { "epoch": 0.19324904611866683, "grad_norm": 2.859375, "learning_rate": 4.564112436312675e-05, "loss": 0.8061, "step": 10902 }, { "epoch": 0.1932844981542784, "grad_norm": 2.828125, "learning_rate": 4.563954855051218e-05, "loss": 0.8271, "step": 10904 }, { "epoch": 0.19331995018988996, "grad_norm": 2.796875, "learning_rate": 4.563797248032034e-05, "loss": 0.7935, "step": 10906 }, { "epoch": 0.19335540222550154, "grad_norm": 2.578125, "learning_rate": 4.5636396152570906e-05, "loss": 0.8145, "step": 10908 }, { "epoch": 0.1933908542611131, "grad_norm": 2.890625, "learning_rate": 4.5634819567283536e-05, "loss": 0.8593, "step": 10910 }, { "epoch": 0.19342630629672466, "grad_norm": 2.765625, "learning_rate": 4.563324272447792e-05, "loss": 0.7838, "step": 10912 }, { "epoch": 0.19346175833233625, "grad_norm": 2.734375, "learning_rate": 4.563166562417374e-05, "loss": 0.8105, "step": 10914 }, { "epoch": 0.19349721036794781, "grad_norm": 2.984375, "learning_rate": 4.563008826639066e-05, "loss": 0.8196, "step": 10916 }, { "epoch": 0.19353266240355937, "grad_norm": 2.6875, "learning_rate": 4.5628510651148385e-05, "loss": 0.8281, "step": 10918 }, { "epoch": 0.19356811443917096, "grad_norm": 2.625, "learning_rate": 4.562693277846658e-05, "loss": 0.7671, "step": 10920 }, { "epoch": 0.19360356647478252, "grad_norm": 3.0625, "learning_rate": 4.562535464836496e-05, "loss": 0.8377, "step": 10922 }, { "epoch": 0.19363901851039408, "grad_norm": 2.578125, "learning_rate": 4.562377626086321e-05, "loss": 0.7881, "step": 10924 }, { "epoch": 0.19367447054600567, "grad_norm": 2.8125, "learning_rate": 4.5622197615981025e-05, "loss": 0.815, "step": 10926 }, { "epoch": 0.19370992258161723, "grad_norm": 2.640625, "learning_rate": 4.5620618713738114e-05, "loss": 0.7683, "step": 10928 }, { "epoch": 0.1937453746172288, "grad_norm": 2.6875, "learning_rate": 4.561903955415417e-05, "loss": 0.7652, "step": 10930 }, { "epoch": 0.19378082665284038, "grad_norm": 2.578125, "learning_rate": 4.5617460137248915e-05, "loss": 0.8001, "step": 10932 }, { "epoch": 0.19381627868845194, "grad_norm": 2.65625, "learning_rate": 4.5615880463042036e-05, "loss": 0.8007, "step": 10934 }, { "epoch": 0.1938517307240635, "grad_norm": 2.84375, "learning_rate": 4.561430053155328e-05, "loss": 0.7932, "step": 10936 }, { "epoch": 0.1938871827596751, "grad_norm": 2.625, "learning_rate": 4.561272034280234e-05, "loss": 0.7869, "step": 10938 }, { "epoch": 0.19392263479528665, "grad_norm": 2.8125, "learning_rate": 4.561113989680894e-05, "loss": 0.8114, "step": 10940 }, { "epoch": 0.1939580868308982, "grad_norm": 2.578125, "learning_rate": 4.560955919359281e-05, "loss": 0.7966, "step": 10942 }, { "epoch": 0.1939935388665098, "grad_norm": 2.515625, "learning_rate": 4.560797823317368e-05, "loss": 0.8255, "step": 10944 }, { "epoch": 0.19402899090212136, "grad_norm": 2.875, "learning_rate": 4.560639701557127e-05, "loss": 0.8436, "step": 10946 }, { "epoch": 0.19406444293773292, "grad_norm": 2.515625, "learning_rate": 4.560481554080531e-05, "loss": 0.7665, "step": 10948 }, { "epoch": 0.1940998949733445, "grad_norm": 2.734375, "learning_rate": 4.5603233808895554e-05, "loss": 0.7843, "step": 10950 }, { "epoch": 0.19413534700895607, "grad_norm": 2.8125, "learning_rate": 4.560165181986172e-05, "loss": 0.8479, "step": 10952 }, { "epoch": 0.19417079904456763, "grad_norm": 3.046875, "learning_rate": 4.5600069573723577e-05, "loss": 0.7911, "step": 10954 }, { "epoch": 0.19420625108017922, "grad_norm": 2.375, "learning_rate": 4.559848707050085e-05, "loss": 0.7645, "step": 10956 }, { "epoch": 0.19424170311579078, "grad_norm": 2.78125, "learning_rate": 4.559690431021329e-05, "loss": 0.8018, "step": 10958 }, { "epoch": 0.19427715515140234, "grad_norm": 2.6875, "learning_rate": 4.559532129288066e-05, "loss": 0.8233, "step": 10960 }, { "epoch": 0.19431260718701393, "grad_norm": 2.515625, "learning_rate": 4.559373801852271e-05, "loss": 0.809, "step": 10962 }, { "epoch": 0.1943480592226255, "grad_norm": 2.671875, "learning_rate": 4.5592154487159197e-05, "loss": 0.7707, "step": 10964 }, { "epoch": 0.19438351125823705, "grad_norm": 2.765625, "learning_rate": 4.559057069880988e-05, "loss": 0.7766, "step": 10966 }, { "epoch": 0.19441896329384864, "grad_norm": 2.765625, "learning_rate": 4.558898665349453e-05, "loss": 0.8001, "step": 10968 }, { "epoch": 0.1944544153294602, "grad_norm": 2.921875, "learning_rate": 4.558740235123292e-05, "loss": 0.7537, "step": 10970 }, { "epoch": 0.19448986736507176, "grad_norm": 3.140625, "learning_rate": 4.5585817792044815e-05, "loss": 0.8424, "step": 10972 }, { "epoch": 0.19452531940068335, "grad_norm": 2.671875, "learning_rate": 4.558423297595e-05, "loss": 0.8104, "step": 10974 }, { "epoch": 0.1945607714362949, "grad_norm": 2.640625, "learning_rate": 4.558264790296823e-05, "loss": 0.8017, "step": 10976 }, { "epoch": 0.19459622347190647, "grad_norm": 2.734375, "learning_rate": 4.558106257311932e-05, "loss": 0.8555, "step": 10978 }, { "epoch": 0.19463167550751806, "grad_norm": 2.546875, "learning_rate": 4.557947698642302e-05, "loss": 0.8092, "step": 10980 }, { "epoch": 0.19466712754312962, "grad_norm": 2.890625, "learning_rate": 4.557789114289913e-05, "loss": 0.8264, "step": 10982 }, { "epoch": 0.19470257957874118, "grad_norm": 2.890625, "learning_rate": 4.557630504256746e-05, "loss": 0.7817, "step": 10984 }, { "epoch": 0.19473803161435277, "grad_norm": 2.734375, "learning_rate": 4.5574718685447784e-05, "loss": 0.7646, "step": 10986 }, { "epoch": 0.19477348364996433, "grad_norm": 2.828125, "learning_rate": 4.55731320715599e-05, "loss": 0.7924, "step": 10988 }, { "epoch": 0.1948089356855759, "grad_norm": 2.90625, "learning_rate": 4.557154520092361e-05, "loss": 0.7924, "step": 10990 }, { "epoch": 0.19484438772118748, "grad_norm": 2.921875, "learning_rate": 4.5569958073558724e-05, "loss": 0.8446, "step": 10992 }, { "epoch": 0.19487983975679904, "grad_norm": 2.765625, "learning_rate": 4.556837068948505e-05, "loss": 0.83, "step": 10994 }, { "epoch": 0.1949152917924106, "grad_norm": 3.0, "learning_rate": 4.556678304872239e-05, "loss": 0.7711, "step": 10996 }, { "epoch": 0.1949507438280222, "grad_norm": 2.53125, "learning_rate": 4.556519515129056e-05, "loss": 0.8246, "step": 10998 }, { "epoch": 0.19498619586363375, "grad_norm": 2.609375, "learning_rate": 4.556360699720938e-05, "loss": 0.8017, "step": 11000 }, { "epoch": 0.1950216478992453, "grad_norm": 3.015625, "learning_rate": 4.556201858649867e-05, "loss": 0.838, "step": 11002 }, { "epoch": 0.1950570999348569, "grad_norm": 2.734375, "learning_rate": 4.556042991917825e-05, "loss": 0.7748, "step": 11004 }, { "epoch": 0.19509255197046846, "grad_norm": 2.921875, "learning_rate": 4.555884099526794e-05, "loss": 0.8404, "step": 11006 }, { "epoch": 0.19512800400608002, "grad_norm": 2.71875, "learning_rate": 4.555725181478758e-05, "loss": 0.8018, "step": 11008 }, { "epoch": 0.1951634560416916, "grad_norm": 2.75, "learning_rate": 4.5555662377757e-05, "loss": 0.8392, "step": 11010 }, { "epoch": 0.19519890807730317, "grad_norm": 2.53125, "learning_rate": 4.5554072684196035e-05, "loss": 0.8423, "step": 11012 }, { "epoch": 0.19523436011291473, "grad_norm": 2.765625, "learning_rate": 4.555248273412453e-05, "loss": 0.8131, "step": 11014 }, { "epoch": 0.1952698121485263, "grad_norm": 2.65625, "learning_rate": 4.555089252756232e-05, "loss": 0.7641, "step": 11016 }, { "epoch": 0.19530526418413788, "grad_norm": 2.65625, "learning_rate": 4.554930206452924e-05, "loss": 0.7994, "step": 11018 }, { "epoch": 0.19534071621974944, "grad_norm": 2.6875, "learning_rate": 4.554771134504516e-05, "loss": 0.7984, "step": 11020 }, { "epoch": 0.195376168255361, "grad_norm": 2.53125, "learning_rate": 4.554612036912992e-05, "loss": 0.7979, "step": 11022 }, { "epoch": 0.1954116202909726, "grad_norm": 2.78125, "learning_rate": 4.554452913680338e-05, "loss": 0.7939, "step": 11024 }, { "epoch": 0.19544707232658415, "grad_norm": 2.546875, "learning_rate": 4.5542937648085394e-05, "loss": 0.8338, "step": 11026 }, { "epoch": 0.1954825243621957, "grad_norm": 2.84375, "learning_rate": 4.5541345902995825e-05, "loss": 0.8299, "step": 11028 }, { "epoch": 0.1955179763978073, "grad_norm": 2.609375, "learning_rate": 4.553975390155454e-05, "loss": 0.8436, "step": 11030 }, { "epoch": 0.19555342843341886, "grad_norm": 3.015625, "learning_rate": 4.55381616437814e-05, "loss": 0.7852, "step": 11032 }, { "epoch": 0.19558888046903042, "grad_norm": 2.953125, "learning_rate": 4.553656912969628e-05, "loss": 0.8327, "step": 11034 }, { "epoch": 0.195624332504642, "grad_norm": 2.828125, "learning_rate": 4.553497635931905e-05, "loss": 0.7849, "step": 11036 }, { "epoch": 0.19565978454025357, "grad_norm": 2.796875, "learning_rate": 4.55333833326696e-05, "loss": 0.801, "step": 11038 }, { "epoch": 0.19569523657586513, "grad_norm": 3.0, "learning_rate": 4.55317900497678e-05, "loss": 0.8602, "step": 11040 }, { "epoch": 0.19573068861147672, "grad_norm": 2.59375, "learning_rate": 4.553019651063354e-05, "loss": 0.7818, "step": 11042 }, { "epoch": 0.19576614064708828, "grad_norm": 2.734375, "learning_rate": 4.55286027152867e-05, "loss": 0.7704, "step": 11044 }, { "epoch": 0.19580159268269984, "grad_norm": 2.71875, "learning_rate": 4.5527008663747176e-05, "loss": 0.7926, "step": 11046 }, { "epoch": 0.19583704471831143, "grad_norm": 2.6875, "learning_rate": 4.552541435603486e-05, "loss": 0.8085, "step": 11048 }, { "epoch": 0.195872496753923, "grad_norm": 2.671875, "learning_rate": 4.5523819792169646e-05, "loss": 0.7907, "step": 11050 }, { "epoch": 0.19590794878953455, "grad_norm": 2.5625, "learning_rate": 4.5522224972171435e-05, "loss": 0.8319, "step": 11052 }, { "epoch": 0.19594340082514614, "grad_norm": 2.796875, "learning_rate": 4.5520629896060134e-05, "loss": 0.8224, "step": 11054 }, { "epoch": 0.1959788528607577, "grad_norm": 2.390625, "learning_rate": 4.5519034563855646e-05, "loss": 0.7893, "step": 11056 }, { "epoch": 0.19601430489636926, "grad_norm": 2.921875, "learning_rate": 4.551743897557788e-05, "loss": 0.8568, "step": 11058 }, { "epoch": 0.19604975693198085, "grad_norm": 2.484375, "learning_rate": 4.551584313124675e-05, "loss": 0.7999, "step": 11060 }, { "epoch": 0.1960852089675924, "grad_norm": 2.671875, "learning_rate": 4.5514247030882165e-05, "loss": 0.818, "step": 11062 }, { "epoch": 0.19612066100320397, "grad_norm": 2.8125, "learning_rate": 4.551265067450405e-05, "loss": 0.7775, "step": 11064 }, { "epoch": 0.19615611303881555, "grad_norm": 2.875, "learning_rate": 4.551105406213233e-05, "loss": 0.8353, "step": 11066 }, { "epoch": 0.19619156507442712, "grad_norm": 3.03125, "learning_rate": 4.550945719378693e-05, "loss": 0.8182, "step": 11068 }, { "epoch": 0.19622701711003868, "grad_norm": 2.703125, "learning_rate": 4.550786006948777e-05, "loss": 0.7792, "step": 11070 }, { "epoch": 0.19626246914565026, "grad_norm": 2.640625, "learning_rate": 4.5506262689254796e-05, "loss": 0.819, "step": 11072 }, { "epoch": 0.19629792118126183, "grad_norm": 2.734375, "learning_rate": 4.550466505310793e-05, "loss": 0.8382, "step": 11074 }, { "epoch": 0.19633337321687339, "grad_norm": 2.484375, "learning_rate": 4.550306716106712e-05, "loss": 0.8173, "step": 11076 }, { "epoch": 0.19636882525248497, "grad_norm": 2.90625, "learning_rate": 4.55014690131523e-05, "loss": 0.8505, "step": 11078 }, { "epoch": 0.19640427728809653, "grad_norm": 2.8125, "learning_rate": 4.549987060938341e-05, "loss": 0.8275, "step": 11080 }, { "epoch": 0.1964397293237081, "grad_norm": 2.703125, "learning_rate": 4.5498271949780414e-05, "loss": 0.765, "step": 11082 }, { "epoch": 0.19647518135931968, "grad_norm": 2.6875, "learning_rate": 4.5496673034363246e-05, "loss": 0.7818, "step": 11084 }, { "epoch": 0.19651063339493124, "grad_norm": 2.625, "learning_rate": 4.549507386315187e-05, "loss": 0.7932, "step": 11086 }, { "epoch": 0.1965460854305428, "grad_norm": 2.609375, "learning_rate": 4.5493474436166236e-05, "loss": 0.817, "step": 11088 }, { "epoch": 0.1965815374661544, "grad_norm": 2.71875, "learning_rate": 4.549187475342632e-05, "loss": 0.805, "step": 11090 }, { "epoch": 0.19661698950176595, "grad_norm": 2.5, "learning_rate": 4.549027481495207e-05, "loss": 0.7974, "step": 11092 }, { "epoch": 0.19665244153737751, "grad_norm": 2.640625, "learning_rate": 4.548867462076346e-05, "loss": 0.8194, "step": 11094 }, { "epoch": 0.1966878935729891, "grad_norm": 2.859375, "learning_rate": 4.548707417088046e-05, "loss": 0.8188, "step": 11096 }, { "epoch": 0.19672334560860066, "grad_norm": 2.921875, "learning_rate": 4.5485473465323035e-05, "loss": 0.8054, "step": 11098 }, { "epoch": 0.19675879764421222, "grad_norm": 3.171875, "learning_rate": 4.548387250411117e-05, "loss": 0.8056, "step": 11100 }, { "epoch": 0.1967942496798238, "grad_norm": 2.828125, "learning_rate": 4.5482271287264845e-05, "loss": 0.7905, "step": 11102 }, { "epoch": 0.19682970171543537, "grad_norm": 2.5625, "learning_rate": 4.5480669814804036e-05, "loss": 0.7922, "step": 11104 }, { "epoch": 0.19686515375104693, "grad_norm": 2.734375, "learning_rate": 4.5479068086748746e-05, "loss": 0.8214, "step": 11106 }, { "epoch": 0.19690060578665852, "grad_norm": 2.921875, "learning_rate": 4.5477466103118936e-05, "loss": 0.7875, "step": 11108 }, { "epoch": 0.19693605782227008, "grad_norm": 2.84375, "learning_rate": 4.547586386393462e-05, "loss": 0.816, "step": 11110 }, { "epoch": 0.19697150985788164, "grad_norm": 3.1875, "learning_rate": 4.547426136921579e-05, "loss": 0.8257, "step": 11112 }, { "epoch": 0.19700696189349323, "grad_norm": 2.640625, "learning_rate": 4.5472658618982446e-05, "loss": 0.7914, "step": 11114 }, { "epoch": 0.1970424139291048, "grad_norm": 2.640625, "learning_rate": 4.5471055613254574e-05, "loss": 0.8, "step": 11116 }, { "epoch": 0.19707786596471635, "grad_norm": 2.546875, "learning_rate": 4.54694523520522e-05, "loss": 0.7953, "step": 11118 }, { "epoch": 0.19711331800032794, "grad_norm": 2.84375, "learning_rate": 4.546784883539533e-05, "loss": 0.7683, "step": 11120 }, { "epoch": 0.1971487700359395, "grad_norm": 2.8125, "learning_rate": 4.546624506330396e-05, "loss": 0.7772, "step": 11122 }, { "epoch": 0.19718422207155106, "grad_norm": 2.890625, "learning_rate": 4.546464103579812e-05, "loss": 0.7998, "step": 11124 }, { "epoch": 0.19721967410716265, "grad_norm": 2.84375, "learning_rate": 4.546303675289782e-05, "loss": 0.8329, "step": 11126 }, { "epoch": 0.1972551261427742, "grad_norm": 2.875, "learning_rate": 4.5461432214623084e-05, "loss": 0.7998, "step": 11128 }, { "epoch": 0.19729057817838577, "grad_norm": 2.578125, "learning_rate": 4.545982742099394e-05, "loss": 0.8297, "step": 11130 }, { "epoch": 0.19732603021399736, "grad_norm": 2.671875, "learning_rate": 4.54582223720304e-05, "loss": 0.8171, "step": 11132 }, { "epoch": 0.19736148224960892, "grad_norm": 2.609375, "learning_rate": 4.545661706775251e-05, "loss": 0.7716, "step": 11134 }, { "epoch": 0.19739693428522048, "grad_norm": 2.625, "learning_rate": 4.54550115081803e-05, "loss": 0.7913, "step": 11136 }, { "epoch": 0.19743238632083207, "grad_norm": 2.640625, "learning_rate": 4.545340569333382e-05, "loss": 0.7837, "step": 11138 }, { "epoch": 0.19746783835644363, "grad_norm": 2.8125, "learning_rate": 4.545179962323308e-05, "loss": 0.8132, "step": 11140 }, { "epoch": 0.1975032903920552, "grad_norm": 2.953125, "learning_rate": 4.545019329789815e-05, "loss": 0.8121, "step": 11142 }, { "epoch": 0.19753874242766678, "grad_norm": 2.78125, "learning_rate": 4.5448586717349065e-05, "loss": 0.8451, "step": 11144 }, { "epoch": 0.19757419446327834, "grad_norm": 3.0625, "learning_rate": 4.5446979881605874e-05, "loss": 0.7968, "step": 11146 }, { "epoch": 0.1976096464988899, "grad_norm": 2.765625, "learning_rate": 4.5445372790688634e-05, "loss": 0.7621, "step": 11148 }, { "epoch": 0.1976450985345015, "grad_norm": 2.890625, "learning_rate": 4.5443765444617404e-05, "loss": 0.805, "step": 11150 }, { "epoch": 0.19768055057011305, "grad_norm": 2.75, "learning_rate": 4.544215784341224e-05, "loss": 0.8009, "step": 11152 }, { "epoch": 0.1977160026057246, "grad_norm": 2.8125, "learning_rate": 4.544054998709319e-05, "loss": 0.8013, "step": 11154 }, { "epoch": 0.1977514546413362, "grad_norm": 2.6875, "learning_rate": 4.543894187568035e-05, "loss": 0.804, "step": 11156 }, { "epoch": 0.19778690667694776, "grad_norm": 2.40625, "learning_rate": 4.5437333509193765e-05, "loss": 0.8002, "step": 11158 }, { "epoch": 0.19782235871255932, "grad_norm": 2.703125, "learning_rate": 4.543572488765351e-05, "loss": 0.8441, "step": 11160 }, { "epoch": 0.1978578107481709, "grad_norm": 3.015625, "learning_rate": 4.5434116011079675e-05, "loss": 0.8051, "step": 11162 }, { "epoch": 0.19789326278378247, "grad_norm": 2.828125, "learning_rate": 4.543250687949232e-05, "loss": 0.7755, "step": 11164 }, { "epoch": 0.19792871481939403, "grad_norm": 2.78125, "learning_rate": 4.543089749291154e-05, "loss": 0.7464, "step": 11166 }, { "epoch": 0.19796416685500562, "grad_norm": 2.609375, "learning_rate": 4.5429287851357416e-05, "loss": 0.8078, "step": 11168 }, { "epoch": 0.19799961889061718, "grad_norm": 2.984375, "learning_rate": 4.542767795485003e-05, "loss": 0.7877, "step": 11170 }, { "epoch": 0.19803507092622874, "grad_norm": 2.859375, "learning_rate": 4.542606780340948e-05, "loss": 0.8076, "step": 11172 }, { "epoch": 0.19807052296184033, "grad_norm": 2.609375, "learning_rate": 4.5424457397055856e-05, "loss": 0.7988, "step": 11174 }, { "epoch": 0.1981059749974519, "grad_norm": 2.703125, "learning_rate": 4.542284673580927e-05, "loss": 0.8249, "step": 11176 }, { "epoch": 0.19814142703306345, "grad_norm": 2.5, "learning_rate": 4.5421235819689796e-05, "loss": 0.7981, "step": 11178 }, { "epoch": 0.19817687906867504, "grad_norm": 2.484375, "learning_rate": 4.541962464871756e-05, "loss": 0.7837, "step": 11180 }, { "epoch": 0.1982123311042866, "grad_norm": 2.640625, "learning_rate": 4.541801322291266e-05, "loss": 0.756, "step": 11182 }, { "epoch": 0.19824778313989816, "grad_norm": 2.59375, "learning_rate": 4.54164015422952e-05, "loss": 0.8059, "step": 11184 }, { "epoch": 0.19828323517550972, "grad_norm": 2.71875, "learning_rate": 4.541478960688531e-05, "loss": 0.8035, "step": 11186 }, { "epoch": 0.1983186872111213, "grad_norm": 2.796875, "learning_rate": 4.5413177416703094e-05, "loss": 0.8364, "step": 11188 }, { "epoch": 0.19835413924673287, "grad_norm": 2.578125, "learning_rate": 4.541156497176868e-05, "loss": 0.7633, "step": 11190 }, { "epoch": 0.19838959128234443, "grad_norm": 2.84375, "learning_rate": 4.540995227210218e-05, "loss": 0.8082, "step": 11192 }, { "epoch": 0.19842504331795602, "grad_norm": 2.875, "learning_rate": 4.540833931772373e-05, "loss": 0.8092, "step": 11194 }, { "epoch": 0.19846049535356758, "grad_norm": 2.640625, "learning_rate": 4.540672610865346e-05, "loss": 0.8039, "step": 11196 }, { "epoch": 0.19849594738917914, "grad_norm": 2.59375, "learning_rate": 4.540511264491149e-05, "loss": 0.8232, "step": 11198 }, { "epoch": 0.19853139942479073, "grad_norm": 2.828125, "learning_rate": 4.540349892651797e-05, "loss": 0.8326, "step": 11200 }, { "epoch": 0.1985668514604023, "grad_norm": 2.65625, "learning_rate": 4.5401884953493035e-05, "loss": 0.8045, "step": 11202 }, { "epoch": 0.19860230349601385, "grad_norm": 2.5625, "learning_rate": 4.540027072585682e-05, "loss": 0.8091, "step": 11204 }, { "epoch": 0.19863775553162544, "grad_norm": 2.828125, "learning_rate": 4.539865624362948e-05, "loss": 0.8383, "step": 11206 }, { "epoch": 0.198673207567237, "grad_norm": 2.515625, "learning_rate": 4.5397041506831154e-05, "loss": 0.7917, "step": 11208 }, { "epoch": 0.19870865960284856, "grad_norm": 2.546875, "learning_rate": 4.5395426515482005e-05, "loss": 0.7516, "step": 11210 }, { "epoch": 0.19874411163846015, "grad_norm": 2.640625, "learning_rate": 4.5393811269602173e-05, "loss": 0.7987, "step": 11212 }, { "epoch": 0.1987795636740717, "grad_norm": 2.625, "learning_rate": 4.539219576921183e-05, "loss": 0.7911, "step": 11214 }, { "epoch": 0.19881501570968327, "grad_norm": 2.640625, "learning_rate": 4.539058001433113e-05, "loss": 0.8121, "step": 11216 }, { "epoch": 0.19885046774529486, "grad_norm": 2.625, "learning_rate": 4.538896400498024e-05, "loss": 0.7945, "step": 11218 }, { "epoch": 0.19888591978090642, "grad_norm": 2.59375, "learning_rate": 4.538734774117932e-05, "loss": 0.8229, "step": 11220 }, { "epoch": 0.19892137181651798, "grad_norm": 2.390625, "learning_rate": 4.538573122294856e-05, "loss": 0.8015, "step": 11222 }, { "epoch": 0.19895682385212957, "grad_norm": 2.8125, "learning_rate": 4.53841144503081e-05, "loss": 0.7746, "step": 11224 }, { "epoch": 0.19899227588774113, "grad_norm": 2.953125, "learning_rate": 4.538249742327815e-05, "loss": 0.8317, "step": 11226 }, { "epoch": 0.1990277279233527, "grad_norm": 2.6875, "learning_rate": 4.5380880141878876e-05, "loss": 0.7737, "step": 11228 }, { "epoch": 0.19906317995896428, "grad_norm": 2.796875, "learning_rate": 4.5379262606130465e-05, "loss": 0.7977, "step": 11230 }, { "epoch": 0.19909863199457584, "grad_norm": 2.734375, "learning_rate": 4.53776448160531e-05, "loss": 0.7838, "step": 11232 }, { "epoch": 0.1991340840301874, "grad_norm": 2.546875, "learning_rate": 4.537602677166697e-05, "loss": 0.7875, "step": 11234 }, { "epoch": 0.19916953606579899, "grad_norm": 2.796875, "learning_rate": 4.537440847299227e-05, "loss": 0.798, "step": 11236 }, { "epoch": 0.19920498810141055, "grad_norm": 2.578125, "learning_rate": 4.53727899200492e-05, "loss": 0.8197, "step": 11238 }, { "epoch": 0.1992404401370221, "grad_norm": 2.765625, "learning_rate": 4.537117111285795e-05, "loss": 0.7943, "step": 11240 }, { "epoch": 0.1992758921726337, "grad_norm": 2.90625, "learning_rate": 4.536955205143873e-05, "loss": 0.858, "step": 11242 }, { "epoch": 0.19931134420824526, "grad_norm": 3.15625, "learning_rate": 4.536793273581174e-05, "loss": 0.781, "step": 11244 }, { "epoch": 0.19934679624385682, "grad_norm": 2.796875, "learning_rate": 4.5366313165997196e-05, "loss": 0.7907, "step": 11246 }, { "epoch": 0.1993822482794684, "grad_norm": 2.515625, "learning_rate": 4.5364693342015306e-05, "loss": 0.8009, "step": 11248 }, { "epoch": 0.19941770031507997, "grad_norm": 2.640625, "learning_rate": 4.536307326388628e-05, "loss": 0.8137, "step": 11250 }, { "epoch": 0.19945315235069153, "grad_norm": 2.640625, "learning_rate": 4.536145293163034e-05, "loss": 0.7723, "step": 11252 }, { "epoch": 0.19948860438630311, "grad_norm": 2.671875, "learning_rate": 4.535983234526772e-05, "loss": 0.7916, "step": 11254 }, { "epoch": 0.19952405642191468, "grad_norm": 2.578125, "learning_rate": 4.5358211504818625e-05, "loss": 0.7999, "step": 11256 }, { "epoch": 0.19955950845752624, "grad_norm": 2.953125, "learning_rate": 4.535659041030329e-05, "loss": 0.7989, "step": 11258 }, { "epoch": 0.19959496049313782, "grad_norm": 2.828125, "learning_rate": 4.535496906174195e-05, "loss": 0.8241, "step": 11260 }, { "epoch": 0.19963041252874938, "grad_norm": 2.890625, "learning_rate": 4.535334745915483e-05, "loss": 0.8053, "step": 11262 }, { "epoch": 0.19966586456436095, "grad_norm": 2.578125, "learning_rate": 4.535172560256218e-05, "loss": 0.8119, "step": 11264 }, { "epoch": 0.19970131659997253, "grad_norm": 2.703125, "learning_rate": 4.535010349198423e-05, "loss": 0.7971, "step": 11266 }, { "epoch": 0.1997367686355841, "grad_norm": 2.53125, "learning_rate": 4.5348481127441226e-05, "loss": 0.8, "step": 11268 }, { "epoch": 0.19977222067119565, "grad_norm": 2.734375, "learning_rate": 4.534685850895342e-05, "loss": 0.7838, "step": 11270 }, { "epoch": 0.19980767270680724, "grad_norm": 2.640625, "learning_rate": 4.534523563654105e-05, "loss": 0.7988, "step": 11272 }, { "epoch": 0.1998431247424188, "grad_norm": 2.953125, "learning_rate": 4.5343612510224374e-05, "loss": 0.8068, "step": 11274 }, { "epoch": 0.19987857677803036, "grad_norm": 2.59375, "learning_rate": 4.534198913002367e-05, "loss": 0.8058, "step": 11276 }, { "epoch": 0.19991402881364195, "grad_norm": 2.6875, "learning_rate": 4.534036549595916e-05, "loss": 0.8102, "step": 11278 }, { "epoch": 0.1999494808492535, "grad_norm": 2.84375, "learning_rate": 4.533874160805113e-05, "loss": 0.7908, "step": 11280 }, { "epoch": 0.19998493288486507, "grad_norm": 2.640625, "learning_rate": 4.5337117466319843e-05, "loss": 0.7979, "step": 11282 }, { "epoch": 0.20002038492047666, "grad_norm": 2.59375, "learning_rate": 4.533549307078557e-05, "loss": 0.8293, "step": 11284 }, { "epoch": 0.20005583695608822, "grad_norm": 2.625, "learning_rate": 4.5333868421468574e-05, "loss": 0.8047, "step": 11286 }, { "epoch": 0.20009128899169978, "grad_norm": 2.5, "learning_rate": 4.533224351838914e-05, "loss": 0.7941, "step": 11288 }, { "epoch": 0.20012674102731137, "grad_norm": 2.734375, "learning_rate": 4.533061836156753e-05, "loss": 0.7903, "step": 11290 }, { "epoch": 0.20016219306292293, "grad_norm": 2.59375, "learning_rate": 4.5328992951024054e-05, "loss": 0.8487, "step": 11292 }, { "epoch": 0.2001976450985345, "grad_norm": 2.65625, "learning_rate": 4.532736728677897e-05, "loss": 0.8333, "step": 11294 }, { "epoch": 0.20023309713414608, "grad_norm": 2.84375, "learning_rate": 4.5325741368852576e-05, "loss": 0.8218, "step": 11296 }, { "epoch": 0.20026854916975764, "grad_norm": 2.828125, "learning_rate": 4.532411519726517e-05, "loss": 0.7909, "step": 11298 }, { "epoch": 0.2003040012053692, "grad_norm": 2.890625, "learning_rate": 4.532248877203703e-05, "loss": 0.84, "step": 11300 }, { "epoch": 0.2003394532409808, "grad_norm": 2.703125, "learning_rate": 4.532086209318846e-05, "loss": 0.8219, "step": 11302 }, { "epoch": 0.20037490527659235, "grad_norm": 2.984375, "learning_rate": 4.531923516073978e-05, "loss": 0.7958, "step": 11304 }, { "epoch": 0.2004103573122039, "grad_norm": 2.796875, "learning_rate": 4.5317607974711265e-05, "loss": 0.8303, "step": 11306 }, { "epoch": 0.2004458093478155, "grad_norm": 2.8125, "learning_rate": 4.5315980535123246e-05, "loss": 0.817, "step": 11308 }, { "epoch": 0.20048126138342706, "grad_norm": 2.671875, "learning_rate": 4.531435284199601e-05, "loss": 0.8366, "step": 11310 }, { "epoch": 0.20051671341903862, "grad_norm": 2.8125, "learning_rate": 4.5312724895349885e-05, "loss": 0.7865, "step": 11312 }, { "epoch": 0.2005521654546502, "grad_norm": 2.625, "learning_rate": 4.531109669520519e-05, "loss": 0.8117, "step": 11314 }, { "epoch": 0.20058761749026177, "grad_norm": 2.59375, "learning_rate": 4.530946824158223e-05, "loss": 0.7994, "step": 11316 }, { "epoch": 0.20062306952587333, "grad_norm": 2.734375, "learning_rate": 4.530783953450134e-05, "loss": 0.7917, "step": 11318 }, { "epoch": 0.20065852156148492, "grad_norm": 2.90625, "learning_rate": 4.530621057398284e-05, "loss": 0.8028, "step": 11320 }, { "epoch": 0.20069397359709648, "grad_norm": 2.78125, "learning_rate": 4.530458136004706e-05, "loss": 0.8238, "step": 11322 }, { "epoch": 0.20072942563270804, "grad_norm": 2.953125, "learning_rate": 4.530295189271434e-05, "loss": 0.8099, "step": 11324 }, { "epoch": 0.20076487766831963, "grad_norm": 2.78125, "learning_rate": 4.530132217200501e-05, "loss": 0.8288, "step": 11326 }, { "epoch": 0.2008003297039312, "grad_norm": 2.828125, "learning_rate": 4.52996921979394e-05, "loss": 0.8228, "step": 11328 }, { "epoch": 0.20083578173954275, "grad_norm": 2.6875, "learning_rate": 4.5298061970537865e-05, "loss": 0.8187, "step": 11330 }, { "epoch": 0.20087123377515434, "grad_norm": 2.640625, "learning_rate": 4.529643148982074e-05, "loss": 0.7506, "step": 11332 }, { "epoch": 0.2009066858107659, "grad_norm": 2.703125, "learning_rate": 4.5294800755808385e-05, "loss": 0.7772, "step": 11334 }, { "epoch": 0.20094213784637746, "grad_norm": 2.921875, "learning_rate": 4.5293169768521135e-05, "loss": 0.804, "step": 11336 }, { "epoch": 0.20097758988198905, "grad_norm": 2.84375, "learning_rate": 4.529153852797936e-05, "loss": 0.8106, "step": 11338 }, { "epoch": 0.2010130419176006, "grad_norm": 2.671875, "learning_rate": 4.528990703420341e-05, "loss": 0.8061, "step": 11340 }, { "epoch": 0.20104849395321217, "grad_norm": 2.5625, "learning_rate": 4.528827528721364e-05, "loss": 0.7881, "step": 11342 }, { "epoch": 0.20108394598882376, "grad_norm": 2.9375, "learning_rate": 4.528664328703043e-05, "loss": 0.7986, "step": 11344 }, { "epoch": 0.20111939802443532, "grad_norm": 2.609375, "learning_rate": 4.528501103367413e-05, "loss": 0.7846, "step": 11346 }, { "epoch": 0.20115485006004688, "grad_norm": 2.640625, "learning_rate": 4.5283378527165125e-05, "loss": 0.8148, "step": 11348 }, { "epoch": 0.20119030209565847, "grad_norm": 2.65625, "learning_rate": 4.528174576752377e-05, "loss": 0.7728, "step": 11350 }, { "epoch": 0.20122575413127003, "grad_norm": 2.71875, "learning_rate": 4.528011275477045e-05, "loss": 0.7757, "step": 11352 }, { "epoch": 0.2012612061668816, "grad_norm": 2.59375, "learning_rate": 4.5278479488925563e-05, "loss": 0.8256, "step": 11354 }, { "epoch": 0.20129665820249315, "grad_norm": 2.71875, "learning_rate": 4.527684597000946e-05, "loss": 0.7902, "step": 11356 }, { "epoch": 0.20133211023810474, "grad_norm": 2.703125, "learning_rate": 4.527521219804255e-05, "loss": 0.8071, "step": 11358 }, { "epoch": 0.2013675622737163, "grad_norm": 2.515625, "learning_rate": 4.527357817304522e-05, "loss": 0.8052, "step": 11360 }, { "epoch": 0.20140301430932786, "grad_norm": 3.1875, "learning_rate": 4.527194389503784e-05, "loss": 0.8477, "step": 11362 }, { "epoch": 0.20143846634493945, "grad_norm": 2.59375, "learning_rate": 4.527030936404084e-05, "loss": 0.7812, "step": 11364 }, { "epoch": 0.201473918380551, "grad_norm": 3.25, "learning_rate": 4.5268674580074594e-05, "loss": 0.7922, "step": 11366 }, { "epoch": 0.20150937041616257, "grad_norm": 2.9375, "learning_rate": 4.5267039543159504e-05, "loss": 0.8091, "step": 11368 }, { "epoch": 0.20154482245177416, "grad_norm": 2.75, "learning_rate": 4.5265404253316e-05, "loss": 0.8252, "step": 11370 }, { "epoch": 0.20158027448738572, "grad_norm": 3.125, "learning_rate": 4.526376871056446e-05, "loss": 0.7924, "step": 11372 }, { "epoch": 0.20161572652299728, "grad_norm": 2.546875, "learning_rate": 4.5262132914925303e-05, "loss": 0.8151, "step": 11374 }, { "epoch": 0.20165117855860887, "grad_norm": 2.984375, "learning_rate": 4.526049686641896e-05, "loss": 0.8193, "step": 11376 }, { "epoch": 0.20168663059422043, "grad_norm": 2.75, "learning_rate": 4.525886056506582e-05, "loss": 0.8402, "step": 11378 }, { "epoch": 0.201722082629832, "grad_norm": 2.53125, "learning_rate": 4.5257224010886335e-05, "loss": 0.8217, "step": 11380 }, { "epoch": 0.20175753466544358, "grad_norm": 2.625, "learning_rate": 4.525558720390091e-05, "loss": 0.8155, "step": 11382 }, { "epoch": 0.20179298670105514, "grad_norm": 2.890625, "learning_rate": 4.525395014412997e-05, "loss": 0.7995, "step": 11384 }, { "epoch": 0.2018284387366667, "grad_norm": 2.515625, "learning_rate": 4.525231283159395e-05, "loss": 0.7724, "step": 11386 }, { "epoch": 0.2018638907722783, "grad_norm": 2.578125, "learning_rate": 4.525067526631329e-05, "loss": 0.7979, "step": 11388 }, { "epoch": 0.20189934280788985, "grad_norm": 2.6875, "learning_rate": 4.524903744830842e-05, "loss": 0.8032, "step": 11390 }, { "epoch": 0.2019347948435014, "grad_norm": 2.75, "learning_rate": 4.5247399377599773e-05, "loss": 0.8137, "step": 11392 }, { "epoch": 0.201970246879113, "grad_norm": 3.046875, "learning_rate": 4.524576105420781e-05, "loss": 0.7718, "step": 11394 }, { "epoch": 0.20200569891472456, "grad_norm": 2.796875, "learning_rate": 4.524412247815296e-05, "loss": 0.8087, "step": 11396 }, { "epoch": 0.20204115095033612, "grad_norm": 2.71875, "learning_rate": 4.524248364945568e-05, "loss": 0.8201, "step": 11398 }, { "epoch": 0.2020766029859477, "grad_norm": 2.515625, "learning_rate": 4.5240844568136415e-05, "loss": 0.7793, "step": 11400 }, { "epoch": 0.20211205502155927, "grad_norm": 2.96875, "learning_rate": 4.5239205234215634e-05, "loss": 0.8182, "step": 11402 }, { "epoch": 0.20214750705717083, "grad_norm": 2.796875, "learning_rate": 4.523756564771378e-05, "loss": 0.7857, "step": 11404 }, { "epoch": 0.20218295909278242, "grad_norm": 2.59375, "learning_rate": 4.523592580865132e-05, "loss": 0.7496, "step": 11406 }, { "epoch": 0.20221841112839398, "grad_norm": 2.6875, "learning_rate": 4.523428571704873e-05, "loss": 0.7862, "step": 11408 }, { "epoch": 0.20225386316400554, "grad_norm": 2.84375, "learning_rate": 4.523264537292646e-05, "loss": 0.7743, "step": 11410 }, { "epoch": 0.20228931519961713, "grad_norm": 2.34375, "learning_rate": 4.5231004776305e-05, "loss": 0.7779, "step": 11412 }, { "epoch": 0.2023247672352287, "grad_norm": 2.890625, "learning_rate": 4.52293639272048e-05, "loss": 0.7965, "step": 11414 }, { "epoch": 0.20236021927084025, "grad_norm": 2.546875, "learning_rate": 4.522772282564637e-05, "loss": 0.8085, "step": 11416 }, { "epoch": 0.20239567130645184, "grad_norm": 2.53125, "learning_rate": 4.522608147165016e-05, "loss": 0.8085, "step": 11418 }, { "epoch": 0.2024311233420634, "grad_norm": 2.953125, "learning_rate": 4.522443986523667e-05, "loss": 0.8009, "step": 11420 }, { "epoch": 0.20246657537767496, "grad_norm": 2.859375, "learning_rate": 4.522279800642638e-05, "loss": 0.8008, "step": 11422 }, { "epoch": 0.20250202741328654, "grad_norm": 2.640625, "learning_rate": 4.522115589523978e-05, "loss": 0.826, "step": 11424 }, { "epoch": 0.2025374794488981, "grad_norm": 2.40625, "learning_rate": 4.521951353169737e-05, "loss": 0.7952, "step": 11426 }, { "epoch": 0.20257293148450967, "grad_norm": 2.890625, "learning_rate": 4.521787091581964e-05, "loss": 0.7572, "step": 11428 }, { "epoch": 0.20260838352012125, "grad_norm": 2.53125, "learning_rate": 4.5216228047627096e-05, "loss": 0.8417, "step": 11430 }, { "epoch": 0.20264383555573282, "grad_norm": 2.671875, "learning_rate": 4.5214584927140236e-05, "loss": 0.8195, "step": 11432 }, { "epoch": 0.20267928759134438, "grad_norm": 2.796875, "learning_rate": 4.521294155437957e-05, "loss": 0.7914, "step": 11434 }, { "epoch": 0.20271473962695596, "grad_norm": 2.53125, "learning_rate": 4.52112979293656e-05, "loss": 0.763, "step": 11436 }, { "epoch": 0.20275019166256752, "grad_norm": 2.796875, "learning_rate": 4.520965405211884e-05, "loss": 0.8315, "step": 11438 }, { "epoch": 0.20278564369817909, "grad_norm": 3.171875, "learning_rate": 4.520800992265981e-05, "loss": 0.7956, "step": 11440 }, { "epoch": 0.20282109573379067, "grad_norm": 2.765625, "learning_rate": 4.520636554100902e-05, "loss": 0.7905, "step": 11442 }, { "epoch": 0.20285654776940223, "grad_norm": 2.703125, "learning_rate": 4.5204720907187004e-05, "loss": 0.7493, "step": 11444 }, { "epoch": 0.2028919998050138, "grad_norm": 2.6875, "learning_rate": 4.5203076021214274e-05, "loss": 0.8327, "step": 11446 }, { "epoch": 0.20292745184062538, "grad_norm": 2.65625, "learning_rate": 4.520143088311136e-05, "loss": 0.8388, "step": 11448 }, { "epoch": 0.20296290387623694, "grad_norm": 2.609375, "learning_rate": 4.5199785492898805e-05, "loss": 0.794, "step": 11450 }, { "epoch": 0.2029983559118485, "grad_norm": 3.109375, "learning_rate": 4.519813985059712e-05, "loss": 0.8091, "step": 11452 }, { "epoch": 0.2030338079474601, "grad_norm": 2.703125, "learning_rate": 4.519649395622687e-05, "loss": 0.8035, "step": 11454 }, { "epoch": 0.20306925998307165, "grad_norm": 2.890625, "learning_rate": 4.5194847809808585e-05, "loss": 0.8301, "step": 11456 }, { "epoch": 0.20310471201868321, "grad_norm": 2.515625, "learning_rate": 4.519320141136279e-05, "loss": 0.7984, "step": 11458 }, { "epoch": 0.2031401640542948, "grad_norm": 3.015625, "learning_rate": 4.519155476091006e-05, "loss": 0.8362, "step": 11460 }, { "epoch": 0.20317561608990636, "grad_norm": 3.03125, "learning_rate": 4.518990785847093e-05, "loss": 0.8017, "step": 11462 }, { "epoch": 0.20321106812551792, "grad_norm": 2.84375, "learning_rate": 4.5188260704065955e-05, "loss": 0.8518, "step": 11464 }, { "epoch": 0.2032465201611295, "grad_norm": 2.671875, "learning_rate": 4.518661329771569e-05, "loss": 0.8023, "step": 11466 }, { "epoch": 0.20328197219674107, "grad_norm": 2.84375, "learning_rate": 4.51849656394407e-05, "loss": 0.7892, "step": 11468 }, { "epoch": 0.20331742423235263, "grad_norm": 2.421875, "learning_rate": 4.518331772926154e-05, "loss": 0.7387, "step": 11470 }, { "epoch": 0.20335287626796422, "grad_norm": 2.875, "learning_rate": 4.518166956719877e-05, "loss": 0.7539, "step": 11472 }, { "epoch": 0.20338832830357578, "grad_norm": 2.84375, "learning_rate": 4.518002115327298e-05, "loss": 0.8094, "step": 11474 }, { "epoch": 0.20342378033918734, "grad_norm": 2.875, "learning_rate": 4.517837248750473e-05, "loss": 0.787, "step": 11476 }, { "epoch": 0.20345923237479893, "grad_norm": 2.5625, "learning_rate": 4.517672356991458e-05, "loss": 0.7691, "step": 11478 }, { "epoch": 0.2034946844104105, "grad_norm": 3.015625, "learning_rate": 4.517507440052313e-05, "loss": 0.8491, "step": 11480 }, { "epoch": 0.20353013644602205, "grad_norm": 2.8125, "learning_rate": 4.517342497935096e-05, "loss": 0.8421, "step": 11482 }, { "epoch": 0.20356558848163364, "grad_norm": 3.0, "learning_rate": 4.517177530641864e-05, "loss": 0.8048, "step": 11484 }, { "epoch": 0.2036010405172452, "grad_norm": 2.59375, "learning_rate": 4.517012538174676e-05, "loss": 0.8004, "step": 11486 }, { "epoch": 0.20363649255285676, "grad_norm": 2.828125, "learning_rate": 4.516847520535593e-05, "loss": 0.806, "step": 11488 }, { "epoch": 0.20367194458846835, "grad_norm": 3.21875, "learning_rate": 4.516682477726673e-05, "loss": 0.7854, "step": 11490 }, { "epoch": 0.2037073966240799, "grad_norm": 2.765625, "learning_rate": 4.5165174097499755e-05, "loss": 0.7726, "step": 11492 }, { "epoch": 0.20374284865969147, "grad_norm": 2.796875, "learning_rate": 4.5163523166075594e-05, "loss": 0.8349, "step": 11494 }, { "epoch": 0.20377830069530306, "grad_norm": 2.578125, "learning_rate": 4.516187198301488e-05, "loss": 0.7926, "step": 11496 }, { "epoch": 0.20381375273091462, "grad_norm": 2.75, "learning_rate": 4.516022054833819e-05, "loss": 0.8755, "step": 11498 }, { "epoch": 0.20384920476652618, "grad_norm": 2.65625, "learning_rate": 4.515856886206616e-05, "loss": 0.8133, "step": 11500 }, { "epoch": 0.20388465680213777, "grad_norm": 2.484375, "learning_rate": 4.5156916924219385e-05, "loss": 0.8109, "step": 11502 }, { "epoch": 0.20392010883774933, "grad_norm": 2.46875, "learning_rate": 4.515526473481848e-05, "loss": 0.8204, "step": 11504 }, { "epoch": 0.2039555608733609, "grad_norm": 2.515625, "learning_rate": 4.515361229388407e-05, "loss": 0.78, "step": 11506 }, { "epoch": 0.20399101290897248, "grad_norm": 2.734375, "learning_rate": 4.515195960143678e-05, "loss": 0.8417, "step": 11508 }, { "epoch": 0.20402646494458404, "grad_norm": 2.859375, "learning_rate": 4.515030665749723e-05, "loss": 0.8101, "step": 11510 }, { "epoch": 0.2040619169801956, "grad_norm": 2.453125, "learning_rate": 4.514865346208605e-05, "loss": 0.8105, "step": 11512 }, { "epoch": 0.2040973690158072, "grad_norm": 3.015625, "learning_rate": 4.514700001522387e-05, "loss": 0.8042, "step": 11514 }, { "epoch": 0.20413282105141875, "grad_norm": 2.625, "learning_rate": 4.514534631693133e-05, "loss": 0.7869, "step": 11516 }, { "epoch": 0.2041682730870303, "grad_norm": 2.71875, "learning_rate": 4.5143692367229065e-05, "loss": 0.8014, "step": 11518 }, { "epoch": 0.2042037251226419, "grad_norm": 3.046875, "learning_rate": 4.5142038166137706e-05, "loss": 0.8336, "step": 11520 }, { "epoch": 0.20423917715825346, "grad_norm": 2.8125, "learning_rate": 4.5140383713677916e-05, "loss": 0.7753, "step": 11522 }, { "epoch": 0.20427462919386502, "grad_norm": 2.859375, "learning_rate": 4.513872900987032e-05, "loss": 0.8049, "step": 11524 }, { "epoch": 0.2043100812294766, "grad_norm": 2.671875, "learning_rate": 4.513707405473559e-05, "loss": 0.793, "step": 11526 }, { "epoch": 0.20434553326508817, "grad_norm": 2.875, "learning_rate": 4.5135418848294366e-05, "loss": 0.7965, "step": 11528 }, { "epoch": 0.20438098530069973, "grad_norm": 3.015625, "learning_rate": 4.5133763390567316e-05, "loss": 0.7977, "step": 11530 }, { "epoch": 0.2044164373363113, "grad_norm": 2.984375, "learning_rate": 4.513210768157508e-05, "loss": 0.8373, "step": 11532 }, { "epoch": 0.20445188937192288, "grad_norm": 2.859375, "learning_rate": 4.5130451721338344e-05, "loss": 0.8149, "step": 11534 }, { "epoch": 0.20448734140753444, "grad_norm": 2.75, "learning_rate": 4.5128795509877764e-05, "loss": 0.7718, "step": 11536 }, { "epoch": 0.204522793443146, "grad_norm": 2.765625, "learning_rate": 4.5127139047214006e-05, "loss": 0.7862, "step": 11538 }, { "epoch": 0.2045582454787576, "grad_norm": 2.640625, "learning_rate": 4.5125482333367744e-05, "loss": 0.7925, "step": 11540 }, { "epoch": 0.20459369751436915, "grad_norm": 2.6875, "learning_rate": 4.512382536835965e-05, "loss": 0.8175, "step": 11542 }, { "epoch": 0.2046291495499807, "grad_norm": 2.734375, "learning_rate": 4.512216815221041e-05, "loss": 0.8373, "step": 11544 }, { "epoch": 0.2046646015855923, "grad_norm": 2.734375, "learning_rate": 4.51205106849407e-05, "loss": 0.8322, "step": 11546 }, { "epoch": 0.20470005362120386, "grad_norm": 2.75, "learning_rate": 4.511885296657121e-05, "loss": 0.7942, "step": 11548 }, { "epoch": 0.20473550565681542, "grad_norm": 2.546875, "learning_rate": 4.511719499712264e-05, "loss": 0.7904, "step": 11550 }, { "epoch": 0.204770957692427, "grad_norm": 2.5625, "learning_rate": 4.511553677661564e-05, "loss": 0.8381, "step": 11552 }, { "epoch": 0.20480640972803857, "grad_norm": 2.59375, "learning_rate": 4.5113878305070945e-05, "loss": 0.7896, "step": 11554 }, { "epoch": 0.20484186176365013, "grad_norm": 2.875, "learning_rate": 4.5112219582509244e-05, "loss": 0.8513, "step": 11556 }, { "epoch": 0.20487731379926172, "grad_norm": 3.171875, "learning_rate": 4.511056060895122e-05, "loss": 0.7517, "step": 11558 }, { "epoch": 0.20491276583487328, "grad_norm": 2.4375, "learning_rate": 4.510890138441759e-05, "loss": 0.7744, "step": 11560 }, { "epoch": 0.20494821787048484, "grad_norm": 2.828125, "learning_rate": 4.5107241908929066e-05, "loss": 0.8118, "step": 11562 }, { "epoch": 0.20498366990609643, "grad_norm": 2.8125, "learning_rate": 4.510558218250635e-05, "loss": 0.8291, "step": 11564 }, { "epoch": 0.205019121941708, "grad_norm": 2.546875, "learning_rate": 4.5103922205170144e-05, "loss": 0.794, "step": 11566 }, { "epoch": 0.20505457397731955, "grad_norm": 2.703125, "learning_rate": 4.510226197694119e-05, "loss": 0.8009, "step": 11568 }, { "epoch": 0.20509002601293114, "grad_norm": 2.625, "learning_rate": 4.510060149784019e-05, "loss": 0.7495, "step": 11570 }, { "epoch": 0.2051254780485427, "grad_norm": 2.796875, "learning_rate": 4.509894076788787e-05, "loss": 0.8229, "step": 11572 }, { "epoch": 0.20516093008415426, "grad_norm": 2.671875, "learning_rate": 4.509727978710495e-05, "loss": 0.8195, "step": 11574 }, { "epoch": 0.20519638211976585, "grad_norm": 2.625, "learning_rate": 4.509561855551217e-05, "loss": 0.8167, "step": 11576 }, { "epoch": 0.2052318341553774, "grad_norm": 2.609375, "learning_rate": 4.509395707313026e-05, "loss": 0.7554, "step": 11578 }, { "epoch": 0.20526728619098897, "grad_norm": 2.90625, "learning_rate": 4.509229533997994e-05, "loss": 0.8383, "step": 11580 }, { "epoch": 0.20530273822660056, "grad_norm": 2.875, "learning_rate": 4.509063335608196e-05, "loss": 0.7817, "step": 11582 }, { "epoch": 0.20533819026221212, "grad_norm": 2.390625, "learning_rate": 4.5088971121457066e-05, "loss": 0.7454, "step": 11584 }, { "epoch": 0.20537364229782368, "grad_norm": 2.59375, "learning_rate": 4.508730863612599e-05, "loss": 0.8176, "step": 11586 }, { "epoch": 0.20540909433343527, "grad_norm": 2.703125, "learning_rate": 4.50856459001095e-05, "loss": 0.821, "step": 11588 }, { "epoch": 0.20544454636904683, "grad_norm": 2.640625, "learning_rate": 4.5083982913428324e-05, "loss": 0.843, "step": 11590 }, { "epoch": 0.2054799984046584, "grad_norm": 2.65625, "learning_rate": 4.508231967610322e-05, "loss": 0.8111, "step": 11592 }, { "epoch": 0.20551545044026998, "grad_norm": 2.859375, "learning_rate": 4.5080656188154955e-05, "loss": 0.7918, "step": 11594 }, { "epoch": 0.20555090247588154, "grad_norm": 2.640625, "learning_rate": 4.507899244960429e-05, "loss": 0.7829, "step": 11596 }, { "epoch": 0.2055863545114931, "grad_norm": 2.6875, "learning_rate": 4.5077328460471965e-05, "loss": 0.8103, "step": 11598 }, { "epoch": 0.20562180654710469, "grad_norm": 2.546875, "learning_rate": 4.5075664220778766e-05, "loss": 0.8046, "step": 11600 }, { "epoch": 0.20565725858271625, "grad_norm": 2.984375, "learning_rate": 4.5073999730545466e-05, "loss": 0.7595, "step": 11602 }, { "epoch": 0.2056927106183278, "grad_norm": 2.34375, "learning_rate": 4.507233498979283e-05, "loss": 0.8069, "step": 11604 }, { "epoch": 0.2057281626539394, "grad_norm": 2.5, "learning_rate": 4.507066999854164e-05, "loss": 0.8049, "step": 11606 }, { "epoch": 0.20576361468955096, "grad_norm": 2.65625, "learning_rate": 4.506900475681266e-05, "loss": 0.7755, "step": 11608 }, { "epoch": 0.20579906672516252, "grad_norm": 2.5625, "learning_rate": 4.506733926462668e-05, "loss": 0.794, "step": 11610 }, { "epoch": 0.2058345187607741, "grad_norm": 2.640625, "learning_rate": 4.5065673522004495e-05, "loss": 0.7593, "step": 11612 }, { "epoch": 0.20586997079638567, "grad_norm": 2.734375, "learning_rate": 4.5064007528966865e-05, "loss": 0.7871, "step": 11614 }, { "epoch": 0.20590542283199723, "grad_norm": 2.78125, "learning_rate": 4.506234128553461e-05, "loss": 0.8246, "step": 11616 }, { "epoch": 0.20594087486760881, "grad_norm": 2.90625, "learning_rate": 4.506067479172852e-05, "loss": 0.8485, "step": 11618 }, { "epoch": 0.20597632690322037, "grad_norm": 2.8125, "learning_rate": 4.505900804756938e-05, "loss": 0.8238, "step": 11620 }, { "epoch": 0.20601177893883194, "grad_norm": 2.59375, "learning_rate": 4.5057341053078004e-05, "loss": 0.7727, "step": 11622 }, { "epoch": 0.20604723097444352, "grad_norm": 2.78125, "learning_rate": 4.505567380827519e-05, "loss": 0.7404, "step": 11624 }, { "epoch": 0.20608268301005508, "grad_norm": 3.046875, "learning_rate": 4.505400631318174e-05, "loss": 0.7966, "step": 11626 }, { "epoch": 0.20611813504566665, "grad_norm": 2.96875, "learning_rate": 4.505233856781846e-05, "loss": 0.8268, "step": 11628 }, { "epoch": 0.20615358708127823, "grad_norm": 3.046875, "learning_rate": 4.5050670572206186e-05, "loss": 0.803, "step": 11630 }, { "epoch": 0.2061890391168898, "grad_norm": 2.90625, "learning_rate": 4.504900232636571e-05, "loss": 0.7863, "step": 11632 }, { "epoch": 0.20622449115250135, "grad_norm": 2.8125, "learning_rate": 4.5047333830317865e-05, "loss": 0.8045, "step": 11634 }, { "epoch": 0.20625994318811294, "grad_norm": 2.59375, "learning_rate": 4.504566508408347e-05, "loss": 0.8201, "step": 11636 }, { "epoch": 0.2062953952237245, "grad_norm": 2.421875, "learning_rate": 4.5043996087683346e-05, "loss": 0.7946, "step": 11638 }, { "epoch": 0.20633084725933606, "grad_norm": 2.6875, "learning_rate": 4.504232684113833e-05, "loss": 0.8265, "step": 11640 }, { "epoch": 0.20636629929494765, "grad_norm": 2.375, "learning_rate": 4.504065734446925e-05, "loss": 0.794, "step": 11642 }, { "epoch": 0.2064017513305592, "grad_norm": 2.625, "learning_rate": 4.503898759769694e-05, "loss": 0.7796, "step": 11644 }, { "epoch": 0.20643720336617077, "grad_norm": 2.59375, "learning_rate": 4.5037317600842235e-05, "loss": 0.7687, "step": 11646 }, { "epoch": 0.20647265540178236, "grad_norm": 2.609375, "learning_rate": 4.503564735392598e-05, "loss": 0.78, "step": 11648 }, { "epoch": 0.20650810743739392, "grad_norm": 2.40625, "learning_rate": 4.503397685696902e-05, "loss": 0.7891, "step": 11650 }, { "epoch": 0.20654355947300548, "grad_norm": 2.828125, "learning_rate": 4.5032306109992204e-05, "loss": 0.7959, "step": 11652 }, { "epoch": 0.20657901150861707, "grad_norm": 3.0, "learning_rate": 4.503063511301638e-05, "loss": 0.7963, "step": 11654 }, { "epoch": 0.20661446354422863, "grad_norm": 2.765625, "learning_rate": 4.50289638660624e-05, "loss": 0.7754, "step": 11656 }, { "epoch": 0.2066499155798402, "grad_norm": 2.828125, "learning_rate": 4.502729236915112e-05, "loss": 0.7979, "step": 11658 }, { "epoch": 0.20668536761545178, "grad_norm": 2.53125, "learning_rate": 4.50256206223034e-05, "loss": 0.8032, "step": 11660 }, { "epoch": 0.20672081965106334, "grad_norm": 2.453125, "learning_rate": 4.502394862554011e-05, "loss": 0.8072, "step": 11662 }, { "epoch": 0.2067562716866749, "grad_norm": 2.90625, "learning_rate": 4.5022276378882125e-05, "loss": 0.8484, "step": 11664 }, { "epoch": 0.2067917237222865, "grad_norm": 2.609375, "learning_rate": 4.5020603882350286e-05, "loss": 0.7754, "step": 11666 }, { "epoch": 0.20682717575789805, "grad_norm": 2.84375, "learning_rate": 4.501893113596548e-05, "loss": 0.8538, "step": 11668 }, { "epoch": 0.2068626277935096, "grad_norm": 2.546875, "learning_rate": 4.501725813974858e-05, "loss": 0.7993, "step": 11670 }, { "epoch": 0.2068980798291212, "grad_norm": 2.765625, "learning_rate": 4.501558489372049e-05, "loss": 0.8001, "step": 11672 }, { "epoch": 0.20693353186473276, "grad_norm": 2.578125, "learning_rate": 4.5013911397902044e-05, "loss": 0.8201, "step": 11674 }, { "epoch": 0.20696898390034432, "grad_norm": 2.671875, "learning_rate": 4.5012237652314164e-05, "loss": 0.7967, "step": 11676 }, { "epoch": 0.2070044359359559, "grad_norm": 2.5625, "learning_rate": 4.5010563656977725e-05, "loss": 0.8026, "step": 11678 }, { "epoch": 0.20703988797156747, "grad_norm": 2.46875, "learning_rate": 4.5008889411913625e-05, "loss": 0.7553, "step": 11680 }, { "epoch": 0.20707534000717903, "grad_norm": 2.90625, "learning_rate": 4.500721491714274e-05, "loss": 0.7963, "step": 11682 }, { "epoch": 0.20711079204279062, "grad_norm": 2.8125, "learning_rate": 4.500554017268599e-05, "loss": 0.8529, "step": 11684 }, { "epoch": 0.20714624407840218, "grad_norm": 2.46875, "learning_rate": 4.5003865178564265e-05, "loss": 0.7984, "step": 11686 }, { "epoch": 0.20718169611401374, "grad_norm": 2.84375, "learning_rate": 4.500218993479847e-05, "loss": 0.8345, "step": 11688 }, { "epoch": 0.20721714814962533, "grad_norm": 2.734375, "learning_rate": 4.5000514441409505e-05, "loss": 0.8268, "step": 11690 }, { "epoch": 0.2072526001852369, "grad_norm": 2.75, "learning_rate": 4.499883869841828e-05, "loss": 0.8061, "step": 11692 }, { "epoch": 0.20728805222084845, "grad_norm": 2.75, "learning_rate": 4.499716270584573e-05, "loss": 0.8296, "step": 11694 }, { "epoch": 0.20732350425646004, "grad_norm": 2.828125, "learning_rate": 4.4995486463712735e-05, "loss": 0.7848, "step": 11696 }, { "epoch": 0.2073589562920716, "grad_norm": 2.703125, "learning_rate": 4.4993809972040246e-05, "loss": 0.8256, "step": 11698 }, { "epoch": 0.20739440832768316, "grad_norm": 2.546875, "learning_rate": 4.4992133230849176e-05, "loss": 0.835, "step": 11700 }, { "epoch": 0.20742986036329472, "grad_norm": 2.859375, "learning_rate": 4.499045624016044e-05, "loss": 0.7905, "step": 11702 }, { "epoch": 0.2074653123989063, "grad_norm": 2.8125, "learning_rate": 4.498877899999498e-05, "loss": 0.7935, "step": 11704 }, { "epoch": 0.20750076443451787, "grad_norm": 2.890625, "learning_rate": 4.4987101510373717e-05, "loss": 0.7899, "step": 11706 }, { "epoch": 0.20753621647012943, "grad_norm": 2.78125, "learning_rate": 4.4985423771317595e-05, "loss": 0.7984, "step": 11708 }, { "epoch": 0.20757166850574102, "grad_norm": 2.640625, "learning_rate": 4.498374578284754e-05, "loss": 0.8174, "step": 11710 }, { "epoch": 0.20760712054135258, "grad_norm": 2.71875, "learning_rate": 4.49820675449845e-05, "loss": 0.8063, "step": 11712 }, { "epoch": 0.20764257257696414, "grad_norm": 2.84375, "learning_rate": 4.498038905774942e-05, "loss": 0.8267, "step": 11714 }, { "epoch": 0.20767802461257573, "grad_norm": 2.65625, "learning_rate": 4.497871032116325e-05, "loss": 0.8247, "step": 11716 }, { "epoch": 0.2077134766481873, "grad_norm": 2.703125, "learning_rate": 4.497703133524693e-05, "loss": 0.762, "step": 11718 }, { "epoch": 0.20774892868379885, "grad_norm": 2.765625, "learning_rate": 4.497535210002143e-05, "loss": 0.7811, "step": 11720 }, { "epoch": 0.20778438071941044, "grad_norm": 2.734375, "learning_rate": 4.497367261550769e-05, "loss": 0.8083, "step": 11722 }, { "epoch": 0.207819832755022, "grad_norm": 2.671875, "learning_rate": 4.497199288172668e-05, "loss": 0.7593, "step": 11724 }, { "epoch": 0.20785528479063356, "grad_norm": 2.625, "learning_rate": 4.497031289869936e-05, "loss": 0.7981, "step": 11726 }, { "epoch": 0.20789073682624515, "grad_norm": 3.0, "learning_rate": 4.4968632666446684e-05, "loss": 0.7701, "step": 11728 }, { "epoch": 0.2079261888618567, "grad_norm": 2.75, "learning_rate": 4.4966952184989645e-05, "loss": 0.8126, "step": 11730 }, { "epoch": 0.20796164089746827, "grad_norm": 2.25, "learning_rate": 4.4965271454349186e-05, "loss": 0.8052, "step": 11732 }, { "epoch": 0.20799709293307986, "grad_norm": 2.515625, "learning_rate": 4.496359047454631e-05, "loss": 0.783, "step": 11734 }, { "epoch": 0.20803254496869142, "grad_norm": 2.59375, "learning_rate": 4.496190924560197e-05, "loss": 0.8078, "step": 11736 }, { "epoch": 0.20806799700430298, "grad_norm": 2.71875, "learning_rate": 4.4960227767537175e-05, "loss": 0.8013, "step": 11738 }, { "epoch": 0.20810344903991457, "grad_norm": 2.59375, "learning_rate": 4.4958546040372896e-05, "loss": 0.7807, "step": 11740 }, { "epoch": 0.20813890107552613, "grad_norm": 2.984375, "learning_rate": 4.495686406413011e-05, "loss": 0.82, "step": 11742 }, { "epoch": 0.2081743531111377, "grad_norm": 2.5625, "learning_rate": 4.495518183882982e-05, "loss": 0.7697, "step": 11744 }, { "epoch": 0.20820980514674928, "grad_norm": 2.46875, "learning_rate": 4.4953499364493015e-05, "loss": 0.8185, "step": 11746 }, { "epoch": 0.20824525718236084, "grad_norm": 2.6875, "learning_rate": 4.49518166411407e-05, "loss": 0.8237, "step": 11748 }, { "epoch": 0.2082807092179724, "grad_norm": 2.734375, "learning_rate": 4.4950133668793856e-05, "loss": 0.8437, "step": 11750 }, { "epoch": 0.208316161253584, "grad_norm": 2.765625, "learning_rate": 4.4948450447473515e-05, "loss": 0.8473, "step": 11752 }, { "epoch": 0.20835161328919555, "grad_norm": 2.703125, "learning_rate": 4.494676697720066e-05, "loss": 0.8198, "step": 11754 }, { "epoch": 0.2083870653248071, "grad_norm": 2.71875, "learning_rate": 4.4945083257996306e-05, "loss": 0.7779, "step": 11756 }, { "epoch": 0.2084225173604187, "grad_norm": 2.484375, "learning_rate": 4.494339928988147e-05, "loss": 0.7301, "step": 11758 }, { "epoch": 0.20845796939603026, "grad_norm": 2.609375, "learning_rate": 4.494171507287717e-05, "loss": 0.8416, "step": 11760 }, { "epoch": 0.20849342143164182, "grad_norm": 2.65625, "learning_rate": 4.4940030607004405e-05, "loss": 0.8098, "step": 11762 }, { "epoch": 0.2085288734672534, "grad_norm": 2.734375, "learning_rate": 4.4938345892284225e-05, "loss": 0.7958, "step": 11764 }, { "epoch": 0.20856432550286497, "grad_norm": 2.765625, "learning_rate": 4.493666092873763e-05, "loss": 0.781, "step": 11766 }, { "epoch": 0.20859977753847653, "grad_norm": 2.875, "learning_rate": 4.493497571638567e-05, "loss": 0.8235, "step": 11768 }, { "epoch": 0.20863522957408812, "grad_norm": 2.71875, "learning_rate": 4.493329025524936e-05, "loss": 0.7772, "step": 11770 }, { "epoch": 0.20867068160969968, "grad_norm": 2.515625, "learning_rate": 4.4931604545349735e-05, "loss": 0.7812, "step": 11772 }, { "epoch": 0.20870613364531124, "grad_norm": 2.75, "learning_rate": 4.492991858670784e-05, "loss": 0.8014, "step": 11774 }, { "epoch": 0.20874158568092283, "grad_norm": 3.328125, "learning_rate": 4.492823237934472e-05, "loss": 0.8308, "step": 11776 }, { "epoch": 0.20877703771653439, "grad_norm": 2.578125, "learning_rate": 4.4926545923281404e-05, "loss": 0.7867, "step": 11778 }, { "epoch": 0.20881248975214595, "grad_norm": 2.8125, "learning_rate": 4.492485921853894e-05, "loss": 0.793, "step": 11780 }, { "epoch": 0.20884794178775754, "grad_norm": 2.96875, "learning_rate": 4.492317226513839e-05, "loss": 0.7952, "step": 11782 }, { "epoch": 0.2088833938233691, "grad_norm": 2.78125, "learning_rate": 4.4921485063100796e-05, "loss": 0.7814, "step": 11784 }, { "epoch": 0.20891884585898066, "grad_norm": 2.859375, "learning_rate": 4.491979761244722e-05, "loss": 0.8192, "step": 11786 }, { "epoch": 0.20895429789459224, "grad_norm": 2.609375, "learning_rate": 4.491810991319873e-05, "loss": 0.8639, "step": 11788 }, { "epoch": 0.2089897499302038, "grad_norm": 2.578125, "learning_rate": 4.491642196537635e-05, "loss": 0.7892, "step": 11790 }, { "epoch": 0.20902520196581537, "grad_norm": 2.515625, "learning_rate": 4.491473376900119e-05, "loss": 0.795, "step": 11792 }, { "epoch": 0.20906065400142695, "grad_norm": 2.609375, "learning_rate": 4.4913045324094306e-05, "loss": 0.8182, "step": 11794 }, { "epoch": 0.20909610603703851, "grad_norm": 2.671875, "learning_rate": 4.4911356630676756e-05, "loss": 0.8547, "step": 11796 }, { "epoch": 0.20913155807265008, "grad_norm": 2.75, "learning_rate": 4.4909667688769616e-05, "loss": 0.7721, "step": 11798 }, { "epoch": 0.20916701010826166, "grad_norm": 2.78125, "learning_rate": 4.490797849839398e-05, "loss": 0.8018, "step": 11800 }, { "epoch": 0.20920246214387322, "grad_norm": 2.515625, "learning_rate": 4.4906289059570916e-05, "loss": 0.7506, "step": 11802 }, { "epoch": 0.20923791417948479, "grad_norm": 2.6875, "learning_rate": 4.490459937232151e-05, "loss": 0.8056, "step": 11804 }, { "epoch": 0.20927336621509637, "grad_norm": 2.8125, "learning_rate": 4.4902909436666855e-05, "loss": 0.8051, "step": 11806 }, { "epoch": 0.20930881825070793, "grad_norm": 2.96875, "learning_rate": 4.490121925262803e-05, "loss": 0.7755, "step": 11808 }, { "epoch": 0.2093442702863195, "grad_norm": 2.546875, "learning_rate": 4.489952882022613e-05, "loss": 0.8145, "step": 11810 }, { "epoch": 0.20937972232193108, "grad_norm": 2.703125, "learning_rate": 4.4897838139482263e-05, "loss": 0.7995, "step": 11812 }, { "epoch": 0.20941517435754264, "grad_norm": 2.609375, "learning_rate": 4.489614721041751e-05, "loss": 0.7982, "step": 11814 }, { "epoch": 0.2094506263931542, "grad_norm": 2.8125, "learning_rate": 4.4894456033053e-05, "loss": 0.8647, "step": 11816 }, { "epoch": 0.2094860784287658, "grad_norm": 2.8125, "learning_rate": 4.4892764607409806e-05, "loss": 0.8212, "step": 11818 }, { "epoch": 0.20952153046437735, "grad_norm": 2.5625, "learning_rate": 4.489107293350907e-05, "loss": 0.7901, "step": 11820 }, { "epoch": 0.20955698249998891, "grad_norm": 2.84375, "learning_rate": 4.488938101137188e-05, "loss": 0.8199, "step": 11822 }, { "epoch": 0.2095924345356005, "grad_norm": 2.875, "learning_rate": 4.4887688841019346e-05, "loss": 0.8028, "step": 11824 }, { "epoch": 0.20962788657121206, "grad_norm": 3.15625, "learning_rate": 4.488599642247261e-05, "loss": 0.796, "step": 11826 }, { "epoch": 0.20966333860682362, "grad_norm": 2.671875, "learning_rate": 4.4884303755752775e-05, "loss": 0.8427, "step": 11828 }, { "epoch": 0.2096987906424352, "grad_norm": 2.921875, "learning_rate": 4.488261084088098e-05, "loss": 0.7922, "step": 11830 }, { "epoch": 0.20973424267804677, "grad_norm": 2.671875, "learning_rate": 4.488091767787833e-05, "loss": 0.798, "step": 11832 }, { "epoch": 0.20976969471365833, "grad_norm": 2.84375, "learning_rate": 4.4879224266765974e-05, "loss": 0.8005, "step": 11834 }, { "epoch": 0.20980514674926992, "grad_norm": 2.984375, "learning_rate": 4.4877530607565045e-05, "loss": 0.7842, "step": 11836 }, { "epoch": 0.20984059878488148, "grad_norm": 2.96875, "learning_rate": 4.487583670029667e-05, "loss": 0.8524, "step": 11838 }, { "epoch": 0.20987605082049304, "grad_norm": 3.15625, "learning_rate": 4.487414254498199e-05, "loss": 0.8432, "step": 11840 }, { "epoch": 0.20991150285610463, "grad_norm": 2.765625, "learning_rate": 4.4872448141642156e-05, "loss": 0.7839, "step": 11842 }, { "epoch": 0.2099469548917162, "grad_norm": 2.578125, "learning_rate": 4.48707534902983e-05, "loss": 0.7926, "step": 11844 }, { "epoch": 0.20998240692732775, "grad_norm": 3.015625, "learning_rate": 4.486905859097158e-05, "loss": 0.8395, "step": 11846 }, { "epoch": 0.21001785896293934, "grad_norm": 2.578125, "learning_rate": 4.486736344368315e-05, "loss": 0.7788, "step": 11848 }, { "epoch": 0.2100533109985509, "grad_norm": 2.578125, "learning_rate": 4.4865668048454166e-05, "loss": 0.7706, "step": 11850 }, { "epoch": 0.21008876303416246, "grad_norm": 2.546875, "learning_rate": 4.486397240530578e-05, "loss": 0.8222, "step": 11852 }, { "epoch": 0.21012421506977405, "grad_norm": 2.71875, "learning_rate": 4.486227651425916e-05, "loss": 0.7723, "step": 11854 }, { "epoch": 0.2101596671053856, "grad_norm": 2.96875, "learning_rate": 4.486058037533546e-05, "loss": 0.8181, "step": 11856 }, { "epoch": 0.21019511914099717, "grad_norm": 2.6875, "learning_rate": 4.4858883988555854e-05, "loss": 0.7714, "step": 11858 }, { "epoch": 0.21023057117660876, "grad_norm": 2.8125, "learning_rate": 4.485718735394151e-05, "loss": 0.8509, "step": 11860 }, { "epoch": 0.21026602321222032, "grad_norm": 2.953125, "learning_rate": 4.485549047151361e-05, "loss": 0.8047, "step": 11862 }, { "epoch": 0.21030147524783188, "grad_norm": 2.6875, "learning_rate": 4.485379334129333e-05, "loss": 0.809, "step": 11864 }, { "epoch": 0.21033692728344347, "grad_norm": 2.890625, "learning_rate": 4.485209596330183e-05, "loss": 0.7935, "step": 11866 }, { "epoch": 0.21037237931905503, "grad_norm": 2.75, "learning_rate": 4.485039833756032e-05, "loss": 0.7642, "step": 11868 }, { "epoch": 0.2104078313546666, "grad_norm": 3.09375, "learning_rate": 4.484870046408996e-05, "loss": 0.8066, "step": 11870 }, { "epoch": 0.21044328339027815, "grad_norm": 2.625, "learning_rate": 4.4847002342911956e-05, "loss": 0.8243, "step": 11872 }, { "epoch": 0.21047873542588974, "grad_norm": 2.609375, "learning_rate": 4.48453039740475e-05, "loss": 0.8057, "step": 11874 }, { "epoch": 0.2105141874615013, "grad_norm": 2.796875, "learning_rate": 4.4843605357517786e-05, "loss": 0.7966, "step": 11876 }, { "epoch": 0.21054963949711286, "grad_norm": 2.890625, "learning_rate": 4.484190649334401e-05, "loss": 0.8227, "step": 11878 }, { "epoch": 0.21058509153272445, "grad_norm": 2.8125, "learning_rate": 4.484020738154737e-05, "loss": 0.857, "step": 11880 }, { "epoch": 0.210620543568336, "grad_norm": 2.578125, "learning_rate": 4.4838508022149074e-05, "loss": 0.7818, "step": 11882 }, { "epoch": 0.21065599560394757, "grad_norm": 2.6875, "learning_rate": 4.483680841517033e-05, "loss": 0.7862, "step": 11884 }, { "epoch": 0.21069144763955916, "grad_norm": 2.578125, "learning_rate": 4.4835108560632353e-05, "loss": 0.7826, "step": 11886 }, { "epoch": 0.21072689967517072, "grad_norm": 2.75, "learning_rate": 4.483340845855635e-05, "loss": 0.7946, "step": 11888 }, { "epoch": 0.21076235171078228, "grad_norm": 3.03125, "learning_rate": 4.4831708108963546e-05, "loss": 0.8464, "step": 11890 }, { "epoch": 0.21079780374639387, "grad_norm": 3.15625, "learning_rate": 4.483000751187515e-05, "loss": 0.8415, "step": 11892 }, { "epoch": 0.21083325578200543, "grad_norm": 2.765625, "learning_rate": 4.4828306667312385e-05, "loss": 0.8356, "step": 11894 }, { "epoch": 0.210868707817617, "grad_norm": 2.578125, "learning_rate": 4.4826605575296486e-05, "loss": 0.8171, "step": 11896 }, { "epoch": 0.21090415985322858, "grad_norm": 2.59375, "learning_rate": 4.482490423584868e-05, "loss": 0.7678, "step": 11898 }, { "epoch": 0.21093961188884014, "grad_norm": 2.765625, "learning_rate": 4.48232026489902e-05, "loss": 0.8157, "step": 11900 }, { "epoch": 0.2109750639244517, "grad_norm": 2.78125, "learning_rate": 4.482150081474229e-05, "loss": 0.8076, "step": 11902 }, { "epoch": 0.2110105159600633, "grad_norm": 2.984375, "learning_rate": 4.481979873312616e-05, "loss": 0.8188, "step": 11904 }, { "epoch": 0.21104596799567485, "grad_norm": 2.609375, "learning_rate": 4.481809640416308e-05, "loss": 0.7849, "step": 11906 }, { "epoch": 0.2110814200312864, "grad_norm": 2.578125, "learning_rate": 4.481639382787428e-05, "loss": 0.8167, "step": 11908 }, { "epoch": 0.211116872066898, "grad_norm": 2.640625, "learning_rate": 4.481469100428102e-05, "loss": 0.7851, "step": 11910 }, { "epoch": 0.21115232410250956, "grad_norm": 2.21875, "learning_rate": 4.4812987933404535e-05, "loss": 0.7196, "step": 11912 }, { "epoch": 0.21118777613812112, "grad_norm": 2.796875, "learning_rate": 4.481128461526609e-05, "loss": 0.7946, "step": 11914 }, { "epoch": 0.2112232281737327, "grad_norm": 2.828125, "learning_rate": 4.480958104988694e-05, "loss": 0.8444, "step": 11916 }, { "epoch": 0.21125868020934427, "grad_norm": 2.84375, "learning_rate": 4.4807877237288344e-05, "loss": 0.8068, "step": 11918 }, { "epoch": 0.21129413224495583, "grad_norm": 2.84375, "learning_rate": 4.4806173177491564e-05, "loss": 0.7822, "step": 11920 }, { "epoch": 0.21132958428056742, "grad_norm": 2.59375, "learning_rate": 4.480446887051787e-05, "loss": 0.8279, "step": 11922 }, { "epoch": 0.21136503631617898, "grad_norm": 2.734375, "learning_rate": 4.4802764316388536e-05, "loss": 0.7888, "step": 11924 }, { "epoch": 0.21140048835179054, "grad_norm": 2.6875, "learning_rate": 4.480105951512482e-05, "loss": 0.7546, "step": 11926 }, { "epoch": 0.21143594038740213, "grad_norm": 2.59375, "learning_rate": 4.4799354466748e-05, "loss": 0.8287, "step": 11928 }, { "epoch": 0.2114713924230137, "grad_norm": 2.65625, "learning_rate": 4.479764917127938e-05, "loss": 0.807, "step": 11930 }, { "epoch": 0.21150684445862525, "grad_norm": 2.765625, "learning_rate": 4.4795943628740204e-05, "loss": 0.7858, "step": 11932 }, { "epoch": 0.21154229649423684, "grad_norm": 3.03125, "learning_rate": 4.479423783915177e-05, "loss": 0.815, "step": 11934 }, { "epoch": 0.2115777485298484, "grad_norm": 3.09375, "learning_rate": 4.4792531802535386e-05, "loss": 0.8248, "step": 11936 }, { "epoch": 0.21161320056545996, "grad_norm": 2.984375, "learning_rate": 4.4790825518912326e-05, "loss": 0.8082, "step": 11938 }, { "epoch": 0.21164865260107155, "grad_norm": 2.65625, "learning_rate": 4.478911898830388e-05, "loss": 0.7982, "step": 11940 }, { "epoch": 0.2116841046366831, "grad_norm": 2.84375, "learning_rate": 4.478741221073136e-05, "loss": 0.7951, "step": 11942 }, { "epoch": 0.21171955667229467, "grad_norm": 3.046875, "learning_rate": 4.478570518621604e-05, "loss": 0.8596, "step": 11944 }, { "epoch": 0.21175500870790626, "grad_norm": 2.625, "learning_rate": 4.4783997914779254e-05, "loss": 0.7852, "step": 11946 }, { "epoch": 0.21179046074351782, "grad_norm": 2.53125, "learning_rate": 4.47822903964423e-05, "loss": 0.7785, "step": 11948 }, { "epoch": 0.21182591277912938, "grad_norm": 2.578125, "learning_rate": 4.478058263122646e-05, "loss": 0.7551, "step": 11950 }, { "epoch": 0.21186136481474097, "grad_norm": 2.921875, "learning_rate": 4.4778874619153086e-05, "loss": 0.8051, "step": 11952 }, { "epoch": 0.21189681685035253, "grad_norm": 2.859375, "learning_rate": 4.4777166360243474e-05, "loss": 0.7988, "step": 11954 }, { "epoch": 0.2119322688859641, "grad_norm": 2.96875, "learning_rate": 4.4775457854518944e-05, "loss": 0.8211, "step": 11956 }, { "epoch": 0.21196772092157568, "grad_norm": 2.75, "learning_rate": 4.4773749102000814e-05, "loss": 0.7679, "step": 11958 }, { "epoch": 0.21200317295718724, "grad_norm": 2.640625, "learning_rate": 4.477204010271042e-05, "loss": 0.8257, "step": 11960 }, { "epoch": 0.2120386249927988, "grad_norm": 2.609375, "learning_rate": 4.477033085666909e-05, "loss": 0.8224, "step": 11962 }, { "epoch": 0.21207407702841038, "grad_norm": 2.40625, "learning_rate": 4.4768621363898135e-05, "loss": 0.7642, "step": 11964 }, { "epoch": 0.21210952906402195, "grad_norm": 2.9375, "learning_rate": 4.47669116244189e-05, "loss": 0.7883, "step": 11966 }, { "epoch": 0.2121449810996335, "grad_norm": 2.65625, "learning_rate": 4.476520163825274e-05, "loss": 0.8087, "step": 11968 }, { "epoch": 0.2121804331352451, "grad_norm": 2.9375, "learning_rate": 4.476349140542098e-05, "loss": 0.8171, "step": 11970 }, { "epoch": 0.21221588517085666, "grad_norm": 2.890625, "learning_rate": 4.476178092594495e-05, "loss": 0.7746, "step": 11972 }, { "epoch": 0.21225133720646822, "grad_norm": 2.921875, "learning_rate": 4.4760070199846016e-05, "loss": 0.8099, "step": 11974 }, { "epoch": 0.2122867892420798, "grad_norm": 2.65625, "learning_rate": 4.475835922714552e-05, "loss": 0.8398, "step": 11976 }, { "epoch": 0.21232224127769136, "grad_norm": 2.546875, "learning_rate": 4.475664800786482e-05, "loss": 0.7722, "step": 11978 }, { "epoch": 0.21235769331330293, "grad_norm": 2.453125, "learning_rate": 4.475493654202527e-05, "loss": 0.7342, "step": 11980 }, { "epoch": 0.2123931453489145, "grad_norm": 2.765625, "learning_rate": 4.475322482964823e-05, "loss": 0.7683, "step": 11982 }, { "epoch": 0.21242859738452607, "grad_norm": 2.75, "learning_rate": 4.475151287075505e-05, "loss": 0.8195, "step": 11984 }, { "epoch": 0.21246404942013764, "grad_norm": 2.640625, "learning_rate": 4.4749800665367104e-05, "loss": 0.8022, "step": 11986 }, { "epoch": 0.21249950145574922, "grad_norm": 2.84375, "learning_rate": 4.474808821350576e-05, "loss": 0.8183, "step": 11988 }, { "epoch": 0.21253495349136078, "grad_norm": 2.8125, "learning_rate": 4.474637551519239e-05, "loss": 0.8333, "step": 11990 }, { "epoch": 0.21257040552697234, "grad_norm": 2.703125, "learning_rate": 4.474466257044837e-05, "loss": 0.7638, "step": 11992 }, { "epoch": 0.21260585756258393, "grad_norm": 2.515625, "learning_rate": 4.474294937929506e-05, "loss": 0.7644, "step": 11994 }, { "epoch": 0.2126413095981955, "grad_norm": 2.875, "learning_rate": 4.474123594175387e-05, "loss": 0.8115, "step": 11996 }, { "epoch": 0.21267676163380705, "grad_norm": 3.25, "learning_rate": 4.4739522257846154e-05, "loss": 0.8353, "step": 11998 }, { "epoch": 0.21271221366941864, "grad_norm": 2.578125, "learning_rate": 4.4737808327593325e-05, "loss": 0.7944, "step": 12000 } ], "logging_steps": 2, "max_steps": 56414, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.9936181361265607e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }