{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9993355481727575, "eval_steps": 500, "global_step": 1316, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007593735168485999, "grad_norm": 4.0, "learning_rate": 2.0000000000000002e-07, "loss": 1.9271, "step": 1 }, { "epoch": 0.0015187470336971997, "grad_norm": 3.359375, "learning_rate": 4.0000000000000003e-07, "loss": 1.6784, "step": 2 }, { "epoch": 0.0022781205505457997, "grad_norm": 3.1875, "learning_rate": 6.000000000000001e-07, "loss": 1.7025, "step": 3 }, { "epoch": 0.0030374940673943995, "grad_norm": 3.640625, "learning_rate": 8.000000000000001e-07, "loss": 1.8366, "step": 4 }, { "epoch": 0.0037968675842429997, "grad_norm": 3.859375, "learning_rate": 1.0000000000000002e-06, "loss": 1.8837, "step": 5 }, { "epoch": 0.0045562411010915994, "grad_norm": 3.546875, "learning_rate": 1.2000000000000002e-06, "loss": 1.7767, "step": 6 }, { "epoch": 0.0053156146179402, "grad_norm": 3.21875, "learning_rate": 1.4000000000000001e-06, "loss": 1.6786, "step": 7 }, { "epoch": 0.006074988134788799, "grad_norm": 3.96875, "learning_rate": 1.6000000000000001e-06, "loss": 1.873, "step": 8 }, { "epoch": 0.006834361651637399, "grad_norm": 3.703125, "learning_rate": 1.8000000000000001e-06, "loss": 1.8584, "step": 9 }, { "epoch": 0.007593735168485999, "grad_norm": 3.40625, "learning_rate": 2.0000000000000003e-06, "loss": 1.7817, "step": 10 }, { "epoch": 0.0083531086853346, "grad_norm": 3.25, "learning_rate": 2.2e-06, "loss": 1.7579, "step": 11 }, { "epoch": 0.009112482202183199, "grad_norm": 3.953125, "learning_rate": 2.4000000000000003e-06, "loss": 1.9496, "step": 12 }, { "epoch": 0.009871855719031798, "grad_norm": 3.515625, "learning_rate": 2.6e-06, "loss": 1.7234, "step": 13 }, { "epoch": 0.0106312292358804, "grad_norm": 4.375, "learning_rate": 2.8000000000000003e-06, "loss": 2.003, "step": 14 }, { "epoch": 0.011390602752728999, "grad_norm": 4.125, "learning_rate": 3e-06, "loss": 1.8606, "step": 15 }, { "epoch": 0.012149976269577598, "grad_norm": 3.71875, "learning_rate": 3.2000000000000003e-06, "loss": 1.8039, "step": 16 }, { "epoch": 0.012909349786426199, "grad_norm": 3.96875, "learning_rate": 3.4000000000000005e-06, "loss": 1.936, "step": 17 }, { "epoch": 0.013668723303274798, "grad_norm": 3.453125, "learning_rate": 3.6000000000000003e-06, "loss": 1.7465, "step": 18 }, { "epoch": 0.014428096820123398, "grad_norm": 3.5625, "learning_rate": 3.8000000000000005e-06, "loss": 1.659, "step": 19 }, { "epoch": 0.015187470336971999, "grad_norm": 3.59375, "learning_rate": 4.000000000000001e-06, "loss": 1.7962, "step": 20 }, { "epoch": 0.015946843853820596, "grad_norm": 3.375, "learning_rate": 4.2000000000000004e-06, "loss": 1.6802, "step": 21 }, { "epoch": 0.0167062173706692, "grad_norm": 3.875, "learning_rate": 4.4e-06, "loss": 1.8444, "step": 22 }, { "epoch": 0.0174655908875178, "grad_norm": 3.53125, "learning_rate": 4.600000000000001e-06, "loss": 1.7685, "step": 23 }, { "epoch": 0.018224964404366398, "grad_norm": 3.421875, "learning_rate": 4.800000000000001e-06, "loss": 1.6768, "step": 24 }, { "epoch": 0.018984337921214997, "grad_norm": 3.6875, "learning_rate": 5e-06, "loss": 1.8115, "step": 25 }, { "epoch": 0.019743711438063596, "grad_norm": 3.671875, "learning_rate": 5.2e-06, "loss": 1.7844, "step": 26 }, { "epoch": 0.020503084954912196, "grad_norm": 3.75, "learning_rate": 5.400000000000001e-06, "loss": 1.8313, "step": 27 }, { "epoch": 0.0212624584717608, "grad_norm": 3.953125, "learning_rate": 5.600000000000001e-06, "loss": 1.8675, "step": 28 }, { "epoch": 0.022021831988609398, "grad_norm": 3.71875, "learning_rate": 5.8e-06, "loss": 1.7895, "step": 29 }, { "epoch": 0.022781205505457997, "grad_norm": 4.03125, "learning_rate": 6e-06, "loss": 1.8702, "step": 30 }, { "epoch": 0.023540579022306597, "grad_norm": 3.640625, "learning_rate": 6.200000000000001e-06, "loss": 1.6666, "step": 31 }, { "epoch": 0.024299952539155196, "grad_norm": 4.125, "learning_rate": 6.4000000000000006e-06, "loss": 1.9699, "step": 32 }, { "epoch": 0.025059326056003795, "grad_norm": 3.578125, "learning_rate": 6.600000000000001e-06, "loss": 1.6828, "step": 33 }, { "epoch": 0.025818699572852398, "grad_norm": 3.65625, "learning_rate": 6.800000000000001e-06, "loss": 1.8098, "step": 34 }, { "epoch": 0.026578073089700997, "grad_norm": 3.484375, "learning_rate": 7e-06, "loss": 1.6943, "step": 35 }, { "epoch": 0.027337446606549597, "grad_norm": 3.40625, "learning_rate": 7.2000000000000005e-06, "loss": 1.6835, "step": 36 }, { "epoch": 0.028096820123398196, "grad_norm": 3.53125, "learning_rate": 7.4e-06, "loss": 1.7776, "step": 37 }, { "epoch": 0.028856193640246795, "grad_norm": 3.828125, "learning_rate": 7.600000000000001e-06, "loss": 1.8554, "step": 38 }, { "epoch": 0.029615567157095395, "grad_norm": 4.28125, "learning_rate": 7.800000000000002e-06, "loss": 2.0373, "step": 39 }, { "epoch": 0.030374940673943997, "grad_norm": 3.4375, "learning_rate": 8.000000000000001e-06, "loss": 1.7848, "step": 40 }, { "epoch": 0.031134314190792597, "grad_norm": 3.71875, "learning_rate": 8.2e-06, "loss": 1.8114, "step": 41 }, { "epoch": 0.03189368770764119, "grad_norm": 4.21875, "learning_rate": 8.400000000000001e-06, "loss": 1.9938, "step": 42 }, { "epoch": 0.0326530612244898, "grad_norm": 3.515625, "learning_rate": 8.6e-06, "loss": 1.6792, "step": 43 }, { "epoch": 0.0334124347413384, "grad_norm": 3.6875, "learning_rate": 8.8e-06, "loss": 1.8027, "step": 44 }, { "epoch": 0.034171808258187, "grad_norm": 4.0625, "learning_rate": 9e-06, "loss": 1.8655, "step": 45 }, { "epoch": 0.0349311817750356, "grad_norm": 3.75, "learning_rate": 9.200000000000002e-06, "loss": 1.8402, "step": 46 }, { "epoch": 0.035690555291884196, "grad_norm": 3.75, "learning_rate": 9.4e-06, "loss": 1.8507, "step": 47 }, { "epoch": 0.036449928808732796, "grad_norm": 3.515625, "learning_rate": 9.600000000000001e-06, "loss": 1.7811, "step": 48 }, { "epoch": 0.037209302325581395, "grad_norm": 3.796875, "learning_rate": 9.800000000000001e-06, "loss": 1.873, "step": 49 }, { "epoch": 0.037968675842429994, "grad_norm": 3.40625, "learning_rate": 1e-05, "loss": 1.7869, "step": 50 }, { "epoch": 0.038728049359278593, "grad_norm": 3.890625, "learning_rate": 1.02e-05, "loss": 1.847, "step": 51 }, { "epoch": 0.03948742287612719, "grad_norm": 4.0, "learning_rate": 1.04e-05, "loss": 1.9245, "step": 52 }, { "epoch": 0.04024679639297579, "grad_norm": 4.09375, "learning_rate": 1.0600000000000002e-05, "loss": 2.0196, "step": 53 }, { "epoch": 0.04100616990982439, "grad_norm": 3.609375, "learning_rate": 1.0800000000000002e-05, "loss": 1.7158, "step": 54 }, { "epoch": 0.041765543426673, "grad_norm": 3.453125, "learning_rate": 1.1000000000000001e-05, "loss": 1.7212, "step": 55 }, { "epoch": 0.0425249169435216, "grad_norm": 4.0625, "learning_rate": 1.1200000000000001e-05, "loss": 1.9365, "step": 56 }, { "epoch": 0.043284290460370196, "grad_norm": 3.65625, "learning_rate": 1.14e-05, "loss": 1.8307, "step": 57 }, { "epoch": 0.044043663977218796, "grad_norm": 3.296875, "learning_rate": 1.16e-05, "loss": 1.6974, "step": 58 }, { "epoch": 0.044803037494067395, "grad_norm": 3.328125, "learning_rate": 1.18e-05, "loss": 1.6723, "step": 59 }, { "epoch": 0.045562411010915994, "grad_norm": 3.25, "learning_rate": 1.2e-05, "loss": 1.6466, "step": 60 }, { "epoch": 0.046321784527764594, "grad_norm": 3.59375, "learning_rate": 1.22e-05, "loss": 1.7542, "step": 61 }, { "epoch": 0.04708115804461319, "grad_norm": 3.546875, "learning_rate": 1.2400000000000002e-05, "loss": 1.8449, "step": 62 }, { "epoch": 0.04784053156146179, "grad_norm": 3.140625, "learning_rate": 1.2600000000000001e-05, "loss": 1.6788, "step": 63 }, { "epoch": 0.04859990507831039, "grad_norm": 3.421875, "learning_rate": 1.2800000000000001e-05, "loss": 1.8164, "step": 64 }, { "epoch": 0.04935927859515899, "grad_norm": 3.359375, "learning_rate": 1.3000000000000001e-05, "loss": 1.7674, "step": 65 }, { "epoch": 0.05011865211200759, "grad_norm": 3.296875, "learning_rate": 1.3200000000000002e-05, "loss": 1.711, "step": 66 }, { "epoch": 0.0508780256288562, "grad_norm": 3.109375, "learning_rate": 1.3400000000000002e-05, "loss": 1.6482, "step": 67 }, { "epoch": 0.051637399145704796, "grad_norm": 3.1875, "learning_rate": 1.3600000000000002e-05, "loss": 1.6734, "step": 68 }, { "epoch": 0.052396772662553395, "grad_norm": 3.546875, "learning_rate": 1.38e-05, "loss": 1.793, "step": 69 }, { "epoch": 0.053156146179401995, "grad_norm": 3.65625, "learning_rate": 1.4e-05, "loss": 1.8647, "step": 70 }, { "epoch": 0.053915519696250594, "grad_norm": 3.53125, "learning_rate": 1.4200000000000001e-05, "loss": 1.8249, "step": 71 }, { "epoch": 0.05467489321309919, "grad_norm": 3.796875, "learning_rate": 1.4400000000000001e-05, "loss": 1.88, "step": 72 }, { "epoch": 0.05543426672994779, "grad_norm": 3.984375, "learning_rate": 1.46e-05, "loss": 1.9618, "step": 73 }, { "epoch": 0.05619364024679639, "grad_norm": 3.46875, "learning_rate": 1.48e-05, "loss": 1.8017, "step": 74 }, { "epoch": 0.05695301376364499, "grad_norm": 3.40625, "learning_rate": 1.5000000000000002e-05, "loss": 1.6894, "step": 75 }, { "epoch": 0.05771238728049359, "grad_norm": 3.375, "learning_rate": 1.5200000000000002e-05, "loss": 1.7385, "step": 76 }, { "epoch": 0.05847176079734219, "grad_norm": 3.21875, "learning_rate": 1.54e-05, "loss": 1.631, "step": 77 }, { "epoch": 0.05923113431419079, "grad_norm": 3.421875, "learning_rate": 1.5600000000000003e-05, "loss": 1.7345, "step": 78 }, { "epoch": 0.059990507831039395, "grad_norm": 3.25, "learning_rate": 1.58e-05, "loss": 1.6198, "step": 79 }, { "epoch": 0.060749881347887995, "grad_norm": 3.703125, "learning_rate": 1.6000000000000003e-05, "loss": 1.7695, "step": 80 }, { "epoch": 0.061509254864736594, "grad_norm": 3.546875, "learning_rate": 1.62e-05, "loss": 1.7828, "step": 81 }, { "epoch": 0.062268628381585193, "grad_norm": 4.0625, "learning_rate": 1.64e-05, "loss": 1.8845, "step": 82 }, { "epoch": 0.06302800189843379, "grad_norm": 3.1875, "learning_rate": 1.66e-05, "loss": 1.698, "step": 83 }, { "epoch": 0.06378737541528239, "grad_norm": 3.28125, "learning_rate": 1.6800000000000002e-05, "loss": 1.6712, "step": 84 }, { "epoch": 0.064546748932131, "grad_norm": 3.375, "learning_rate": 1.7e-05, "loss": 1.7673, "step": 85 }, { "epoch": 0.0653061224489796, "grad_norm": 3.609375, "learning_rate": 1.72e-05, "loss": 1.8005, "step": 86 }, { "epoch": 0.0660654959658282, "grad_norm": 3.5, "learning_rate": 1.7400000000000003e-05, "loss": 1.7674, "step": 87 }, { "epoch": 0.0668248694826768, "grad_norm": 3.484375, "learning_rate": 1.76e-05, "loss": 1.7665, "step": 88 }, { "epoch": 0.0675842429995254, "grad_norm": 3.578125, "learning_rate": 1.7800000000000002e-05, "loss": 1.7815, "step": 89 }, { "epoch": 0.068343616516374, "grad_norm": 3.25, "learning_rate": 1.8e-05, "loss": 1.6855, "step": 90 }, { "epoch": 0.0691029900332226, "grad_norm": 3.1875, "learning_rate": 1.8200000000000002e-05, "loss": 1.6461, "step": 91 }, { "epoch": 0.0698623635500712, "grad_norm": 3.09375, "learning_rate": 1.8400000000000003e-05, "loss": 1.6388, "step": 92 }, { "epoch": 0.07062173706691979, "grad_norm": 3.46875, "learning_rate": 1.86e-05, "loss": 1.7505, "step": 93 }, { "epoch": 0.07138111058376839, "grad_norm": 2.84375, "learning_rate": 1.88e-05, "loss": 1.6048, "step": 94 }, { "epoch": 0.07214048410061699, "grad_norm": 3.34375, "learning_rate": 1.9e-05, "loss": 1.6765, "step": 95 }, { "epoch": 0.07289985761746559, "grad_norm": 3.171875, "learning_rate": 1.9200000000000003e-05, "loss": 1.634, "step": 96 }, { "epoch": 0.07365923113431419, "grad_norm": 3.0625, "learning_rate": 1.94e-05, "loss": 1.6586, "step": 97 }, { "epoch": 0.07441860465116279, "grad_norm": 3.140625, "learning_rate": 1.9600000000000002e-05, "loss": 1.6871, "step": 98 }, { "epoch": 0.07517797816801139, "grad_norm": 3.703125, "learning_rate": 1.98e-05, "loss": 1.7815, "step": 99 }, { "epoch": 0.07593735168485999, "grad_norm": 3.671875, "learning_rate": 2e-05, "loss": 1.8349, "step": 100 }, { "epoch": 0.07669672520170859, "grad_norm": 3.625, "learning_rate": 1.9999966626453647e-05, "loss": 1.8303, "step": 101 }, { "epoch": 0.07745609871855719, "grad_norm": 2.921875, "learning_rate": 1.9999866506037346e-05, "loss": 1.5889, "step": 102 }, { "epoch": 0.07821547223540579, "grad_norm": 2.984375, "learning_rate": 1.9999699639419373e-05, "loss": 1.5841, "step": 103 }, { "epoch": 0.07897484575225439, "grad_norm": 2.96875, "learning_rate": 1.999946602771351e-05, "loss": 1.6492, "step": 104 }, { "epoch": 0.07973421926910298, "grad_norm": 3.203125, "learning_rate": 1.999916567247905e-05, "loss": 1.6682, "step": 105 }, { "epoch": 0.08049359278595158, "grad_norm": 2.6875, "learning_rate": 1.9998798575720776e-05, "loss": 1.522, "step": 106 }, { "epoch": 0.08125296630280018, "grad_norm": 3.171875, "learning_rate": 1.9998364739888954e-05, "loss": 1.6903, "step": 107 }, { "epoch": 0.08201233981964878, "grad_norm": 2.765625, "learning_rate": 1.9997864167879313e-05, "loss": 1.5823, "step": 108 }, { "epoch": 0.0827717133364974, "grad_norm": 2.953125, "learning_rate": 1.9997296863033018e-05, "loss": 1.6105, "step": 109 }, { "epoch": 0.083531086853346, "grad_norm": 2.90625, "learning_rate": 1.9996662829136676e-05, "loss": 1.5877, "step": 110 }, { "epoch": 0.0842904603701946, "grad_norm": 3.515625, "learning_rate": 1.999596207042227e-05, "loss": 1.7453, "step": 111 }, { "epoch": 0.0850498338870432, "grad_norm": 3.3125, "learning_rate": 1.999519459156716e-05, "loss": 1.7015, "step": 112 }, { "epoch": 0.0858092074038918, "grad_norm": 3.09375, "learning_rate": 1.999436039769405e-05, "loss": 1.6773, "step": 113 }, { "epoch": 0.08656858092074039, "grad_norm": 2.84375, "learning_rate": 1.9993459494370938e-05, "loss": 1.6287, "step": 114 }, { "epoch": 0.08732795443758899, "grad_norm": 3.40625, "learning_rate": 1.9992491887611095e-05, "loss": 1.7393, "step": 115 }, { "epoch": 0.08808732795443759, "grad_norm": 4.03125, "learning_rate": 1.999145758387301e-05, "loss": 1.9157, "step": 116 }, { "epoch": 0.08884670147128619, "grad_norm": 2.734375, "learning_rate": 1.9990356590060363e-05, "loss": 1.6195, "step": 117 }, { "epoch": 0.08960607498813479, "grad_norm": 2.71875, "learning_rate": 1.998918891352197e-05, "loss": 1.6428, "step": 118 }, { "epoch": 0.09036544850498339, "grad_norm": 3.1875, "learning_rate": 1.9987954562051724e-05, "loss": 1.6772, "step": 119 }, { "epoch": 0.09112482202183199, "grad_norm": 2.84375, "learning_rate": 1.998665354388857e-05, "loss": 1.5625, "step": 120 }, { "epoch": 0.09188419553868059, "grad_norm": 2.984375, "learning_rate": 1.9985285867716423e-05, "loss": 1.6915, "step": 121 }, { "epoch": 0.09264356905552919, "grad_norm": 2.828125, "learning_rate": 1.9983851542664125e-05, "loss": 1.6413, "step": 122 }, { "epoch": 0.09340294257237779, "grad_norm": 2.65625, "learning_rate": 1.998235057830538e-05, "loss": 1.5844, "step": 123 }, { "epoch": 0.09416231608922639, "grad_norm": 2.59375, "learning_rate": 1.9980782984658682e-05, "loss": 1.561, "step": 124 }, { "epoch": 0.09492168960607499, "grad_norm": 2.921875, "learning_rate": 1.997914877218727e-05, "loss": 1.6305, "step": 125 }, { "epoch": 0.09568106312292358, "grad_norm": 2.25, "learning_rate": 1.9977447951799035e-05, "loss": 1.4409, "step": 126 }, { "epoch": 0.09644043663977218, "grad_norm": 2.484375, "learning_rate": 1.9975680534846457e-05, "loss": 1.5723, "step": 127 }, { "epoch": 0.09719981015662078, "grad_norm": 3.453125, "learning_rate": 1.9973846533126533e-05, "loss": 1.7338, "step": 128 }, { "epoch": 0.09795918367346938, "grad_norm": 2.703125, "learning_rate": 1.997194595888069e-05, "loss": 1.6383, "step": 129 }, { "epoch": 0.09871855719031798, "grad_norm": 2.4375, "learning_rate": 1.996997882479471e-05, "loss": 1.5887, "step": 130 }, { "epoch": 0.09947793070716658, "grad_norm": 2.4375, "learning_rate": 1.9967945143998636e-05, "loss": 1.5525, "step": 131 }, { "epoch": 0.10023730422401518, "grad_norm": 2.359375, "learning_rate": 1.99658449300667e-05, "loss": 1.4995, "step": 132 }, { "epoch": 0.1009966777408638, "grad_norm": 2.140625, "learning_rate": 1.996367819701722e-05, "loss": 1.5085, "step": 133 }, { "epoch": 0.1017560512577124, "grad_norm": 2.46875, "learning_rate": 1.996144495931251e-05, "loss": 1.5708, "step": 134 }, { "epoch": 0.10251542477456099, "grad_norm": 2.71875, "learning_rate": 1.995914523185878e-05, "loss": 1.623, "step": 135 }, { "epoch": 0.10327479829140959, "grad_norm": 2.1875, "learning_rate": 1.9956779030006038e-05, "loss": 1.5378, "step": 136 }, { "epoch": 0.10403417180825819, "grad_norm": 2.5, "learning_rate": 1.9954346369548002e-05, "loss": 1.5672, "step": 137 }, { "epoch": 0.10479354532510679, "grad_norm": 2.078125, "learning_rate": 1.995184726672197e-05, "loss": 1.5316, "step": 138 }, { "epoch": 0.10555291884195539, "grad_norm": 2.25, "learning_rate": 1.994928173820873e-05, "loss": 1.5776, "step": 139 }, { "epoch": 0.10631229235880399, "grad_norm": 2.34375, "learning_rate": 1.994664980113243e-05, "loss": 1.6079, "step": 140 }, { "epoch": 0.10707166587565259, "grad_norm": 2.296875, "learning_rate": 1.9943951473060488e-05, "loss": 1.5903, "step": 141 }, { "epoch": 0.10783103939250119, "grad_norm": 2.53125, "learning_rate": 1.9941186772003463e-05, "loss": 1.6456, "step": 142 }, { "epoch": 0.10859041290934979, "grad_norm": 2.171875, "learning_rate": 1.9938355716414933e-05, "loss": 1.5053, "step": 143 }, { "epoch": 0.10934978642619839, "grad_norm": 2.09375, "learning_rate": 1.9935458325191365e-05, "loss": 1.5925, "step": 144 }, { "epoch": 0.11010915994304699, "grad_norm": 2.171875, "learning_rate": 1.9932494617672007e-05, "loss": 1.6033, "step": 145 }, { "epoch": 0.11086853345989559, "grad_norm": 2.046875, "learning_rate": 1.992946461363874e-05, "loss": 1.553, "step": 146 }, { "epoch": 0.11162790697674418, "grad_norm": 2.21875, "learning_rate": 1.9926368333315964e-05, "loss": 1.5962, "step": 147 }, { "epoch": 0.11238728049359278, "grad_norm": 2.203125, "learning_rate": 1.992320579737045e-05, "loss": 1.6159, "step": 148 }, { "epoch": 0.11314665401044138, "grad_norm": 2.0625, "learning_rate": 1.991997702691121e-05, "loss": 1.4709, "step": 149 }, { "epoch": 0.11390602752728998, "grad_norm": 2.375, "learning_rate": 1.9916682043489337e-05, "loss": 1.6076, "step": 150 }, { "epoch": 0.11466540104413858, "grad_norm": 1.984375, "learning_rate": 1.9913320869097897e-05, "loss": 1.4864, "step": 151 }, { "epoch": 0.11542477456098718, "grad_norm": 1.7734375, "learning_rate": 1.9909893526171745e-05, "loss": 1.4559, "step": 152 }, { "epoch": 0.11618414807783578, "grad_norm": 1.9921875, "learning_rate": 1.990640003758741e-05, "loss": 1.5585, "step": 153 }, { "epoch": 0.11694352159468438, "grad_norm": 1.796875, "learning_rate": 1.9902840426662897e-05, "loss": 1.4656, "step": 154 }, { "epoch": 0.11770289511153298, "grad_norm": 1.9296875, "learning_rate": 1.9899214717157588e-05, "loss": 1.5357, "step": 155 }, { "epoch": 0.11846226862838158, "grad_norm": 1.9296875, "learning_rate": 1.9895522933272028e-05, "loss": 1.5101, "step": 156 }, { "epoch": 0.11922164214523019, "grad_norm": 1.9140625, "learning_rate": 1.989176509964781e-05, "loss": 1.5287, "step": 157 }, { "epoch": 0.11998101566207879, "grad_norm": 1.9765625, "learning_rate": 1.988794124136738e-05, "loss": 1.6104, "step": 158 }, { "epoch": 0.12074038917892739, "grad_norm": 1.9296875, "learning_rate": 1.9884051383953876e-05, "loss": 1.5313, "step": 159 }, { "epoch": 0.12149976269577599, "grad_norm": 1.703125, "learning_rate": 1.9880095553370967e-05, "loss": 1.4602, "step": 160 }, { "epoch": 0.12225913621262459, "grad_norm": 1.59375, "learning_rate": 1.9876073776022676e-05, "loss": 1.4071, "step": 161 }, { "epoch": 0.12301850972947319, "grad_norm": 1.6953125, "learning_rate": 1.987198607875319e-05, "loss": 1.4707, "step": 162 }, { "epoch": 0.12377788324632179, "grad_norm": 1.8125, "learning_rate": 1.9867832488846702e-05, "loss": 1.4729, "step": 163 }, { "epoch": 0.12453725676317039, "grad_norm": 1.6328125, "learning_rate": 1.9863613034027224e-05, "loss": 1.4967, "step": 164 }, { "epoch": 0.12529663028001897, "grad_norm": 1.6171875, "learning_rate": 1.9859327742458387e-05, "loss": 1.4463, "step": 165 }, { "epoch": 0.12605600379686757, "grad_norm": 1.65625, "learning_rate": 1.985497664274326e-05, "loss": 1.4763, "step": 166 }, { "epoch": 0.12681537731371617, "grad_norm": 1.6953125, "learning_rate": 1.9850559763924176e-05, "loss": 1.5175, "step": 167 }, { "epoch": 0.12757475083056477, "grad_norm": 1.609375, "learning_rate": 1.9846077135482513e-05, "loss": 1.4363, "step": 168 }, { "epoch": 0.1283341243474134, "grad_norm": 1.4609375, "learning_rate": 1.9841528787338513e-05, "loss": 1.3922, "step": 169 }, { "epoch": 0.129093497864262, "grad_norm": 1.578125, "learning_rate": 1.983691474985108e-05, "loss": 1.4937, "step": 170 }, { "epoch": 0.1298528713811106, "grad_norm": 1.421875, "learning_rate": 1.983223505381757e-05, "loss": 1.4381, "step": 171 }, { "epoch": 0.1306122448979592, "grad_norm": 1.6484375, "learning_rate": 1.9827489730473597e-05, "loss": 1.5019, "step": 172 }, { "epoch": 0.1313716184148078, "grad_norm": 1.4140625, "learning_rate": 1.982267881149281e-05, "loss": 1.3798, "step": 173 }, { "epoch": 0.1321309919316564, "grad_norm": 1.6796875, "learning_rate": 1.9817802328986696e-05, "loss": 1.5623, "step": 174 }, { "epoch": 0.132890365448505, "grad_norm": 1.5390625, "learning_rate": 1.9812860315504362e-05, "loss": 1.4497, "step": 175 }, { "epoch": 0.1336497389653536, "grad_norm": 1.4921875, "learning_rate": 1.9807852804032306e-05, "loss": 1.4442, "step": 176 }, { "epoch": 0.1344091124822022, "grad_norm": 1.734375, "learning_rate": 1.9802779827994214e-05, "loss": 1.5552, "step": 177 }, { "epoch": 0.1351684859990508, "grad_norm": 1.421875, "learning_rate": 1.9797641421250725e-05, "loss": 1.4411, "step": 178 }, { "epoch": 0.1359278595158994, "grad_norm": 1.3515625, "learning_rate": 1.9792437618099215e-05, "loss": 1.4569, "step": 179 }, { "epoch": 0.136687233032748, "grad_norm": 1.40625, "learning_rate": 1.9787168453273546e-05, "loss": 1.4257, "step": 180 }, { "epoch": 0.1374466065495966, "grad_norm": 1.359375, "learning_rate": 1.9781833961943874e-05, "loss": 1.417, "step": 181 }, { "epoch": 0.1382059800664452, "grad_norm": 1.5625, "learning_rate": 1.9776434179716365e-05, "loss": 1.4831, "step": 182 }, { "epoch": 0.1389653535832938, "grad_norm": 1.265625, "learning_rate": 1.977096914263301e-05, "loss": 1.3927, "step": 183 }, { "epoch": 0.1397247271001424, "grad_norm": 1.3671875, "learning_rate": 1.9765438887171327e-05, "loss": 1.431, "step": 184 }, { "epoch": 0.140484100616991, "grad_norm": 1.4609375, "learning_rate": 1.975984345024418e-05, "loss": 1.4798, "step": 185 }, { "epoch": 0.14124347413383959, "grad_norm": 1.484375, "learning_rate": 1.975418286919947e-05, "loss": 1.4939, "step": 186 }, { "epoch": 0.14200284765068819, "grad_norm": 1.390625, "learning_rate": 1.9748457181819937e-05, "loss": 1.4784, "step": 187 }, { "epoch": 0.14276222116753678, "grad_norm": 1.2421875, "learning_rate": 1.9742666426322877e-05, "loss": 1.3947, "step": 188 }, { "epoch": 0.14352159468438538, "grad_norm": 1.2109375, "learning_rate": 1.97368106413599e-05, "loss": 1.3783, "step": 189 }, { "epoch": 0.14428096820123398, "grad_norm": 1.0859375, "learning_rate": 1.9730889866016668e-05, "loss": 1.3301, "step": 190 }, { "epoch": 0.14504034171808258, "grad_norm": 1.1953125, "learning_rate": 1.9724904139812636e-05, "loss": 1.4403, "step": 191 }, { "epoch": 0.14579971523493118, "grad_norm": 1.1484375, "learning_rate": 1.9718853502700783e-05, "loss": 1.4301, "step": 192 }, { "epoch": 0.14655908875177978, "grad_norm": 1.0859375, "learning_rate": 1.9712737995067357e-05, "loss": 1.3473, "step": 193 }, { "epoch": 0.14731846226862838, "grad_norm": 1.078125, "learning_rate": 1.970655765773159e-05, "loss": 1.3557, "step": 194 }, { "epoch": 0.14807783578547698, "grad_norm": 1.0703125, "learning_rate": 1.9700312531945444e-05, "loss": 1.3979, "step": 195 }, { "epoch": 0.14883720930232558, "grad_norm": 1.375, "learning_rate": 1.9694002659393306e-05, "loss": 1.5305, "step": 196 }, { "epoch": 0.14959658281917418, "grad_norm": 1.1875, "learning_rate": 1.9687628082191748e-05, "loss": 1.5078, "step": 197 }, { "epoch": 0.15035595633602278, "grad_norm": 1.265625, "learning_rate": 1.9681188842889222e-05, "loss": 1.4817, "step": 198 }, { "epoch": 0.15111532985287138, "grad_norm": 1.0234375, "learning_rate": 1.9674684984465774e-05, "loss": 1.3599, "step": 199 }, { "epoch": 0.15187470336971998, "grad_norm": 1.0625, "learning_rate": 1.966811655033277e-05, "loss": 1.384, "step": 200 }, { "epoch": 0.15263407688656858, "grad_norm": 1.3515625, "learning_rate": 1.9661483584332592e-05, "loss": 1.514, "step": 201 }, { "epoch": 0.15339345040341718, "grad_norm": 1.1328125, "learning_rate": 1.9654786130738372e-05, "loss": 1.3908, "step": 202 }, { "epoch": 0.15415282392026577, "grad_norm": 1.015625, "learning_rate": 1.9648024234253654e-05, "loss": 1.336, "step": 203 }, { "epoch": 0.15491219743711437, "grad_norm": 1.0078125, "learning_rate": 1.9641197940012136e-05, "loss": 1.3723, "step": 204 }, { "epoch": 0.15567157095396297, "grad_norm": 1.015625, "learning_rate": 1.963430729357735e-05, "loss": 1.3784, "step": 205 }, { "epoch": 0.15643094447081157, "grad_norm": 0.9375, "learning_rate": 1.9627352340942355e-05, "loss": 1.3541, "step": 206 }, { "epoch": 0.15719031798766017, "grad_norm": 1.0546875, "learning_rate": 1.9620333128529436e-05, "loss": 1.3969, "step": 207 }, { "epoch": 0.15794969150450877, "grad_norm": 1.1953125, "learning_rate": 1.96132497031898e-05, "loss": 1.4611, "step": 208 }, { "epoch": 0.15870906502135737, "grad_norm": 0.96484375, "learning_rate": 1.9606102112203243e-05, "loss": 1.3631, "step": 209 }, { "epoch": 0.15946843853820597, "grad_norm": 0.9375, "learning_rate": 1.9598890403277867e-05, "loss": 1.3605, "step": 210 }, { "epoch": 0.16022781205505457, "grad_norm": 1.078125, "learning_rate": 1.9591614624549724e-05, "loss": 1.4721, "step": 211 }, { "epoch": 0.16098718557190317, "grad_norm": 1.0234375, "learning_rate": 1.958427482458253e-05, "loss": 1.429, "step": 212 }, { "epoch": 0.16174655908875177, "grad_norm": 0.94921875, "learning_rate": 1.9576871052367307e-05, "loss": 1.3866, "step": 213 }, { "epoch": 0.16250593260560037, "grad_norm": 0.9140625, "learning_rate": 1.956940335732209e-05, "loss": 1.4103, "step": 214 }, { "epoch": 0.16326530612244897, "grad_norm": 0.953125, "learning_rate": 1.956187178929157e-05, "loss": 1.3547, "step": 215 }, { "epoch": 0.16402467963929757, "grad_norm": 1.0078125, "learning_rate": 1.9554276398546767e-05, "loss": 1.4262, "step": 216 }, { "epoch": 0.1647840531561462, "grad_norm": 0.89453125, "learning_rate": 1.9546617235784716e-05, "loss": 1.3589, "step": 217 }, { "epoch": 0.1655434266729948, "grad_norm": 0.91796875, "learning_rate": 1.95388943521281e-05, "loss": 1.3694, "step": 218 }, { "epoch": 0.1663028001898434, "grad_norm": 0.859375, "learning_rate": 1.953110779912492e-05, "loss": 1.3515, "step": 219 }, { "epoch": 0.167062173706692, "grad_norm": 0.96875, "learning_rate": 1.9523257628748148e-05, "loss": 1.419, "step": 220 }, { "epoch": 0.1678215472235406, "grad_norm": 0.8515625, "learning_rate": 1.9515343893395394e-05, "loss": 1.3665, "step": 221 }, { "epoch": 0.1685809207403892, "grad_norm": 0.7734375, "learning_rate": 1.9507366645888544e-05, "loss": 1.3448, "step": 222 }, { "epoch": 0.1693402942572378, "grad_norm": 0.7578125, "learning_rate": 1.9499325939473403e-05, "loss": 1.3186, "step": 223 }, { "epoch": 0.1700996677740864, "grad_norm": 0.6875, "learning_rate": 1.9491221827819348e-05, "loss": 1.2722, "step": 224 }, { "epoch": 0.170859041290935, "grad_norm": 0.9375, "learning_rate": 1.948305436501897e-05, "loss": 1.4339, "step": 225 }, { "epoch": 0.1716184148077836, "grad_norm": 0.796875, "learning_rate": 1.9474823605587705e-05, "loss": 1.3838, "step": 226 }, { "epoch": 0.1723777883246322, "grad_norm": 0.98828125, "learning_rate": 1.9466529604463484e-05, "loss": 1.4411, "step": 227 }, { "epoch": 0.17313716184148079, "grad_norm": 0.78515625, "learning_rate": 1.9458172417006347e-05, "loss": 1.3107, "step": 228 }, { "epoch": 0.17389653535832938, "grad_norm": 0.86328125, "learning_rate": 1.9449752098998097e-05, "loss": 1.4422, "step": 229 }, { "epoch": 0.17465590887517798, "grad_norm": 0.80078125, "learning_rate": 1.9441268706641907e-05, "loss": 1.3728, "step": 230 }, { "epoch": 0.17541528239202658, "grad_norm": 0.9453125, "learning_rate": 1.9432722296561954e-05, "loss": 1.4489, "step": 231 }, { "epoch": 0.17617465590887518, "grad_norm": 0.78125, "learning_rate": 1.942411292580304e-05, "loss": 1.3594, "step": 232 }, { "epoch": 0.17693402942572378, "grad_norm": 0.7421875, "learning_rate": 1.941544065183021e-05, "loss": 1.3176, "step": 233 }, { "epoch": 0.17769340294257238, "grad_norm": 0.71875, "learning_rate": 1.9406705532528373e-05, "loss": 1.3331, "step": 234 }, { "epoch": 0.17845277645942098, "grad_norm": 0.73828125, "learning_rate": 1.9397907626201915e-05, "loss": 1.3217, "step": 235 }, { "epoch": 0.17921214997626958, "grad_norm": 0.7578125, "learning_rate": 1.9389046991574298e-05, "loss": 1.3825, "step": 236 }, { "epoch": 0.17997152349311818, "grad_norm": 0.78515625, "learning_rate": 1.938012368778768e-05, "loss": 1.3604, "step": 237 }, { "epoch": 0.18073089700996678, "grad_norm": 0.6875, "learning_rate": 1.9371137774402528e-05, "loss": 1.3345, "step": 238 }, { "epoch": 0.18149027052681538, "grad_norm": 0.87890625, "learning_rate": 1.9362089311397194e-05, "loss": 1.417, "step": 239 }, { "epoch": 0.18224964404366398, "grad_norm": 0.63671875, "learning_rate": 1.935297835916754e-05, "loss": 1.2646, "step": 240 }, { "epoch": 0.18300901756051258, "grad_norm": 0.67578125, "learning_rate": 1.9343804978526525e-05, "loss": 1.3089, "step": 241 }, { "epoch": 0.18376839107736118, "grad_norm": 0.6328125, "learning_rate": 1.9334569230703794e-05, "loss": 1.2812, "step": 242 }, { "epoch": 0.18452776459420978, "grad_norm": 0.76171875, "learning_rate": 1.9325271177345284e-05, "loss": 1.3355, "step": 243 }, { "epoch": 0.18528713811105837, "grad_norm": 0.6484375, "learning_rate": 1.9315910880512792e-05, "loss": 1.3089, "step": 244 }, { "epoch": 0.18604651162790697, "grad_norm": 0.703125, "learning_rate": 1.9306488402683582e-05, "loss": 1.3573, "step": 245 }, { "epoch": 0.18680588514475557, "grad_norm": 0.62890625, "learning_rate": 1.929700380674995e-05, "loss": 1.2955, "step": 246 }, { "epoch": 0.18756525866160417, "grad_norm": 0.6015625, "learning_rate": 1.9287457156018824e-05, "loss": 1.2819, "step": 247 }, { "epoch": 0.18832463217845277, "grad_norm": 0.60546875, "learning_rate": 1.927784851421132e-05, "loss": 1.2677, "step": 248 }, { "epoch": 0.18908400569530137, "grad_norm": 0.75, "learning_rate": 1.926817794546232e-05, "loss": 1.3524, "step": 249 }, { "epoch": 0.18984337921214997, "grad_norm": 0.859375, "learning_rate": 1.9258445514320064e-05, "loss": 1.4673, "step": 250 }, { "epoch": 0.19060275272899857, "grad_norm": 0.70703125, "learning_rate": 1.9248651285745708e-05, "loss": 1.3484, "step": 251 }, { "epoch": 0.19136212624584717, "grad_norm": 0.7109375, "learning_rate": 1.9238795325112867e-05, "loss": 1.3565, "step": 252 }, { "epoch": 0.19212149976269577, "grad_norm": 0.625, "learning_rate": 1.9228877698207227e-05, "loss": 1.3004, "step": 253 }, { "epoch": 0.19288087327954437, "grad_norm": 0.66796875, "learning_rate": 1.921889847122605e-05, "loss": 1.3457, "step": 254 }, { "epoch": 0.19364024679639297, "grad_norm": 0.69921875, "learning_rate": 1.9208857710777785e-05, "loss": 1.314, "step": 255 }, { "epoch": 0.19439962031324157, "grad_norm": 0.8046875, "learning_rate": 1.9198755483881585e-05, "loss": 1.4202, "step": 256 }, { "epoch": 0.19515899383009017, "grad_norm": 0.59375, "learning_rate": 1.9188591857966875e-05, "loss": 1.3255, "step": 257 }, { "epoch": 0.19591836734693877, "grad_norm": 0.828125, "learning_rate": 1.917836690087291e-05, "loss": 1.4397, "step": 258 }, { "epoch": 0.19667774086378736, "grad_norm": 0.640625, "learning_rate": 1.91680806808483e-05, "loss": 1.3296, "step": 259 }, { "epoch": 0.19743711438063596, "grad_norm": 0.5859375, "learning_rate": 1.9157733266550577e-05, "loss": 1.2916, "step": 260 }, { "epoch": 0.19819648789748456, "grad_norm": 0.64453125, "learning_rate": 1.914732472704572e-05, "loss": 1.3308, "step": 261 }, { "epoch": 0.19895586141433316, "grad_norm": 0.6484375, "learning_rate": 1.9136855131807705e-05, "loss": 1.3426, "step": 262 }, { "epoch": 0.19971523493118176, "grad_norm": 0.5390625, "learning_rate": 1.9126324550718036e-05, "loss": 1.2745, "step": 263 }, { "epoch": 0.20047460844803036, "grad_norm": 0.6015625, "learning_rate": 1.911573305406528e-05, "loss": 1.3073, "step": 264 }, { "epoch": 0.201233981964879, "grad_norm": 0.578125, "learning_rate": 1.9105080712544603e-05, "loss": 1.2674, "step": 265 }, { "epoch": 0.2019933554817276, "grad_norm": 0.53515625, "learning_rate": 1.909436759725728e-05, "loss": 1.3087, "step": 266 }, { "epoch": 0.2027527289985762, "grad_norm": 0.56640625, "learning_rate": 1.908359377971025e-05, "loss": 1.284, "step": 267 }, { "epoch": 0.2035121025154248, "grad_norm": 0.5625, "learning_rate": 1.9072759331815602e-05, "loss": 1.2451, "step": 268 }, { "epoch": 0.20427147603227339, "grad_norm": 0.5703125, "learning_rate": 1.9061864325890132e-05, "loss": 1.2624, "step": 269 }, { "epoch": 0.20503084954912199, "grad_norm": 0.515625, "learning_rate": 1.9050908834654834e-05, "loss": 1.2392, "step": 270 }, { "epoch": 0.20579022306597058, "grad_norm": 0.546875, "learning_rate": 1.9039892931234434e-05, "loss": 1.2405, "step": 271 }, { "epoch": 0.20654959658281918, "grad_norm": 0.5625, "learning_rate": 1.902881668915688e-05, "loss": 1.2509, "step": 272 }, { "epoch": 0.20730897009966778, "grad_norm": 0.5625, "learning_rate": 1.9017680182352866e-05, "loss": 1.3047, "step": 273 }, { "epoch": 0.20806834361651638, "grad_norm": 0.68359375, "learning_rate": 1.9006483485155338e-05, "loss": 1.3492, "step": 274 }, { "epoch": 0.20882771713336498, "grad_norm": 0.54296875, "learning_rate": 1.8995226672298993e-05, "loss": 1.2451, "step": 275 }, { "epoch": 0.20958709065021358, "grad_norm": 0.62890625, "learning_rate": 1.898390981891979e-05, "loss": 1.3577, "step": 276 }, { "epoch": 0.21034646416706218, "grad_norm": 0.58984375, "learning_rate": 1.897253300055443e-05, "loss": 1.3152, "step": 277 }, { "epoch": 0.21110583768391078, "grad_norm": 0.58203125, "learning_rate": 1.896109629313987e-05, "loss": 1.3153, "step": 278 }, { "epoch": 0.21186521120075938, "grad_norm": 0.60546875, "learning_rate": 1.8949599773012808e-05, "loss": 1.3153, "step": 279 }, { "epoch": 0.21262458471760798, "grad_norm": 0.578125, "learning_rate": 1.8938043516909173e-05, "loss": 1.2932, "step": 280 }, { "epoch": 0.21338395823445658, "grad_norm": 0.50390625, "learning_rate": 1.892642760196361e-05, "loss": 1.2294, "step": 281 }, { "epoch": 0.21414333175130518, "grad_norm": 0.64453125, "learning_rate": 1.891475210570898e-05, "loss": 1.3246, "step": 282 }, { "epoch": 0.21490270526815378, "grad_norm": 0.51953125, "learning_rate": 1.890301710607582e-05, "loss": 1.2312, "step": 283 }, { "epoch": 0.21566207878500238, "grad_norm": 0.4609375, "learning_rate": 1.8891222681391853e-05, "loss": 1.2243, "step": 284 }, { "epoch": 0.21642145230185098, "grad_norm": 0.5234375, "learning_rate": 1.8879368910381423e-05, "loss": 1.2593, "step": 285 }, { "epoch": 0.21718082581869957, "grad_norm": 0.640625, "learning_rate": 1.8867455872165006e-05, "loss": 1.3375, "step": 286 }, { "epoch": 0.21794019933554817, "grad_norm": 0.51171875, "learning_rate": 1.8855483646258677e-05, "loss": 1.2492, "step": 287 }, { "epoch": 0.21869957285239677, "grad_norm": 0.61328125, "learning_rate": 1.8843452312573557e-05, "loss": 1.3306, "step": 288 }, { "epoch": 0.21945894636924537, "grad_norm": 0.5546875, "learning_rate": 1.8831361951415298e-05, "loss": 1.2743, "step": 289 }, { "epoch": 0.22021831988609397, "grad_norm": 0.6875, "learning_rate": 1.881921264348355e-05, "loss": 1.3699, "step": 290 }, { "epoch": 0.22097769340294257, "grad_norm": 0.63671875, "learning_rate": 1.880700446987141e-05, "loss": 1.3548, "step": 291 }, { "epoch": 0.22173706691979117, "grad_norm": 0.671875, "learning_rate": 1.879473751206489e-05, "loss": 1.3974, "step": 292 }, { "epoch": 0.22249644043663977, "grad_norm": 0.5625, "learning_rate": 1.8782411851942365e-05, "loss": 1.29, "step": 293 }, { "epoch": 0.22325581395348837, "grad_norm": 0.53125, "learning_rate": 1.877002757177403e-05, "loss": 1.2906, "step": 294 }, { "epoch": 0.22401518747033697, "grad_norm": 0.462890625, "learning_rate": 1.8757584754221363e-05, "loss": 1.2135, "step": 295 }, { "epoch": 0.22477456098718557, "grad_norm": 0.52734375, "learning_rate": 1.8745083482336547e-05, "loss": 1.3045, "step": 296 }, { "epoch": 0.22553393450403417, "grad_norm": 0.5703125, "learning_rate": 1.8732523839561934e-05, "loss": 1.2641, "step": 297 }, { "epoch": 0.22629330802088277, "grad_norm": 0.4921875, "learning_rate": 1.8719905909729493e-05, "loss": 1.2492, "step": 298 }, { "epoch": 0.22705268153773137, "grad_norm": 0.5078125, "learning_rate": 1.8707229777060242e-05, "loss": 1.2867, "step": 299 }, { "epoch": 0.22781205505457996, "grad_norm": 0.5390625, "learning_rate": 1.869449552616367e-05, "loss": 1.2946, "step": 300 }, { "epoch": 0.22857142857142856, "grad_norm": 0.53125, "learning_rate": 1.8681703242037208e-05, "loss": 1.3014, "step": 301 }, { "epoch": 0.22933080208827716, "grad_norm": 0.470703125, "learning_rate": 1.8668853010065633e-05, "loss": 1.2937, "step": 302 }, { "epoch": 0.23009017560512576, "grad_norm": 0.57421875, "learning_rate": 1.86559449160205e-05, "loss": 1.2866, "step": 303 }, { "epoch": 0.23084954912197436, "grad_norm": 0.51953125, "learning_rate": 1.8642979046059595e-05, "loss": 1.2542, "step": 304 }, { "epoch": 0.23160892263882296, "grad_norm": 0.5078125, "learning_rate": 1.8629955486726324e-05, "loss": 1.2718, "step": 305 }, { "epoch": 0.23236829615567156, "grad_norm": 0.5234375, "learning_rate": 1.861687432494916e-05, "loss": 1.2645, "step": 306 }, { "epoch": 0.23312766967252016, "grad_norm": 0.54296875, "learning_rate": 1.8603735648041054e-05, "loss": 1.2895, "step": 307 }, { "epoch": 0.23388704318936876, "grad_norm": 0.578125, "learning_rate": 1.8590539543698852e-05, "loss": 1.322, "step": 308 }, { "epoch": 0.23464641670621736, "grad_norm": 0.49609375, "learning_rate": 1.8577286100002723e-05, "loss": 1.2584, "step": 309 }, { "epoch": 0.23540579022306596, "grad_norm": 0.52734375, "learning_rate": 1.856397540541554e-05, "loss": 1.2814, "step": 310 }, { "epoch": 0.23616516373991456, "grad_norm": 0.52734375, "learning_rate": 1.855060754878233e-05, "loss": 1.2865, "step": 311 }, { "epoch": 0.23692453725676316, "grad_norm": 0.466796875, "learning_rate": 1.853718261932964e-05, "loss": 1.2597, "step": 312 }, { "epoch": 0.23768391077361178, "grad_norm": 0.5078125, "learning_rate": 1.852370070666498e-05, "loss": 1.2556, "step": 313 }, { "epoch": 0.23844328429046038, "grad_norm": 0.55078125, "learning_rate": 1.8510161900776186e-05, "loss": 1.304, "step": 314 }, { "epoch": 0.23920265780730898, "grad_norm": 0.439453125, "learning_rate": 1.8496566292030864e-05, "loss": 1.2148, "step": 315 }, { "epoch": 0.23996203132415758, "grad_norm": 0.515625, "learning_rate": 1.8482913971175737e-05, "loss": 1.2887, "step": 316 }, { "epoch": 0.24072140484100618, "grad_norm": 0.57421875, "learning_rate": 1.846920502933609e-05, "loss": 1.3276, "step": 317 }, { "epoch": 0.24148077835785478, "grad_norm": 0.474609375, "learning_rate": 1.8455439558015117e-05, "loss": 1.2681, "step": 318 }, { "epoch": 0.24224015187470338, "grad_norm": 0.55078125, "learning_rate": 1.8441617649093334e-05, "loss": 1.2898, "step": 319 }, { "epoch": 0.24299952539155198, "grad_norm": 0.5234375, "learning_rate": 1.8427739394827976e-05, "loss": 1.2785, "step": 320 }, { "epoch": 0.24375889890840058, "grad_norm": 0.4765625, "learning_rate": 1.8413804887852343e-05, "loss": 1.1799, "step": 321 }, { "epoch": 0.24451827242524918, "grad_norm": 0.45703125, "learning_rate": 1.839981422117523e-05, "loss": 1.1951, "step": 322 }, { "epoch": 0.24527764594209778, "grad_norm": 0.546875, "learning_rate": 1.8385767488180255e-05, "loss": 1.3233, "step": 323 }, { "epoch": 0.24603701945894638, "grad_norm": 0.451171875, "learning_rate": 1.8371664782625287e-05, "loss": 1.2204, "step": 324 }, { "epoch": 0.24679639297579498, "grad_norm": 0.478515625, "learning_rate": 1.8357506198641784e-05, "loss": 1.2763, "step": 325 }, { "epoch": 0.24755576649264358, "grad_norm": 0.578125, "learning_rate": 1.8343291830734176e-05, "loss": 1.3397, "step": 326 }, { "epoch": 0.24831514000949217, "grad_norm": 0.52734375, "learning_rate": 1.8329021773779242e-05, "loss": 1.3029, "step": 327 }, { "epoch": 0.24907451352634077, "grad_norm": 0.54296875, "learning_rate": 1.8314696123025456e-05, "loss": 1.2977, "step": 328 }, { "epoch": 0.24983388704318937, "grad_norm": 0.5078125, "learning_rate": 1.8300314974092372e-05, "loss": 1.2915, "step": 329 }, { "epoch": 0.25059326056003794, "grad_norm": 0.478515625, "learning_rate": 1.8285878422969982e-05, "loss": 1.2278, "step": 330 }, { "epoch": 0.25135263407688657, "grad_norm": 0.421875, "learning_rate": 1.827138656601807e-05, "loss": 1.2337, "step": 331 }, { "epoch": 0.25211200759373514, "grad_norm": 0.52734375, "learning_rate": 1.825683949996556e-05, "loss": 1.2978, "step": 332 }, { "epoch": 0.25287138111058377, "grad_norm": 0.5390625, "learning_rate": 1.8242237321909895e-05, "loss": 1.2512, "step": 333 }, { "epoch": 0.25363075462743234, "grad_norm": 0.49609375, "learning_rate": 1.8227580129316368e-05, "loss": 1.2702, "step": 334 }, { "epoch": 0.25439012814428097, "grad_norm": 0.4609375, "learning_rate": 1.821286802001747e-05, "loss": 1.2253, "step": 335 }, { "epoch": 0.25514950166112954, "grad_norm": 0.5390625, "learning_rate": 1.819810109221227e-05, "loss": 1.2708, "step": 336 }, { "epoch": 0.25590887517797817, "grad_norm": 0.44140625, "learning_rate": 1.81832794444657e-05, "loss": 1.2157, "step": 337 }, { "epoch": 0.2566682486948268, "grad_norm": 0.5703125, "learning_rate": 1.8168403175707958e-05, "loss": 1.3529, "step": 338 }, { "epoch": 0.25742762221167537, "grad_norm": 0.390625, "learning_rate": 1.815347238523381e-05, "loss": 1.1796, "step": 339 }, { "epoch": 0.258186995728524, "grad_norm": 0.466796875, "learning_rate": 1.813848717270195e-05, "loss": 1.2568, "step": 340 }, { "epoch": 0.25894636924537257, "grad_norm": 0.490234375, "learning_rate": 1.812344763813431e-05, "loss": 1.2732, "step": 341 }, { "epoch": 0.2597057427622212, "grad_norm": 0.44140625, "learning_rate": 1.8108353881915403e-05, "loss": 1.2737, "step": 342 }, { "epoch": 0.26046511627906976, "grad_norm": 0.45703125, "learning_rate": 1.8093206004791673e-05, "loss": 1.2281, "step": 343 }, { "epoch": 0.2612244897959184, "grad_norm": 0.546875, "learning_rate": 1.8078004107870797e-05, "loss": 1.3148, "step": 344 }, { "epoch": 0.26198386331276696, "grad_norm": 0.44921875, "learning_rate": 1.806274829262101e-05, "loss": 1.2584, "step": 345 }, { "epoch": 0.2627432368296156, "grad_norm": 0.5078125, "learning_rate": 1.8047438660870447e-05, "loss": 1.2665, "step": 346 }, { "epoch": 0.26350261034646416, "grad_norm": 0.51171875, "learning_rate": 1.803207531480645e-05, "loss": 1.2892, "step": 347 }, { "epoch": 0.2642619838633128, "grad_norm": 0.51953125, "learning_rate": 1.8016658356974885e-05, "loss": 1.2782, "step": 348 }, { "epoch": 0.26502135738016136, "grad_norm": 0.46484375, "learning_rate": 1.800118789027947e-05, "loss": 1.2857, "step": 349 }, { "epoch": 0.26578073089701, "grad_norm": 0.455078125, "learning_rate": 1.798566401798106e-05, "loss": 1.2529, "step": 350 }, { "epoch": 0.26654010441385856, "grad_norm": 0.466796875, "learning_rate": 1.7970086843697e-05, "loss": 1.2445, "step": 351 }, { "epoch": 0.2672994779307072, "grad_norm": 0.439453125, "learning_rate": 1.7954456471400393e-05, "loss": 1.2143, "step": 352 }, { "epoch": 0.26805885144755576, "grad_norm": 0.421875, "learning_rate": 1.793877300541944e-05, "loss": 1.2444, "step": 353 }, { "epoch": 0.2688182249644044, "grad_norm": 0.474609375, "learning_rate": 1.7923036550436706e-05, "loss": 1.2674, "step": 354 }, { "epoch": 0.26957759848125296, "grad_norm": 0.5, "learning_rate": 1.7907247211488456e-05, "loss": 1.2926, "step": 355 }, { "epoch": 0.2703369719981016, "grad_norm": 0.439453125, "learning_rate": 1.789140509396394e-05, "loss": 1.2125, "step": 356 }, { "epoch": 0.27109634551495015, "grad_norm": 0.443359375, "learning_rate": 1.7875510303604678e-05, "loss": 1.1936, "step": 357 }, { "epoch": 0.2718557190317988, "grad_norm": 0.462890625, "learning_rate": 1.7859562946503787e-05, "loss": 1.2251, "step": 358 }, { "epoch": 0.27261509254864735, "grad_norm": 0.470703125, "learning_rate": 1.784356312910523e-05, "loss": 1.2829, "step": 359 }, { "epoch": 0.273374466065496, "grad_norm": 0.44140625, "learning_rate": 1.7827510958203147e-05, "loss": 1.2277, "step": 360 }, { "epoch": 0.27413383958234455, "grad_norm": 0.486328125, "learning_rate": 1.78114065409411e-05, "loss": 1.2715, "step": 361 }, { "epoch": 0.2748932130991932, "grad_norm": 0.47265625, "learning_rate": 1.7795249984811397e-05, "loss": 1.2467, "step": 362 }, { "epoch": 0.27565258661604175, "grad_norm": 0.455078125, "learning_rate": 1.7779041397654355e-05, "loss": 1.2529, "step": 363 }, { "epoch": 0.2764119601328904, "grad_norm": 0.5, "learning_rate": 1.7762780887657576e-05, "loss": 1.2749, "step": 364 }, { "epoch": 0.27717133364973895, "grad_norm": 0.40234375, "learning_rate": 1.7746468563355243e-05, "loss": 1.1978, "step": 365 }, { "epoch": 0.2779307071665876, "grad_norm": 0.451171875, "learning_rate": 1.773010453362737e-05, "loss": 1.244, "step": 366 }, { "epoch": 0.27869008068343615, "grad_norm": 0.53515625, "learning_rate": 1.7713688907699107e-05, "loss": 1.3013, "step": 367 }, { "epoch": 0.2794494542002848, "grad_norm": 0.482421875, "learning_rate": 1.769722179513998e-05, "loss": 1.2608, "step": 368 }, { "epoch": 0.28020882771713335, "grad_norm": 0.412109375, "learning_rate": 1.7680703305863177e-05, "loss": 1.1853, "step": 369 }, { "epoch": 0.280968201233982, "grad_norm": 0.44140625, "learning_rate": 1.7664133550124815e-05, "loss": 1.2565, "step": 370 }, { "epoch": 0.28172757475083055, "grad_norm": 0.41796875, "learning_rate": 1.7647512638523193e-05, "loss": 1.1891, "step": 371 }, { "epoch": 0.28248694826767917, "grad_norm": 0.4375, "learning_rate": 1.7630840681998068e-05, "loss": 1.231, "step": 372 }, { "epoch": 0.28324632178452774, "grad_norm": 0.490234375, "learning_rate": 1.7614117791829897e-05, "loss": 1.2935, "step": 373 }, { "epoch": 0.28400569530137637, "grad_norm": 0.52734375, "learning_rate": 1.759734407963911e-05, "loss": 1.2953, "step": 374 }, { "epoch": 0.28476506881822494, "grad_norm": 0.482421875, "learning_rate": 1.7580519657385368e-05, "loss": 1.2782, "step": 375 }, { "epoch": 0.28552444233507357, "grad_norm": 0.55859375, "learning_rate": 1.7563644637366786e-05, "loss": 1.333, "step": 376 }, { "epoch": 0.28628381585192214, "grad_norm": 0.515625, "learning_rate": 1.754671913221923e-05, "loss": 1.2813, "step": 377 }, { "epoch": 0.28704318936877077, "grad_norm": 0.498046875, "learning_rate": 1.752974325491551e-05, "loss": 1.2581, "step": 378 }, { "epoch": 0.28780256288561934, "grad_norm": 0.400390625, "learning_rate": 1.7512717118764687e-05, "loss": 1.2302, "step": 379 }, { "epoch": 0.28856193640246797, "grad_norm": 0.466796875, "learning_rate": 1.7495640837411265e-05, "loss": 1.2359, "step": 380 }, { "epoch": 0.28932130991931654, "grad_norm": 0.51953125, "learning_rate": 1.747851452483445e-05, "loss": 1.2548, "step": 381 }, { "epoch": 0.29008068343616517, "grad_norm": 0.47265625, "learning_rate": 1.7461338295347404e-05, "loss": 1.2752, "step": 382 }, { "epoch": 0.29084005695301374, "grad_norm": 0.392578125, "learning_rate": 1.7444112263596474e-05, "loss": 1.2092, "step": 383 }, { "epoch": 0.29159943046986236, "grad_norm": 0.51171875, "learning_rate": 1.74268365445604e-05, "loss": 1.3045, "step": 384 }, { "epoch": 0.292358803986711, "grad_norm": 0.46484375, "learning_rate": 1.7409511253549592e-05, "loss": 1.2586, "step": 385 }, { "epoch": 0.29311817750355956, "grad_norm": 0.439453125, "learning_rate": 1.7392136506205332e-05, "loss": 1.1966, "step": 386 }, { "epoch": 0.2938775510204082, "grad_norm": 0.408203125, "learning_rate": 1.7374712418498997e-05, "loss": 1.1853, "step": 387 }, { "epoch": 0.29463692453725676, "grad_norm": 0.439453125, "learning_rate": 1.735723910673132e-05, "loss": 1.2408, "step": 388 }, { "epoch": 0.2953962980541054, "grad_norm": 0.412109375, "learning_rate": 1.7339716687531564e-05, "loss": 1.163, "step": 389 }, { "epoch": 0.29615567157095396, "grad_norm": 0.50390625, "learning_rate": 1.7322145277856793e-05, "loss": 1.2941, "step": 390 }, { "epoch": 0.2969150450878026, "grad_norm": 0.419921875, "learning_rate": 1.7304524994991056e-05, "loss": 1.2504, "step": 391 }, { "epoch": 0.29767441860465116, "grad_norm": 0.470703125, "learning_rate": 1.7286855956544616e-05, "loss": 1.2842, "step": 392 }, { "epoch": 0.2984337921214998, "grad_norm": 0.41796875, "learning_rate": 1.726913828045317e-05, "loss": 1.2403, "step": 393 }, { "epoch": 0.29919316563834836, "grad_norm": 0.498046875, "learning_rate": 1.725137208497705e-05, "loss": 1.254, "step": 394 }, { "epoch": 0.299952539155197, "grad_norm": 0.4609375, "learning_rate": 1.7233557488700453e-05, "loss": 1.2395, "step": 395 }, { "epoch": 0.30071191267204556, "grad_norm": 0.53125, "learning_rate": 1.7215694610530624e-05, "loss": 1.2705, "step": 396 }, { "epoch": 0.3014712861888942, "grad_norm": 0.416015625, "learning_rate": 1.7197783569697084e-05, "loss": 1.2212, "step": 397 }, { "epoch": 0.30223065970574275, "grad_norm": 0.51171875, "learning_rate": 1.7179824485750824e-05, "loss": 1.2975, "step": 398 }, { "epoch": 0.3029900332225914, "grad_norm": 0.43359375, "learning_rate": 1.7161817478563504e-05, "loss": 1.2402, "step": 399 }, { "epoch": 0.30374940673943995, "grad_norm": 0.423828125, "learning_rate": 1.7143762668326667e-05, "loss": 1.2287, "step": 400 }, { "epoch": 0.3045087802562886, "grad_norm": 0.451171875, "learning_rate": 1.712566017555092e-05, "loss": 1.2097, "step": 401 }, { "epoch": 0.30526815377313715, "grad_norm": 0.51953125, "learning_rate": 1.7107510121065138e-05, "loss": 1.3114, "step": 402 }, { "epoch": 0.3060275272899858, "grad_norm": 0.423828125, "learning_rate": 1.7089312626015663e-05, "loss": 1.2468, "step": 403 }, { "epoch": 0.30678690080683435, "grad_norm": 0.384765625, "learning_rate": 1.7071067811865477e-05, "loss": 1.1837, "step": 404 }, { "epoch": 0.307546274323683, "grad_norm": 0.470703125, "learning_rate": 1.7052775800393415e-05, "loss": 1.238, "step": 405 }, { "epoch": 0.30830564784053155, "grad_norm": 0.39453125, "learning_rate": 1.703443671369333e-05, "loss": 1.217, "step": 406 }, { "epoch": 0.3090650213573802, "grad_norm": 0.384765625, "learning_rate": 1.7016050674173304e-05, "loss": 1.2202, "step": 407 }, { "epoch": 0.30982439487422875, "grad_norm": 0.45703125, "learning_rate": 1.69976178045548e-05, "loss": 1.2238, "step": 408 }, { "epoch": 0.3105837683910774, "grad_norm": 0.435546875, "learning_rate": 1.6979138227871858e-05, "loss": 1.2318, "step": 409 }, { "epoch": 0.31134314190792595, "grad_norm": 0.453125, "learning_rate": 1.696061206747029e-05, "loss": 1.2208, "step": 410 }, { "epoch": 0.3121025154247746, "grad_norm": 0.453125, "learning_rate": 1.6942039447006823e-05, "loss": 1.2223, "step": 411 }, { "epoch": 0.31286188894162315, "grad_norm": 0.33984375, "learning_rate": 1.6923420490448298e-05, "loss": 1.1626, "step": 412 }, { "epoch": 0.3136212624584718, "grad_norm": 0.5625, "learning_rate": 1.6904755322070846e-05, "loss": 1.2768, "step": 413 }, { "epoch": 0.31438063597532034, "grad_norm": 0.462890625, "learning_rate": 1.688604406645903e-05, "loss": 1.2694, "step": 414 }, { "epoch": 0.31514000949216897, "grad_norm": 0.39453125, "learning_rate": 1.686728684850505e-05, "loss": 1.1856, "step": 415 }, { "epoch": 0.31589938300901754, "grad_norm": 0.380859375, "learning_rate": 1.6848483793407874e-05, "loss": 1.2184, "step": 416 }, { "epoch": 0.31665875652586617, "grad_norm": 0.361328125, "learning_rate": 1.6829635026672432e-05, "loss": 1.1899, "step": 417 }, { "epoch": 0.31741813004271474, "grad_norm": 0.44921875, "learning_rate": 1.6810740674108763e-05, "loss": 1.2078, "step": 418 }, { "epoch": 0.31817750355956337, "grad_norm": 0.46484375, "learning_rate": 1.6791800861831176e-05, "loss": 1.2226, "step": 419 }, { "epoch": 0.31893687707641194, "grad_norm": 0.404296875, "learning_rate": 1.6772815716257414e-05, "loss": 1.2044, "step": 420 }, { "epoch": 0.31969625059326057, "grad_norm": 0.44921875, "learning_rate": 1.6753785364107796e-05, "loss": 1.2699, "step": 421 }, { "epoch": 0.32045562411010914, "grad_norm": 0.37109375, "learning_rate": 1.6734709932404404e-05, "loss": 1.1732, "step": 422 }, { "epoch": 0.32121499762695777, "grad_norm": 0.48046875, "learning_rate": 1.6715589548470187e-05, "loss": 1.2655, "step": 423 }, { "epoch": 0.32197437114380634, "grad_norm": 0.40625, "learning_rate": 1.6696424339928153e-05, "loss": 1.2044, "step": 424 }, { "epoch": 0.32273374466065496, "grad_norm": 0.427734375, "learning_rate": 1.6677214434700495e-05, "loss": 1.2083, "step": 425 }, { "epoch": 0.32349311817750354, "grad_norm": 0.4453125, "learning_rate": 1.665795996100775e-05, "loss": 1.2273, "step": 426 }, { "epoch": 0.32425249169435216, "grad_norm": 0.5, "learning_rate": 1.663866104736793e-05, "loss": 1.2407, "step": 427 }, { "epoch": 0.32501186521120073, "grad_norm": 0.390625, "learning_rate": 1.6619317822595666e-05, "loss": 1.2166, "step": 428 }, { "epoch": 0.32577123872804936, "grad_norm": 0.42578125, "learning_rate": 1.6599930415801374e-05, "loss": 1.238, "step": 429 }, { "epoch": 0.32653061224489793, "grad_norm": 0.390625, "learning_rate": 1.658049895639034e-05, "loss": 1.1813, "step": 430 }, { "epoch": 0.32728998576174656, "grad_norm": 0.423828125, "learning_rate": 1.6561023574061925e-05, "loss": 1.2264, "step": 431 }, { "epoch": 0.32804935927859513, "grad_norm": 0.40625, "learning_rate": 1.6541504398808633e-05, "loss": 1.2364, "step": 432 }, { "epoch": 0.32880873279544376, "grad_norm": 0.44921875, "learning_rate": 1.6521941560915284e-05, "loss": 1.2339, "step": 433 }, { "epoch": 0.3295681063122924, "grad_norm": 0.4765625, "learning_rate": 1.6502335190958135e-05, "loss": 1.2952, "step": 434 }, { "epoch": 0.33032747982914096, "grad_norm": 0.380859375, "learning_rate": 1.648268541980401e-05, "loss": 1.195, "step": 435 }, { "epoch": 0.3310868533459896, "grad_norm": 0.490234375, "learning_rate": 1.646299237860941e-05, "loss": 1.2866, "step": 436 }, { "epoch": 0.33184622686283816, "grad_norm": 0.392578125, "learning_rate": 1.6443256198819665e-05, "loss": 1.2219, "step": 437 }, { "epoch": 0.3326056003796868, "grad_norm": 0.427734375, "learning_rate": 1.6423477012168038e-05, "loss": 1.2458, "step": 438 }, { "epoch": 0.33336497389653535, "grad_norm": 0.384765625, "learning_rate": 1.640365495067485e-05, "loss": 1.21, "step": 439 }, { "epoch": 0.334124347413384, "grad_norm": 0.416015625, "learning_rate": 1.638379014664659e-05, "loss": 1.2286, "step": 440 }, { "epoch": 0.33488372093023255, "grad_norm": 0.40625, "learning_rate": 1.636388273267506e-05, "loss": 1.1945, "step": 441 }, { "epoch": 0.3356430944470812, "grad_norm": 0.5078125, "learning_rate": 1.6343932841636455e-05, "loss": 1.3204, "step": 442 }, { "epoch": 0.33640246796392975, "grad_norm": 0.423828125, "learning_rate": 1.63239406066905e-05, "loss": 1.2361, "step": 443 }, { "epoch": 0.3371618414807784, "grad_norm": 0.40234375, "learning_rate": 1.6303906161279554e-05, "loss": 1.1951, "step": 444 }, { "epoch": 0.33792121499762695, "grad_norm": 0.43359375, "learning_rate": 1.6283829639127705e-05, "loss": 1.2686, "step": 445 }, { "epoch": 0.3386805885144756, "grad_norm": 0.482421875, "learning_rate": 1.6263711174239914e-05, "loss": 1.264, "step": 446 }, { "epoch": 0.33943996203132415, "grad_norm": 0.4375, "learning_rate": 1.6243550900901076e-05, "loss": 1.2668, "step": 447 }, { "epoch": 0.3401993355481728, "grad_norm": 0.408203125, "learning_rate": 1.6223348953675163e-05, "loss": 1.1683, "step": 448 }, { "epoch": 0.34095870906502135, "grad_norm": 0.408203125, "learning_rate": 1.6203105467404284e-05, "loss": 1.2147, "step": 449 }, { "epoch": 0.34171808258187, "grad_norm": 0.400390625, "learning_rate": 1.6182820577207842e-05, "loss": 1.2178, "step": 450 }, { "epoch": 0.34247745609871855, "grad_norm": 0.408203125, "learning_rate": 1.6162494418481574e-05, "loss": 1.2321, "step": 451 }, { "epoch": 0.3432368296155672, "grad_norm": 0.447265625, "learning_rate": 1.6142127126896682e-05, "loss": 1.2495, "step": 452 }, { "epoch": 0.34399620313241575, "grad_norm": 0.38671875, "learning_rate": 1.612171883839891e-05, "loss": 1.1807, "step": 453 }, { "epoch": 0.3447555766492644, "grad_norm": 0.38671875, "learning_rate": 1.6101269689207656e-05, "loss": 1.1941, "step": 454 }, { "epoch": 0.34551495016611294, "grad_norm": 0.369140625, "learning_rate": 1.6080779815815043e-05, "loss": 1.2159, "step": 455 }, { "epoch": 0.34627432368296157, "grad_norm": 0.412109375, "learning_rate": 1.6060249354985023e-05, "loss": 1.222, "step": 456 }, { "epoch": 0.34703369719981014, "grad_norm": 0.44140625, "learning_rate": 1.603967844375245e-05, "loss": 1.2526, "step": 457 }, { "epoch": 0.34779307071665877, "grad_norm": 0.3671875, "learning_rate": 1.6019067219422178e-05, "loss": 1.1691, "step": 458 }, { "epoch": 0.34855244423350734, "grad_norm": 0.390625, "learning_rate": 1.5998415819568135e-05, "loss": 1.1933, "step": 459 }, { "epoch": 0.34931181775035597, "grad_norm": 0.50390625, "learning_rate": 1.597772438203241e-05, "loss": 1.2525, "step": 460 }, { "epoch": 0.35007119126720454, "grad_norm": 0.38671875, "learning_rate": 1.5956993044924334e-05, "loss": 1.2022, "step": 461 }, { "epoch": 0.35083056478405317, "grad_norm": 0.470703125, "learning_rate": 1.593622194661956e-05, "loss": 1.2853, "step": 462 }, { "epoch": 0.35158993830090174, "grad_norm": 0.466796875, "learning_rate": 1.5915411225759122e-05, "loss": 1.3113, "step": 463 }, { "epoch": 0.35234931181775037, "grad_norm": 0.462890625, "learning_rate": 1.5894561021248535e-05, "loss": 1.246, "step": 464 }, { "epoch": 0.35310868533459894, "grad_norm": 0.376953125, "learning_rate": 1.5873671472256854e-05, "loss": 1.1929, "step": 465 }, { "epoch": 0.35386805885144756, "grad_norm": 0.416015625, "learning_rate": 1.5852742718215743e-05, "loss": 1.2469, "step": 466 }, { "epoch": 0.35462743236829614, "grad_norm": 0.35546875, "learning_rate": 1.5831774898818558e-05, "loss": 1.1592, "step": 467 }, { "epoch": 0.35538680588514476, "grad_norm": 0.40625, "learning_rate": 1.5810768154019386e-05, "loss": 1.2145, "step": 468 }, { "epoch": 0.35614617940199333, "grad_norm": 0.400390625, "learning_rate": 1.5789722624032143e-05, "loss": 1.1859, "step": 469 }, { "epoch": 0.35690555291884196, "grad_norm": 0.423828125, "learning_rate": 1.576863844932963e-05, "loss": 1.2184, "step": 470 }, { "epoch": 0.35766492643569053, "grad_norm": 0.435546875, "learning_rate": 1.5747515770642582e-05, "loss": 1.2126, "step": 471 }, { "epoch": 0.35842429995253916, "grad_norm": 0.443359375, "learning_rate": 1.5726354728958736e-05, "loss": 1.2569, "step": 472 }, { "epoch": 0.35918367346938773, "grad_norm": 0.39453125, "learning_rate": 1.570515546552189e-05, "loss": 1.2173, "step": 473 }, { "epoch": 0.35994304698623636, "grad_norm": 0.400390625, "learning_rate": 1.568391812183097e-05, "loss": 1.1995, "step": 474 }, { "epoch": 0.36070242050308493, "grad_norm": 0.40234375, "learning_rate": 1.566264283963907e-05, "loss": 1.238, "step": 475 }, { "epoch": 0.36146179401993356, "grad_norm": 0.400390625, "learning_rate": 1.5641329760952514e-05, "loss": 1.2179, "step": 476 }, { "epoch": 0.36222116753678213, "grad_norm": 0.41015625, "learning_rate": 1.5619979028029898e-05, "loss": 1.2148, "step": 477 }, { "epoch": 0.36298054105363076, "grad_norm": 0.3828125, "learning_rate": 1.5598590783381165e-05, "loss": 1.201, "step": 478 }, { "epoch": 0.36373991457047933, "grad_norm": 0.40234375, "learning_rate": 1.5577165169766627e-05, "loss": 1.2383, "step": 479 }, { "epoch": 0.36449928808732796, "grad_norm": 0.396484375, "learning_rate": 1.5555702330196024e-05, "loss": 1.2399, "step": 480 }, { "epoch": 0.3652586616041765, "grad_norm": 0.41015625, "learning_rate": 1.5534202407927574e-05, "loss": 1.2565, "step": 481 }, { "epoch": 0.36601803512102515, "grad_norm": 0.41796875, "learning_rate": 1.5512665546467008e-05, "loss": 1.2256, "step": 482 }, { "epoch": 0.3667774086378738, "grad_norm": 0.38671875, "learning_rate": 1.549109188956661e-05, "loss": 1.1796, "step": 483 }, { "epoch": 0.36753678215472235, "grad_norm": 0.404296875, "learning_rate": 1.5469481581224274e-05, "loss": 1.2004, "step": 484 }, { "epoch": 0.368296155671571, "grad_norm": 0.369140625, "learning_rate": 1.5447834765682515e-05, "loss": 1.1787, "step": 485 }, { "epoch": 0.36905552918841955, "grad_norm": 0.337890625, "learning_rate": 1.5426151587427548e-05, "loss": 1.1656, "step": 486 }, { "epoch": 0.3698149027052682, "grad_norm": 0.376953125, "learning_rate": 1.540443219118827e-05, "loss": 1.1887, "step": 487 }, { "epoch": 0.37057427622211675, "grad_norm": 0.427734375, "learning_rate": 1.5382676721935344e-05, "loss": 1.2309, "step": 488 }, { "epoch": 0.3713336497389654, "grad_norm": 0.365234375, "learning_rate": 1.5360885324880205e-05, "loss": 1.1869, "step": 489 }, { "epoch": 0.37209302325581395, "grad_norm": 0.4296875, "learning_rate": 1.5339058145474086e-05, "loss": 1.2477, "step": 490 }, { "epoch": 0.3728523967726626, "grad_norm": 0.412109375, "learning_rate": 1.5317195329407067e-05, "loss": 1.2257, "step": 491 }, { "epoch": 0.37361177028951115, "grad_norm": 0.4140625, "learning_rate": 1.529529702260709e-05, "loss": 1.2565, "step": 492 }, { "epoch": 0.3743711438063598, "grad_norm": 0.41015625, "learning_rate": 1.5273363371238983e-05, "loss": 1.1869, "step": 493 }, { "epoch": 0.37513051732320835, "grad_norm": 0.3828125, "learning_rate": 1.5251394521703496e-05, "loss": 1.2229, "step": 494 }, { "epoch": 0.375889890840057, "grad_norm": 0.427734375, "learning_rate": 1.5229390620636309e-05, "loss": 1.2105, "step": 495 }, { "epoch": 0.37664926435690554, "grad_norm": 0.37890625, "learning_rate": 1.5207351814907068e-05, "loss": 1.2271, "step": 496 }, { "epoch": 0.37740863787375417, "grad_norm": 0.361328125, "learning_rate": 1.5185278251618391e-05, "loss": 1.1995, "step": 497 }, { "epoch": 0.37816801139060274, "grad_norm": 0.318359375, "learning_rate": 1.51631700781049e-05, "loss": 1.1512, "step": 498 }, { "epoch": 0.37892738490745137, "grad_norm": 0.384765625, "learning_rate": 1.5141027441932217e-05, "loss": 1.2129, "step": 499 }, { "epoch": 0.37968675842429994, "grad_norm": 0.44921875, "learning_rate": 1.5118850490896012e-05, "loss": 1.2336, "step": 500 }, { "epoch": 0.38044613194114857, "grad_norm": 0.388671875, "learning_rate": 1.5096639373020976e-05, "loss": 1.1947, "step": 501 }, { "epoch": 0.38120550545799714, "grad_norm": 0.373046875, "learning_rate": 1.5074394236559871e-05, "loss": 1.2024, "step": 502 }, { "epoch": 0.38196487897484577, "grad_norm": 0.3828125, "learning_rate": 1.5052115229992512e-05, "loss": 1.2024, "step": 503 }, { "epoch": 0.38272425249169434, "grad_norm": 0.41796875, "learning_rate": 1.5029802502024788e-05, "loss": 1.2601, "step": 504 }, { "epoch": 0.38348362600854297, "grad_norm": 0.373046875, "learning_rate": 1.5007456201587676e-05, "loss": 1.2082, "step": 505 }, { "epoch": 0.38424299952539154, "grad_norm": 0.357421875, "learning_rate": 1.4985076477836232e-05, "loss": 1.1751, "step": 506 }, { "epoch": 0.38500237304224016, "grad_norm": 0.34375, "learning_rate": 1.4962663480148606e-05, "loss": 1.1682, "step": 507 }, { "epoch": 0.38576174655908874, "grad_norm": 0.400390625, "learning_rate": 1.4940217358125042e-05, "loss": 1.222, "step": 508 }, { "epoch": 0.38652112007593736, "grad_norm": 0.376953125, "learning_rate": 1.4917738261586878e-05, "loss": 1.1834, "step": 509 }, { "epoch": 0.38728049359278593, "grad_norm": 0.38671875, "learning_rate": 1.489522634057555e-05, "loss": 1.1874, "step": 510 }, { "epoch": 0.38803986710963456, "grad_norm": 0.41015625, "learning_rate": 1.4872681745351582e-05, "loss": 1.2168, "step": 511 }, { "epoch": 0.38879924062648313, "grad_norm": 0.44921875, "learning_rate": 1.4850104626393598e-05, "loss": 1.2838, "step": 512 }, { "epoch": 0.38955861414333176, "grad_norm": 0.39453125, "learning_rate": 1.4827495134397298e-05, "loss": 1.1814, "step": 513 }, { "epoch": 0.39031798766018033, "grad_norm": 0.421875, "learning_rate": 1.4804853420274471e-05, "loss": 1.2424, "step": 514 }, { "epoch": 0.39107736117702896, "grad_norm": 0.48046875, "learning_rate": 1.4782179635151978e-05, "loss": 1.2785, "step": 515 }, { "epoch": 0.39183673469387753, "grad_norm": 0.41015625, "learning_rate": 1.4759473930370738e-05, "loss": 1.2162, "step": 516 }, { "epoch": 0.39259610821072616, "grad_norm": 0.3828125, "learning_rate": 1.473673645748473e-05, "loss": 1.2142, "step": 517 }, { "epoch": 0.39335548172757473, "grad_norm": 0.3984375, "learning_rate": 1.4713967368259981e-05, "loss": 1.2056, "step": 518 }, { "epoch": 0.39411485524442336, "grad_norm": 0.427734375, "learning_rate": 1.469116681467353e-05, "loss": 1.2555, "step": 519 }, { "epoch": 0.39487422876127193, "grad_norm": 0.353515625, "learning_rate": 1.4668334948912455e-05, "loss": 1.1837, "step": 520 }, { "epoch": 0.39563360227812056, "grad_norm": 0.390625, "learning_rate": 1.4645471923372818e-05, "loss": 1.192, "step": 521 }, { "epoch": 0.3963929757949691, "grad_norm": 0.3828125, "learning_rate": 1.4622577890658668e-05, "loss": 1.2303, "step": 522 }, { "epoch": 0.39715234931181775, "grad_norm": 0.439453125, "learning_rate": 1.4599653003581016e-05, "loss": 1.2871, "step": 523 }, { "epoch": 0.3979117228286663, "grad_norm": 0.404296875, "learning_rate": 1.4576697415156818e-05, "loss": 1.2274, "step": 524 }, { "epoch": 0.39867109634551495, "grad_norm": 0.408203125, "learning_rate": 1.4553711278607953e-05, "loss": 1.2148, "step": 525 }, { "epoch": 0.3994304698623635, "grad_norm": 0.298828125, "learning_rate": 1.4530694747360203e-05, "loss": 1.123, "step": 526 }, { "epoch": 0.40018984337921215, "grad_norm": 0.380859375, "learning_rate": 1.4507647975042221e-05, "loss": 1.1685, "step": 527 }, { "epoch": 0.4009492168960607, "grad_norm": 0.388671875, "learning_rate": 1.4484571115484508e-05, "loss": 1.2304, "step": 528 }, { "epoch": 0.40170859041290935, "grad_norm": 0.375, "learning_rate": 1.44614643227184e-05, "loss": 1.1826, "step": 529 }, { "epoch": 0.402467963929758, "grad_norm": 0.369140625, "learning_rate": 1.4438327750975009e-05, "loss": 1.2434, "step": 530 }, { "epoch": 0.40322733744660655, "grad_norm": 0.3671875, "learning_rate": 1.4415161554684239e-05, "loss": 1.177, "step": 531 }, { "epoch": 0.4039867109634552, "grad_norm": 0.357421875, "learning_rate": 1.4391965888473705e-05, "loss": 1.1952, "step": 532 }, { "epoch": 0.40474608448030375, "grad_norm": 0.4609375, "learning_rate": 1.436874090716774e-05, "loss": 1.2767, "step": 533 }, { "epoch": 0.4055054579971524, "grad_norm": 0.408203125, "learning_rate": 1.434548676578634e-05, "loss": 1.2334, "step": 534 }, { "epoch": 0.40626483151400095, "grad_norm": 0.376953125, "learning_rate": 1.432220361954414e-05, "loss": 1.1755, "step": 535 }, { "epoch": 0.4070242050308496, "grad_norm": 0.32421875, "learning_rate": 1.429889162384937e-05, "loss": 1.1615, "step": 536 }, { "epoch": 0.40778357854769814, "grad_norm": 0.408203125, "learning_rate": 1.4275550934302822e-05, "loss": 1.2221, "step": 537 }, { "epoch": 0.40854295206454677, "grad_norm": 0.357421875, "learning_rate": 1.4252181706696817e-05, "loss": 1.2065, "step": 538 }, { "epoch": 0.40930232558139534, "grad_norm": 0.388671875, "learning_rate": 1.4228784097014156e-05, "loss": 1.2361, "step": 539 }, { "epoch": 0.41006169909824397, "grad_norm": 0.349609375, "learning_rate": 1.4205358261427076e-05, "loss": 1.1413, "step": 540 }, { "epoch": 0.41082107261509254, "grad_norm": 0.34765625, "learning_rate": 1.4181904356296225e-05, "loss": 1.1597, "step": 541 }, { "epoch": 0.41158044613194117, "grad_norm": 0.33984375, "learning_rate": 1.4158422538169596e-05, "loss": 1.1972, "step": 542 }, { "epoch": 0.41233981964878974, "grad_norm": 0.365234375, "learning_rate": 1.4134912963781501e-05, "loss": 1.1908, "step": 543 }, { "epoch": 0.41309919316563837, "grad_norm": 0.35546875, "learning_rate": 1.4111375790051511e-05, "loss": 1.2195, "step": 544 }, { "epoch": 0.41385856668248694, "grad_norm": 0.439453125, "learning_rate": 1.4087811174083422e-05, "loss": 1.2675, "step": 545 }, { "epoch": 0.41461794019933557, "grad_norm": 0.38671875, "learning_rate": 1.4064219273164192e-05, "loss": 1.2397, "step": 546 }, { "epoch": 0.41537731371618414, "grad_norm": 0.37109375, "learning_rate": 1.40406002447629e-05, "loss": 1.1723, "step": 547 }, { "epoch": 0.41613668723303276, "grad_norm": 0.361328125, "learning_rate": 1.4016954246529697e-05, "loss": 1.1875, "step": 548 }, { "epoch": 0.41689606074988134, "grad_norm": 0.3984375, "learning_rate": 1.3993281436294743e-05, "loss": 1.1678, "step": 549 }, { "epoch": 0.41765543426672996, "grad_norm": 0.44140625, "learning_rate": 1.3969581972067166e-05, "loss": 1.2402, "step": 550 }, { "epoch": 0.41841480778357854, "grad_norm": 0.40625, "learning_rate": 1.3945856012034003e-05, "loss": 1.2136, "step": 551 }, { "epoch": 0.41917418130042716, "grad_norm": 0.40625, "learning_rate": 1.392210371455913e-05, "loss": 1.1965, "step": 552 }, { "epoch": 0.41993355481727573, "grad_norm": 0.4140625, "learning_rate": 1.3898325238182235e-05, "loss": 1.1927, "step": 553 }, { "epoch": 0.42069292833412436, "grad_norm": 0.3671875, "learning_rate": 1.3874520741617734e-05, "loss": 1.2102, "step": 554 }, { "epoch": 0.42145230185097293, "grad_norm": 0.41796875, "learning_rate": 1.3850690383753718e-05, "loss": 1.2486, "step": 555 }, { "epoch": 0.42221167536782156, "grad_norm": 0.359375, "learning_rate": 1.3826834323650899e-05, "loss": 1.1525, "step": 556 }, { "epoch": 0.42297104888467013, "grad_norm": 0.4140625, "learning_rate": 1.3802952720541543e-05, "loss": 1.2107, "step": 557 }, { "epoch": 0.42373042240151876, "grad_norm": 0.41796875, "learning_rate": 1.377904573382841e-05, "loss": 1.22, "step": 558 }, { "epoch": 0.42448979591836733, "grad_norm": 0.34765625, "learning_rate": 1.3755113523083679e-05, "loss": 1.1559, "step": 559 }, { "epoch": 0.42524916943521596, "grad_norm": 0.361328125, "learning_rate": 1.3731156248047903e-05, "loss": 1.2233, "step": 560 }, { "epoch": 0.42600854295206453, "grad_norm": 0.314453125, "learning_rate": 1.3707174068628927e-05, "loss": 1.1299, "step": 561 }, { "epoch": 0.42676791646891316, "grad_norm": 0.361328125, "learning_rate": 1.3683167144900833e-05, "loss": 1.182, "step": 562 }, { "epoch": 0.4275272899857617, "grad_norm": 0.400390625, "learning_rate": 1.3659135637102845e-05, "loss": 1.2002, "step": 563 }, { "epoch": 0.42828666350261035, "grad_norm": 0.375, "learning_rate": 1.3635079705638298e-05, "loss": 1.2027, "step": 564 }, { "epoch": 0.4290460370194589, "grad_norm": 0.359375, "learning_rate": 1.3610999511073544e-05, "loss": 1.1353, "step": 565 }, { "epoch": 0.42980541053630755, "grad_norm": 0.349609375, "learning_rate": 1.3586895214136875e-05, "loss": 1.1544, "step": 566 }, { "epoch": 0.4305647840531561, "grad_norm": 0.318359375, "learning_rate": 1.3562766975717468e-05, "loss": 1.1621, "step": 567 }, { "epoch": 0.43132415757000475, "grad_norm": 0.30859375, "learning_rate": 1.3538614956864297e-05, "loss": 1.1351, "step": 568 }, { "epoch": 0.4320835310868533, "grad_norm": 0.3828125, "learning_rate": 1.3514439318785067e-05, "loss": 1.2011, "step": 569 }, { "epoch": 0.43284290460370195, "grad_norm": 0.34375, "learning_rate": 1.3490240222845139e-05, "loss": 1.1835, "step": 570 }, { "epoch": 0.4336022781205505, "grad_norm": 0.392578125, "learning_rate": 1.3466017830566433e-05, "loss": 1.1919, "step": 571 }, { "epoch": 0.43436165163739915, "grad_norm": 0.33203125, "learning_rate": 1.3441772303626387e-05, "loss": 1.1314, "step": 572 }, { "epoch": 0.4351210251542477, "grad_norm": 0.34375, "learning_rate": 1.3417503803856835e-05, "loss": 1.1481, "step": 573 }, { "epoch": 0.43588039867109635, "grad_norm": 0.36328125, "learning_rate": 1.3393212493242964e-05, "loss": 1.2217, "step": 574 }, { "epoch": 0.4366397721879449, "grad_norm": 0.39453125, "learning_rate": 1.3368898533922202e-05, "loss": 1.1553, "step": 575 }, { "epoch": 0.43739914570479355, "grad_norm": 0.41015625, "learning_rate": 1.3344562088183166e-05, "loss": 1.2189, "step": 576 }, { "epoch": 0.4381585192216421, "grad_norm": 0.330078125, "learning_rate": 1.3320203318464552e-05, "loss": 1.1301, "step": 577 }, { "epoch": 0.43891789273849074, "grad_norm": 0.40234375, "learning_rate": 1.3295822387354071e-05, "loss": 1.2088, "step": 578 }, { "epoch": 0.43967726625533937, "grad_norm": 0.337890625, "learning_rate": 1.3271419457587344e-05, "loss": 1.1475, "step": 579 }, { "epoch": 0.44043663977218794, "grad_norm": 0.33203125, "learning_rate": 1.3246994692046837e-05, "loss": 1.16, "step": 580 }, { "epoch": 0.44119601328903657, "grad_norm": 0.384765625, "learning_rate": 1.3222548253760756e-05, "loss": 1.1764, "step": 581 }, { "epoch": 0.44195538680588514, "grad_norm": 0.41015625, "learning_rate": 1.319808030590197e-05, "loss": 1.206, "step": 582 }, { "epoch": 0.44271476032273377, "grad_norm": 0.32421875, "learning_rate": 1.3173591011786917e-05, "loss": 1.1696, "step": 583 }, { "epoch": 0.44347413383958234, "grad_norm": 0.359375, "learning_rate": 1.3149080534874519e-05, "loss": 1.1935, "step": 584 }, { "epoch": 0.44423350735643097, "grad_norm": 0.384765625, "learning_rate": 1.3124549038765078e-05, "loss": 1.1915, "step": 585 }, { "epoch": 0.44499288087327954, "grad_norm": 0.326171875, "learning_rate": 1.3099996687199203e-05, "loss": 1.159, "step": 586 }, { "epoch": 0.44575225439012817, "grad_norm": 0.39453125, "learning_rate": 1.3075423644056699e-05, "loss": 1.2283, "step": 587 }, { "epoch": 0.44651162790697674, "grad_norm": 0.361328125, "learning_rate": 1.305083007335549e-05, "loss": 1.1949, "step": 588 }, { "epoch": 0.44727100142382537, "grad_norm": 0.3359375, "learning_rate": 1.3026216139250505e-05, "loss": 1.1641, "step": 589 }, { "epoch": 0.44803037494067394, "grad_norm": 0.375, "learning_rate": 1.3001582006032601e-05, "loss": 1.2071, "step": 590 }, { "epoch": 0.44878974845752256, "grad_norm": 0.3671875, "learning_rate": 1.2976927838127453e-05, "loss": 1.16, "step": 591 }, { "epoch": 0.44954912197437114, "grad_norm": 0.38671875, "learning_rate": 1.2952253800094467e-05, "loss": 1.2239, "step": 592 }, { "epoch": 0.45030849549121976, "grad_norm": 0.361328125, "learning_rate": 1.2927560056625672e-05, "loss": 1.1955, "step": 593 }, { "epoch": 0.45106786900806833, "grad_norm": 0.34375, "learning_rate": 1.2902846772544625e-05, "loss": 1.1833, "step": 594 }, { "epoch": 0.45182724252491696, "grad_norm": 0.36328125, "learning_rate": 1.2878114112805315e-05, "loss": 1.212, "step": 595 }, { "epoch": 0.45258661604176553, "grad_norm": 0.38671875, "learning_rate": 1.2853362242491054e-05, "loss": 1.1979, "step": 596 }, { "epoch": 0.45334598955861416, "grad_norm": 0.3203125, "learning_rate": 1.2828591326813382e-05, "loss": 1.1222, "step": 597 }, { "epoch": 0.45410536307546273, "grad_norm": 0.375, "learning_rate": 1.2803801531110956e-05, "loss": 1.1922, "step": 598 }, { "epoch": 0.45486473659231136, "grad_norm": 0.361328125, "learning_rate": 1.2778993020848457e-05, "loss": 1.1596, "step": 599 }, { "epoch": 0.45562411010915993, "grad_norm": 0.392578125, "learning_rate": 1.2754165961615482e-05, "loss": 1.2171, "step": 600 }, { "epoch": 0.45638348362600856, "grad_norm": 0.376953125, "learning_rate": 1.2729320519125426e-05, "loss": 1.1937, "step": 601 }, { "epoch": 0.45714285714285713, "grad_norm": 0.34375, "learning_rate": 1.2704456859214397e-05, "loss": 1.1604, "step": 602 }, { "epoch": 0.45790223065970576, "grad_norm": 0.380859375, "learning_rate": 1.2679575147840102e-05, "loss": 1.1724, "step": 603 }, { "epoch": 0.4586616041765543, "grad_norm": 0.353515625, "learning_rate": 1.2654675551080724e-05, "loss": 1.1699, "step": 604 }, { "epoch": 0.45942097769340295, "grad_norm": 0.3203125, "learning_rate": 1.2629758235133838e-05, "loss": 1.1697, "step": 605 }, { "epoch": 0.4601803512102515, "grad_norm": 0.376953125, "learning_rate": 1.2604823366315273e-05, "loss": 1.1973, "step": 606 }, { "epoch": 0.46093972472710015, "grad_norm": 0.451171875, "learning_rate": 1.2579871111058042e-05, "loss": 1.2494, "step": 607 }, { "epoch": 0.4616990982439487, "grad_norm": 0.3359375, "learning_rate": 1.2554901635911188e-05, "loss": 1.1515, "step": 608 }, { "epoch": 0.46245847176079735, "grad_norm": 0.353515625, "learning_rate": 1.2529915107538698e-05, "loss": 1.1638, "step": 609 }, { "epoch": 0.4632178452776459, "grad_norm": 0.40234375, "learning_rate": 1.2504911692718387e-05, "loss": 1.2225, "step": 610 }, { "epoch": 0.46397721879449455, "grad_norm": 0.365234375, "learning_rate": 1.2479891558340777e-05, "loss": 1.1996, "step": 611 }, { "epoch": 0.4647365923113431, "grad_norm": 0.466796875, "learning_rate": 1.2454854871407993e-05, "loss": 1.2728, "step": 612 }, { "epoch": 0.46549596582819175, "grad_norm": 0.31640625, "learning_rate": 1.242980179903264e-05, "loss": 1.1579, "step": 613 }, { "epoch": 0.4662553393450403, "grad_norm": 0.380859375, "learning_rate": 1.2404732508436693e-05, "loss": 1.2026, "step": 614 }, { "epoch": 0.46701471286188895, "grad_norm": 0.4140625, "learning_rate": 1.2379647166950381e-05, "loss": 1.1719, "step": 615 }, { "epoch": 0.4677740863787375, "grad_norm": 0.353515625, "learning_rate": 1.2354545942011058e-05, "loss": 1.1853, "step": 616 }, { "epoch": 0.46853345989558615, "grad_norm": 0.318359375, "learning_rate": 1.2329429001162114e-05, "loss": 1.1524, "step": 617 }, { "epoch": 0.4692928334124347, "grad_norm": 0.419921875, "learning_rate": 1.2304296512051814e-05, "loss": 1.2056, "step": 618 }, { "epoch": 0.47005220692928334, "grad_norm": 0.33984375, "learning_rate": 1.2279148642432229e-05, "loss": 1.187, "step": 619 }, { "epoch": 0.4708115804461319, "grad_norm": 0.330078125, "learning_rate": 1.2253985560158064e-05, "loss": 1.1578, "step": 620 }, { "epoch": 0.47157095396298054, "grad_norm": 0.3203125, "learning_rate": 1.2228807433185588e-05, "loss": 1.1355, "step": 621 }, { "epoch": 0.4723303274798291, "grad_norm": 0.326171875, "learning_rate": 1.2203614429571475e-05, "loss": 1.1617, "step": 622 }, { "epoch": 0.47308970099667774, "grad_norm": 0.416015625, "learning_rate": 1.2178406717471702e-05, "loss": 1.1254, "step": 623 }, { "epoch": 0.4738490745135263, "grad_norm": 0.3828125, "learning_rate": 1.2153184465140413e-05, "loss": 1.1904, "step": 624 }, { "epoch": 0.47460844803037494, "grad_norm": 0.337890625, "learning_rate": 1.2127947840928816e-05, "loss": 1.158, "step": 625 }, { "epoch": 0.47536782154722357, "grad_norm": 0.36328125, "learning_rate": 1.2102697013284035e-05, "loss": 1.1188, "step": 626 }, { "epoch": 0.47612719506407214, "grad_norm": 0.333984375, "learning_rate": 1.207743215074801e-05, "loss": 1.1458, "step": 627 }, { "epoch": 0.47688656858092077, "grad_norm": 0.33203125, "learning_rate": 1.2052153421956343e-05, "loss": 1.1472, "step": 628 }, { "epoch": 0.47764594209776934, "grad_norm": 0.3828125, "learning_rate": 1.2026860995637211e-05, "loss": 1.2092, "step": 629 }, { "epoch": 0.47840531561461797, "grad_norm": 0.3828125, "learning_rate": 1.2001555040610197e-05, "loss": 1.1966, "step": 630 }, { "epoch": 0.47916468913146654, "grad_norm": 0.3203125, "learning_rate": 1.1976235725785202e-05, "loss": 1.094, "step": 631 }, { "epoch": 0.47992406264831516, "grad_norm": 0.34765625, "learning_rate": 1.1950903220161286e-05, "loss": 1.1493, "step": 632 }, { "epoch": 0.48068343616516374, "grad_norm": 0.39453125, "learning_rate": 1.1925557692825558e-05, "loss": 1.2334, "step": 633 }, { "epoch": 0.48144280968201236, "grad_norm": 0.310546875, "learning_rate": 1.1900199312952047e-05, "loss": 1.1418, "step": 634 }, { "epoch": 0.48220218319886093, "grad_norm": 0.359375, "learning_rate": 1.1874828249800565e-05, "loss": 1.144, "step": 635 }, { "epoch": 0.48296155671570956, "grad_norm": 0.353515625, "learning_rate": 1.1849444672715587e-05, "loss": 1.1465, "step": 636 }, { "epoch": 0.48372093023255813, "grad_norm": 0.404296875, "learning_rate": 1.1824048751125101e-05, "loss": 1.2054, "step": 637 }, { "epoch": 0.48448030374940676, "grad_norm": 0.310546875, "learning_rate": 1.1798640654539511e-05, "loss": 1.1376, "step": 638 }, { "epoch": 0.48523967726625533, "grad_norm": 0.30859375, "learning_rate": 1.1773220552550463e-05, "loss": 1.1574, "step": 639 }, { "epoch": 0.48599905078310396, "grad_norm": 0.4140625, "learning_rate": 1.1747788614829758e-05, "loss": 1.2302, "step": 640 }, { "epoch": 0.48675842429995253, "grad_norm": 0.3046875, "learning_rate": 1.1722345011128183e-05, "loss": 1.1259, "step": 641 }, { "epoch": 0.48751779781680116, "grad_norm": 0.32421875, "learning_rate": 1.1696889911274394e-05, "loss": 1.1542, "step": 642 }, { "epoch": 0.48827717133364973, "grad_norm": 0.37109375, "learning_rate": 1.1671423485173783e-05, "loss": 1.23, "step": 643 }, { "epoch": 0.48903654485049836, "grad_norm": 0.44921875, "learning_rate": 1.164594590280734e-05, "loss": 1.2568, "step": 644 }, { "epoch": 0.4897959183673469, "grad_norm": 0.341796875, "learning_rate": 1.162045733423052e-05, "loss": 1.1619, "step": 645 }, { "epoch": 0.49055529188419555, "grad_norm": 0.40234375, "learning_rate": 1.159495794957211e-05, "loss": 1.2003, "step": 646 }, { "epoch": 0.4913146654010441, "grad_norm": 0.412109375, "learning_rate": 1.1569447919033086e-05, "loss": 1.2507, "step": 647 }, { "epoch": 0.49207403891789275, "grad_norm": 0.337890625, "learning_rate": 1.1543927412885489e-05, "loss": 1.1381, "step": 648 }, { "epoch": 0.4928334124347413, "grad_norm": 0.3515625, "learning_rate": 1.1518396601471273e-05, "loss": 1.1715, "step": 649 }, { "epoch": 0.49359278595158995, "grad_norm": 0.359375, "learning_rate": 1.149285565520119e-05, "loss": 1.1947, "step": 650 }, { "epoch": 0.4943521594684385, "grad_norm": 0.3515625, "learning_rate": 1.1467304744553618e-05, "loss": 1.1499, "step": 651 }, { "epoch": 0.49511153298528715, "grad_norm": 0.35546875, "learning_rate": 1.1441744040073469e-05, "loss": 1.1873, "step": 652 }, { "epoch": 0.4958709065021357, "grad_norm": 0.3203125, "learning_rate": 1.1416173712371008e-05, "loss": 1.1398, "step": 653 }, { "epoch": 0.49663028001898435, "grad_norm": 0.388671875, "learning_rate": 1.1390593932120742e-05, "loss": 1.2044, "step": 654 }, { "epoch": 0.4973896535358329, "grad_norm": 0.349609375, "learning_rate": 1.1365004870060266e-05, "loss": 1.1856, "step": 655 }, { "epoch": 0.49814902705268155, "grad_norm": 0.345703125, "learning_rate": 1.1339406696989128e-05, "loss": 1.1601, "step": 656 }, { "epoch": 0.4989084005695301, "grad_norm": 0.408203125, "learning_rate": 1.1313799583767693e-05, "loss": 1.2261, "step": 657 }, { "epoch": 0.49966777408637875, "grad_norm": 0.3515625, "learning_rate": 1.1288183701315996e-05, "loss": 1.1504, "step": 658 }, { "epoch": 0.5004271476032274, "grad_norm": 0.361328125, "learning_rate": 1.1262559220612602e-05, "loss": 1.1967, "step": 659 }, { "epoch": 0.5011865211200759, "grad_norm": 0.359375, "learning_rate": 1.123692631269348e-05, "loss": 1.1724, "step": 660 }, { "epoch": 0.5019458946369245, "grad_norm": 0.326171875, "learning_rate": 1.1211285148650826e-05, "loss": 1.158, "step": 661 }, { "epoch": 0.5027052681537731, "grad_norm": 0.36328125, "learning_rate": 1.1185635899631963e-05, "loss": 1.1994, "step": 662 }, { "epoch": 0.5034646416706218, "grad_norm": 0.3515625, "learning_rate": 1.1159978736838169e-05, "loss": 1.1844, "step": 663 }, { "epoch": 0.5042240151874703, "grad_norm": 0.322265625, "learning_rate": 1.1134313831523547e-05, "loss": 1.151, "step": 664 }, { "epoch": 0.5049833887043189, "grad_norm": 0.390625, "learning_rate": 1.1108641354993876e-05, "loss": 1.1455, "step": 665 }, { "epoch": 0.5057427622211675, "grad_norm": 0.373046875, "learning_rate": 1.1082961478605476e-05, "loss": 1.1656, "step": 666 }, { "epoch": 0.5065021357380162, "grad_norm": 0.328125, "learning_rate": 1.1057274373764056e-05, "loss": 1.141, "step": 667 }, { "epoch": 0.5072615092548647, "grad_norm": 0.302734375, "learning_rate": 1.103158021192357e-05, "loss": 1.136, "step": 668 }, { "epoch": 0.5080208827717133, "grad_norm": 0.361328125, "learning_rate": 1.1005879164585083e-05, "loss": 1.1902, "step": 669 }, { "epoch": 0.5087802562885619, "grad_norm": 0.345703125, "learning_rate": 1.098017140329561e-05, "loss": 1.1535, "step": 670 }, { "epoch": 0.5095396298054106, "grad_norm": 0.390625, "learning_rate": 1.0954457099646981e-05, "loss": 1.1909, "step": 671 }, { "epoch": 0.5102990033222591, "grad_norm": 0.40625, "learning_rate": 1.0928736425274702e-05, "loss": 1.1445, "step": 672 }, { "epoch": 0.5110583768391077, "grad_norm": 0.326171875, "learning_rate": 1.0903009551856795e-05, "loss": 1.1776, "step": 673 }, { "epoch": 0.5118177503559563, "grad_norm": 0.330078125, "learning_rate": 1.0877276651112662e-05, "loss": 1.1799, "step": 674 }, { "epoch": 0.512577123872805, "grad_norm": 0.451171875, "learning_rate": 1.0851537894801935e-05, "loss": 1.2681, "step": 675 }, { "epoch": 0.5133364973896536, "grad_norm": 0.392578125, "learning_rate": 1.0825793454723325e-05, "loss": 1.1858, "step": 676 }, { "epoch": 0.5140958709065021, "grad_norm": 0.388671875, "learning_rate": 1.0800043502713486e-05, "loss": 1.2268, "step": 677 }, { "epoch": 0.5148552444233507, "grad_norm": 0.375, "learning_rate": 1.0774288210645862e-05, "loss": 1.1628, "step": 678 }, { "epoch": 0.5156146179401994, "grad_norm": 0.400390625, "learning_rate": 1.0748527750429545e-05, "loss": 1.2508, "step": 679 }, { "epoch": 0.516373991457048, "grad_norm": 0.373046875, "learning_rate": 1.0722762294008107e-05, "loss": 1.1958, "step": 680 }, { "epoch": 0.5171333649738965, "grad_norm": 0.326171875, "learning_rate": 1.069699201335849e-05, "loss": 1.13, "step": 681 }, { "epoch": 0.5178927384907451, "grad_norm": 0.365234375, "learning_rate": 1.0671217080489816e-05, "loss": 1.2132, "step": 682 }, { "epoch": 0.5186521120075938, "grad_norm": 0.408203125, "learning_rate": 1.0645437667442273e-05, "loss": 1.2433, "step": 683 }, { "epoch": 0.5194114855244424, "grad_norm": 0.328125, "learning_rate": 1.0619653946285948e-05, "loss": 1.1013, "step": 684 }, { "epoch": 0.5201708590412909, "grad_norm": 0.365234375, "learning_rate": 1.0593866089119683e-05, "loss": 1.171, "step": 685 }, { "epoch": 0.5209302325581395, "grad_norm": 0.375, "learning_rate": 1.0568074268069928e-05, "loss": 1.1771, "step": 686 }, { "epoch": 0.5216896060749882, "grad_norm": 0.396484375, "learning_rate": 1.0542278655289588e-05, "loss": 1.1808, "step": 687 }, { "epoch": 0.5224489795918368, "grad_norm": 0.357421875, "learning_rate": 1.0516479422956882e-05, "loss": 1.1398, "step": 688 }, { "epoch": 0.5232083531086853, "grad_norm": 0.38671875, "learning_rate": 1.0490676743274181e-05, "loss": 1.1954, "step": 689 }, { "epoch": 0.5239677266255339, "grad_norm": 0.37890625, "learning_rate": 1.0464870788466875e-05, "loss": 1.1792, "step": 690 }, { "epoch": 0.5247271001423826, "grad_norm": 0.3359375, "learning_rate": 1.0439061730782207e-05, "loss": 1.1585, "step": 691 }, { "epoch": 0.5254864736592312, "grad_norm": 0.3203125, "learning_rate": 1.0413249742488132e-05, "loss": 1.1658, "step": 692 }, { "epoch": 0.5262458471760797, "grad_norm": 0.337890625, "learning_rate": 1.0387434995872174e-05, "loss": 1.1443, "step": 693 }, { "epoch": 0.5270052206929283, "grad_norm": 0.376953125, "learning_rate": 1.0361617663240253e-05, "loss": 1.176, "step": 694 }, { "epoch": 0.527764594209777, "grad_norm": 0.345703125, "learning_rate": 1.0335797916915568e-05, "loss": 1.2121, "step": 695 }, { "epoch": 0.5285239677266256, "grad_norm": 0.38671875, "learning_rate": 1.0309975929237408e-05, "loss": 1.209, "step": 696 }, { "epoch": 0.5292833412434741, "grad_norm": 0.3203125, "learning_rate": 1.0284151872560042e-05, "loss": 1.1629, "step": 697 }, { "epoch": 0.5300427147603227, "grad_norm": 0.376953125, "learning_rate": 1.0258325919251537e-05, "loss": 1.1606, "step": 698 }, { "epoch": 0.5308020882771713, "grad_norm": 0.30078125, "learning_rate": 1.0232498241692625e-05, "loss": 1.1405, "step": 699 }, { "epoch": 0.53156146179402, "grad_norm": 0.41796875, "learning_rate": 1.0206669012275546e-05, "loss": 1.1829, "step": 700 }, { "epoch": 0.5323208353108685, "grad_norm": 0.33984375, "learning_rate": 1.018083840340289e-05, "loss": 1.1182, "step": 701 }, { "epoch": 0.5330802088277171, "grad_norm": 0.380859375, "learning_rate": 1.0155006587486468e-05, "loss": 1.2416, "step": 702 }, { "epoch": 0.5338395823445657, "grad_norm": 0.349609375, "learning_rate": 1.0129173736946143e-05, "loss": 1.1733, "step": 703 }, { "epoch": 0.5345989558614144, "grad_norm": 0.333984375, "learning_rate": 1.0103340024208674e-05, "loss": 1.1117, "step": 704 }, { "epoch": 0.5353583293782629, "grad_norm": 0.353515625, "learning_rate": 1.007750562170659e-05, "loss": 1.2096, "step": 705 }, { "epoch": 0.5361177028951115, "grad_norm": 0.3515625, "learning_rate": 1.0051670701877011e-05, "loss": 1.1615, "step": 706 }, { "epoch": 0.5368770764119601, "grad_norm": 0.322265625, "learning_rate": 1.0025835437160523e-05, "loss": 1.181, "step": 707 }, { "epoch": 0.5376364499288088, "grad_norm": 0.40625, "learning_rate": 1e-05, "loss": 1.2599, "step": 708 }, { "epoch": 0.5383958234456573, "grad_norm": 0.306640625, "learning_rate": 9.97416456283948e-06, "loss": 1.1557, "step": 709 }, { "epoch": 0.5391551969625059, "grad_norm": 0.333984375, "learning_rate": 9.948329298122989e-06, "loss": 1.1486, "step": 710 }, { "epoch": 0.5399145704793545, "grad_norm": 0.32421875, "learning_rate": 9.922494378293414e-06, "loss": 1.146, "step": 711 }, { "epoch": 0.5406739439962032, "grad_norm": 0.375, "learning_rate": 9.89665997579133e-06, "loss": 1.1826, "step": 712 }, { "epoch": 0.5414333175130517, "grad_norm": 0.353515625, "learning_rate": 9.870826263053859e-06, "loss": 1.1607, "step": 713 }, { "epoch": 0.5421926910299003, "grad_norm": 0.318359375, "learning_rate": 9.844993412513533e-06, "loss": 1.1287, "step": 714 }, { "epoch": 0.5429520645467489, "grad_norm": 0.466796875, "learning_rate": 9.819161596597112e-06, "loss": 1.3019, "step": 715 }, { "epoch": 0.5437114380635976, "grad_norm": 0.330078125, "learning_rate": 9.79333098772446e-06, "loss": 1.1456, "step": 716 }, { "epoch": 0.5444708115804461, "grad_norm": 0.333984375, "learning_rate": 9.767501758307376e-06, "loss": 1.1532, "step": 717 }, { "epoch": 0.5452301850972947, "grad_norm": 0.275390625, "learning_rate": 9.741674080748465e-06, "loss": 1.1244, "step": 718 }, { "epoch": 0.5459895586141433, "grad_norm": 0.333984375, "learning_rate": 9.715848127439958e-06, "loss": 1.1617, "step": 719 }, { "epoch": 0.546748932130992, "grad_norm": 0.369140625, "learning_rate": 9.690024070762597e-06, "loss": 1.2031, "step": 720 }, { "epoch": 0.5475083056478405, "grad_norm": 0.35546875, "learning_rate": 9.664202083084437e-06, "loss": 1.1701, "step": 721 }, { "epoch": 0.5482676791646891, "grad_norm": 0.341796875, "learning_rate": 9.638382336759749e-06, "loss": 1.1756, "step": 722 }, { "epoch": 0.5490270526815377, "grad_norm": 0.34375, "learning_rate": 9.612565004127828e-06, "loss": 1.192, "step": 723 }, { "epoch": 0.5497864261983864, "grad_norm": 0.341796875, "learning_rate": 9.586750257511868e-06, "loss": 1.1673, "step": 724 }, { "epoch": 0.550545799715235, "grad_norm": 0.3359375, "learning_rate": 9.560938269217798e-06, "loss": 1.1835, "step": 725 }, { "epoch": 0.5513051732320835, "grad_norm": 0.34375, "learning_rate": 9.53512921153313e-06, "loss": 1.2177, "step": 726 }, { "epoch": 0.5520645467489321, "grad_norm": 0.33984375, "learning_rate": 9.50932325672582e-06, "loss": 1.1675, "step": 727 }, { "epoch": 0.5528239202657808, "grad_norm": 0.275390625, "learning_rate": 9.483520577043121e-06, "loss": 1.104, "step": 728 }, { "epoch": 0.5535832937826294, "grad_norm": 0.31640625, "learning_rate": 9.457721344710412e-06, "loss": 1.126, "step": 729 }, { "epoch": 0.5543426672994779, "grad_norm": 0.392578125, "learning_rate": 9.431925731930079e-06, "loss": 1.1852, "step": 730 }, { "epoch": 0.5551020408163265, "grad_norm": 0.330078125, "learning_rate": 9.406133910880319e-06, "loss": 1.1576, "step": 731 }, { "epoch": 0.5558614143331752, "grad_norm": 0.291015625, "learning_rate": 9.380346053714055e-06, "loss": 1.0863, "step": 732 }, { "epoch": 0.5566207878500238, "grad_norm": 0.318359375, "learning_rate": 9.354562332557728e-06, "loss": 1.1338, "step": 733 }, { "epoch": 0.5573801613668723, "grad_norm": 0.37890625, "learning_rate": 9.328782919510186e-06, "loss": 1.2238, "step": 734 }, { "epoch": 0.5581395348837209, "grad_norm": 0.330078125, "learning_rate": 9.303007986641515e-06, "loss": 1.1432, "step": 735 }, { "epoch": 0.5588989084005695, "grad_norm": 0.302734375, "learning_rate": 9.277237705991895e-06, "loss": 1.15, "step": 736 }, { "epoch": 0.5596582819174182, "grad_norm": 0.283203125, "learning_rate": 9.251472249570458e-06, "loss": 1.1075, "step": 737 }, { "epoch": 0.5604176554342667, "grad_norm": 0.33203125, "learning_rate": 9.225711789354138e-06, "loss": 1.1256, "step": 738 }, { "epoch": 0.5611770289511153, "grad_norm": 0.357421875, "learning_rate": 9.199956497286517e-06, "loss": 1.1923, "step": 739 }, { "epoch": 0.561936402467964, "grad_norm": 0.287109375, "learning_rate": 9.174206545276678e-06, "loss": 1.1069, "step": 740 }, { "epoch": 0.5626957759848126, "grad_norm": 0.375, "learning_rate": 9.148462105198068e-06, "loss": 1.2118, "step": 741 }, { "epoch": 0.5634551495016611, "grad_norm": 0.392578125, "learning_rate": 9.12272334888734e-06, "loss": 1.2203, "step": 742 }, { "epoch": 0.5642145230185097, "grad_norm": 0.375, "learning_rate": 9.096990448143203e-06, "loss": 1.1714, "step": 743 }, { "epoch": 0.5649738965353583, "grad_norm": 0.32421875, "learning_rate": 9.0712635747253e-06, "loss": 1.1562, "step": 744 }, { "epoch": 0.565733270052207, "grad_norm": 0.3125, "learning_rate": 9.045542900353022e-06, "loss": 1.138, "step": 745 }, { "epoch": 0.5664926435690555, "grad_norm": 0.376953125, "learning_rate": 9.019828596704394e-06, "loss": 1.2036, "step": 746 }, { "epoch": 0.5672520170859041, "grad_norm": 0.3984375, "learning_rate": 8.99412083541492e-06, "loss": 1.2011, "step": 747 }, { "epoch": 0.5680113906027527, "grad_norm": 0.3828125, "learning_rate": 8.968419788076431e-06, "loss": 1.2146, "step": 748 }, { "epoch": 0.5687707641196014, "grad_norm": 0.3125, "learning_rate": 8.942725626235949e-06, "loss": 1.1499, "step": 749 }, { "epoch": 0.5695301376364499, "grad_norm": 0.357421875, "learning_rate": 8.917038521394526e-06, "loss": 1.1884, "step": 750 }, { "epoch": 0.5702895111532985, "grad_norm": 0.3359375, "learning_rate": 8.891358645006126e-06, "loss": 1.1455, "step": 751 }, { "epoch": 0.5710488846701471, "grad_norm": 0.2578125, "learning_rate": 8.865686168476458e-06, "loss": 1.1044, "step": 752 }, { "epoch": 0.5718082581869958, "grad_norm": 0.3671875, "learning_rate": 8.840021263161831e-06, "loss": 1.1989, "step": 753 }, { "epoch": 0.5725676317038443, "grad_norm": 0.328125, "learning_rate": 8.81436410036804e-06, "loss": 1.1432, "step": 754 }, { "epoch": 0.5733270052206929, "grad_norm": 0.30078125, "learning_rate": 8.788714851349177e-06, "loss": 1.1265, "step": 755 }, { "epoch": 0.5740863787375415, "grad_norm": 0.326171875, "learning_rate": 8.763073687306523e-06, "loss": 1.1427, "step": 756 }, { "epoch": 0.5748457522543902, "grad_norm": 0.345703125, "learning_rate": 8.737440779387398e-06, "loss": 1.1363, "step": 757 }, { "epoch": 0.5756051257712387, "grad_norm": 0.326171875, "learning_rate": 8.711816298684011e-06, "loss": 1.1628, "step": 758 }, { "epoch": 0.5763644992880873, "grad_norm": 0.4140625, "learning_rate": 8.686200416232314e-06, "loss": 1.2075, "step": 759 }, { "epoch": 0.5771238728049359, "grad_norm": 0.3203125, "learning_rate": 8.660593303010876e-06, "loss": 1.1384, "step": 760 }, { "epoch": 0.5778832463217846, "grad_norm": 0.3046875, "learning_rate": 8.634995129939737e-06, "loss": 1.1354, "step": 761 }, { "epoch": 0.5786426198386331, "grad_norm": 0.390625, "learning_rate": 8.609406067879258e-06, "loss": 1.1626, "step": 762 }, { "epoch": 0.5794019933554817, "grad_norm": 0.34765625, "learning_rate": 8.583826287628996e-06, "loss": 1.2072, "step": 763 }, { "epoch": 0.5801613668723303, "grad_norm": 0.328125, "learning_rate": 8.558255959926533e-06, "loss": 1.1492, "step": 764 }, { "epoch": 0.580920740389179, "grad_norm": 0.40234375, "learning_rate": 8.532695255446384e-06, "loss": 1.1948, "step": 765 }, { "epoch": 0.5816801139060275, "grad_norm": 0.345703125, "learning_rate": 8.507144344798814e-06, "loss": 1.1786, "step": 766 }, { "epoch": 0.5824394874228761, "grad_norm": 0.34765625, "learning_rate": 8.481603398528727e-06, "loss": 1.172, "step": 767 }, { "epoch": 0.5831988609397247, "grad_norm": 0.322265625, "learning_rate": 8.456072587114516e-06, "loss": 1.1431, "step": 768 }, { "epoch": 0.5839582344565734, "grad_norm": 0.3671875, "learning_rate": 8.430552080966918e-06, "loss": 1.2079, "step": 769 }, { "epoch": 0.584717607973422, "grad_norm": 0.349609375, "learning_rate": 8.405042050427891e-06, "loss": 1.1885, "step": 770 }, { "epoch": 0.5854769814902705, "grad_norm": 0.390625, "learning_rate": 8.37954266576948e-06, "loss": 1.1858, "step": 771 }, { "epoch": 0.5862363550071191, "grad_norm": 0.380859375, "learning_rate": 8.35405409719266e-06, "loss": 1.2242, "step": 772 }, { "epoch": 0.5869957285239678, "grad_norm": 0.369140625, "learning_rate": 8.328576514826222e-06, "loss": 1.1984, "step": 773 }, { "epoch": 0.5877551020408164, "grad_norm": 0.29296875, "learning_rate": 8.30311008872561e-06, "loss": 1.1178, "step": 774 }, { "epoch": 0.5885144755576649, "grad_norm": 0.2890625, "learning_rate": 8.277654988871819e-06, "loss": 1.1126, "step": 775 }, { "epoch": 0.5892738490745135, "grad_norm": 0.337890625, "learning_rate": 8.252211385170242e-06, "loss": 1.1394, "step": 776 }, { "epoch": 0.5900332225913621, "grad_norm": 0.341796875, "learning_rate": 8.226779447449538e-06, "loss": 1.1999, "step": 777 }, { "epoch": 0.5907925961082108, "grad_norm": 0.328125, "learning_rate": 8.201359345460496e-06, "loss": 1.1602, "step": 778 }, { "epoch": 0.5915519696250593, "grad_norm": 0.38671875, "learning_rate": 8.175951248874902e-06, "loss": 1.1864, "step": 779 }, { "epoch": 0.5923113431419079, "grad_norm": 0.302734375, "learning_rate": 8.150555327284417e-06, "loss": 1.1053, "step": 780 }, { "epoch": 0.5930707166587565, "grad_norm": 0.275390625, "learning_rate": 8.125171750199436e-06, "loss": 1.1004, "step": 781 }, { "epoch": 0.5938300901756052, "grad_norm": 0.36328125, "learning_rate": 8.099800687047958e-06, "loss": 1.1189, "step": 782 }, { "epoch": 0.5945894636924537, "grad_norm": 0.400390625, "learning_rate": 8.074442307174445e-06, "loss": 1.2653, "step": 783 }, { "epoch": 0.5953488372093023, "grad_norm": 0.298828125, "learning_rate": 8.04909677983872e-06, "loss": 1.1253, "step": 784 }, { "epoch": 0.5961082107261509, "grad_norm": 0.373046875, "learning_rate": 8.023764274214802e-06, "loss": 1.1351, "step": 785 }, { "epoch": 0.5968675842429996, "grad_norm": 0.33984375, "learning_rate": 7.998444959389803e-06, "loss": 1.145, "step": 786 }, { "epoch": 0.5976269577598481, "grad_norm": 0.302734375, "learning_rate": 7.973139004362794e-06, "loss": 1.1679, "step": 787 }, { "epoch": 0.5983863312766967, "grad_norm": 0.3203125, "learning_rate": 7.947846578043658e-06, "loss": 1.1475, "step": 788 }, { "epoch": 0.5991457047935453, "grad_norm": 0.34765625, "learning_rate": 7.922567849251995e-06, "loss": 1.1941, "step": 789 }, { "epoch": 0.599905078310394, "grad_norm": 0.365234375, "learning_rate": 7.897302986715967e-06, "loss": 1.1754, "step": 790 }, { "epoch": 0.6006644518272425, "grad_norm": 0.37890625, "learning_rate": 7.872052159071186e-06, "loss": 1.1762, "step": 791 }, { "epoch": 0.6014238253440911, "grad_norm": 0.302734375, "learning_rate": 7.846815534859592e-06, "loss": 1.1361, "step": 792 }, { "epoch": 0.6021831988609397, "grad_norm": 0.41015625, "learning_rate": 7.821593282528301e-06, "loss": 1.2727, "step": 793 }, { "epoch": 0.6029425723777884, "grad_norm": 0.291015625, "learning_rate": 7.796385570428527e-06, "loss": 1.1568, "step": 794 }, { "epoch": 0.6037019458946369, "grad_norm": 0.396484375, "learning_rate": 7.771192566814412e-06, "loss": 1.2494, "step": 795 }, { "epoch": 0.6044613194114855, "grad_norm": 0.3828125, "learning_rate": 7.746014439841941e-06, "loss": 1.223, "step": 796 }, { "epoch": 0.6052206929283341, "grad_norm": 0.337890625, "learning_rate": 7.720851357567778e-06, "loss": 1.1366, "step": 797 }, { "epoch": 0.6059800664451828, "grad_norm": 0.3203125, "learning_rate": 7.69570348794819e-06, "loss": 1.1451, "step": 798 }, { "epoch": 0.6067394399620313, "grad_norm": 0.29296875, "learning_rate": 7.670570998837889e-06, "loss": 1.1189, "step": 799 }, { "epoch": 0.6074988134788799, "grad_norm": 0.25390625, "learning_rate": 7.645454057988942e-06, "loss": 1.1005, "step": 800 }, { "epoch": 0.6082581869957285, "grad_norm": 0.31640625, "learning_rate": 7.6203528330496245e-06, "loss": 1.1741, "step": 801 }, { "epoch": 0.6090175605125772, "grad_norm": 0.2734375, "learning_rate": 7.595267491563311e-06, "loss": 1.1124, "step": 802 }, { "epoch": 0.6097769340294257, "grad_norm": 0.33984375, "learning_rate": 7.570198200967363e-06, "loss": 1.1459, "step": 803 }, { "epoch": 0.6105363075462743, "grad_norm": 0.36328125, "learning_rate": 7.545145128592009e-06, "loss": 1.1668, "step": 804 }, { "epoch": 0.6112956810631229, "grad_norm": 0.29296875, "learning_rate": 7.520108441659223e-06, "loss": 1.1384, "step": 805 }, { "epoch": 0.6120550545799716, "grad_norm": 0.314453125, "learning_rate": 7.495088307281619e-06, "loss": 1.1462, "step": 806 }, { "epoch": 0.6128144280968201, "grad_norm": 0.330078125, "learning_rate": 7.470084892461305e-06, "loss": 1.1645, "step": 807 }, { "epoch": 0.6135738016136687, "grad_norm": 0.3359375, "learning_rate": 7.445098364088815e-06, "loss": 1.1709, "step": 808 }, { "epoch": 0.6143331751305173, "grad_norm": 0.298828125, "learning_rate": 7.420128888941958e-06, "loss": 1.1914, "step": 809 }, { "epoch": 0.615092548647366, "grad_norm": 0.4140625, "learning_rate": 7.395176633684726e-06, "loss": 1.2529, "step": 810 }, { "epoch": 0.6158519221642145, "grad_norm": 0.30859375, "learning_rate": 7.370241764866169e-06, "loss": 1.1245, "step": 811 }, { "epoch": 0.6166112956810631, "grad_norm": 0.359375, "learning_rate": 7.34532444891928e-06, "loss": 1.1952, "step": 812 }, { "epoch": 0.6173706691979117, "grad_norm": 0.296875, "learning_rate": 7.3204248521599e-06, "loss": 1.1247, "step": 813 }, { "epoch": 0.6181300427147604, "grad_norm": 0.33203125, "learning_rate": 7.295543140785604e-06, "loss": 1.1417, "step": 814 }, { "epoch": 0.6188894162316089, "grad_norm": 0.27734375, "learning_rate": 7.27067948087458e-06, "loss": 1.1264, "step": 815 }, { "epoch": 0.6196487897484575, "grad_norm": 0.322265625, "learning_rate": 7.245834038384523e-06, "loss": 1.176, "step": 816 }, { "epoch": 0.6204081632653061, "grad_norm": 0.314453125, "learning_rate": 7.221006979151546e-06, "loss": 1.1171, "step": 817 }, { "epoch": 0.6211675367821547, "grad_norm": 0.3828125, "learning_rate": 7.196198468889047e-06, "loss": 1.1906, "step": 818 }, { "epoch": 0.6219269102990034, "grad_norm": 0.3046875, "learning_rate": 7.171408673186619e-06, "loss": 1.1394, "step": 819 }, { "epoch": 0.6226862838158519, "grad_norm": 0.3125, "learning_rate": 7.14663775750895e-06, "loss": 1.1334, "step": 820 }, { "epoch": 0.6234456573327005, "grad_norm": 0.3359375, "learning_rate": 7.1218858871946885e-06, "loss": 1.149, "step": 821 }, { "epoch": 0.6242050308495491, "grad_norm": 0.3359375, "learning_rate": 7.097153227455379e-06, "loss": 1.1593, "step": 822 }, { "epoch": 0.6249644043663978, "grad_norm": 0.3984375, "learning_rate": 7.072439943374331e-06, "loss": 1.1399, "step": 823 }, { "epoch": 0.6257237778832463, "grad_norm": 0.376953125, "learning_rate": 7.0477461999055365e-06, "loss": 1.2022, "step": 824 }, { "epoch": 0.6264831514000949, "grad_norm": 0.337890625, "learning_rate": 7.023072161872551e-06, "loss": 1.1374, "step": 825 }, { "epoch": 0.6272425249169435, "grad_norm": 0.2734375, "learning_rate": 6.998417993967403e-06, "loss": 1.1267, "step": 826 }, { "epoch": 0.6280018984337922, "grad_norm": 0.330078125, "learning_rate": 6.973783860749499e-06, "loss": 1.179, "step": 827 }, { "epoch": 0.6287612719506407, "grad_norm": 0.349609375, "learning_rate": 6.949169926644513e-06, "loss": 1.1685, "step": 828 }, { "epoch": 0.6295206454674893, "grad_norm": 0.365234375, "learning_rate": 6.9245763559432996e-06, "loss": 1.2012, "step": 829 }, { "epoch": 0.6302800189843379, "grad_norm": 0.353515625, "learning_rate": 6.9000033128008e-06, "loss": 1.187, "step": 830 }, { "epoch": 0.6310393925011866, "grad_norm": 0.373046875, "learning_rate": 6.875450961234924e-06, "loss": 1.1949, "step": 831 }, { "epoch": 0.6317987660180351, "grad_norm": 0.3515625, "learning_rate": 6.8509194651254825e-06, "loss": 1.1995, "step": 832 }, { "epoch": 0.6325581395348837, "grad_norm": 0.376953125, "learning_rate": 6.826408988213083e-06, "loss": 1.1705, "step": 833 }, { "epoch": 0.6333175130517323, "grad_norm": 0.326171875, "learning_rate": 6.801919694098034e-06, "loss": 1.1469, "step": 834 }, { "epoch": 0.634076886568581, "grad_norm": 0.357421875, "learning_rate": 6.777451746239249e-06, "loss": 1.1363, "step": 835 }, { "epoch": 0.6348362600854295, "grad_norm": 0.33984375, "learning_rate": 6.7530053079531664e-06, "loss": 1.1968, "step": 836 }, { "epoch": 0.6355956336022781, "grad_norm": 0.376953125, "learning_rate": 6.7285805424126585e-06, "loss": 1.2189, "step": 837 }, { "epoch": 0.6363550071191267, "grad_norm": 0.298828125, "learning_rate": 6.70417761264593e-06, "loss": 1.1232, "step": 838 }, { "epoch": 0.6371143806359754, "grad_norm": 0.33984375, "learning_rate": 6.679796681535451e-06, "loss": 1.1898, "step": 839 }, { "epoch": 0.6378737541528239, "grad_norm": 0.296875, "learning_rate": 6.655437911816838e-06, "loss": 1.1666, "step": 840 }, { "epoch": 0.6386331276696725, "grad_norm": 0.296875, "learning_rate": 6.631101466077801e-06, "loss": 1.146, "step": 841 }, { "epoch": 0.6393925011865211, "grad_norm": 0.419921875, "learning_rate": 6.60678750675704e-06, "loss": 1.1723, "step": 842 }, { "epoch": 0.6401518747033698, "grad_norm": 0.34375, "learning_rate": 6.582496196143167e-06, "loss": 1.1488, "step": 843 }, { "epoch": 0.6409112482202183, "grad_norm": 0.3203125, "learning_rate": 6.558227696373617e-06, "loss": 1.1899, "step": 844 }, { "epoch": 0.6416706217370669, "grad_norm": 0.3515625, "learning_rate": 6.533982169433568e-06, "loss": 1.1478, "step": 845 }, { "epoch": 0.6424299952539155, "grad_norm": 0.333984375, "learning_rate": 6.509759777154864e-06, "loss": 1.1353, "step": 846 }, { "epoch": 0.6431893687707642, "grad_norm": 0.28515625, "learning_rate": 6.485560681214933e-06, "loss": 1.1481, "step": 847 }, { "epoch": 0.6439487422876127, "grad_norm": 0.298828125, "learning_rate": 6.461385043135704e-06, "loss": 1.1222, "step": 848 }, { "epoch": 0.6447081158044613, "grad_norm": 0.328125, "learning_rate": 6.437233024282538e-06, "loss": 1.1029, "step": 849 }, { "epoch": 0.6454674893213099, "grad_norm": 0.376953125, "learning_rate": 6.413104785863128e-06, "loss": 1.192, "step": 850 }, { "epoch": 0.6462268628381586, "grad_norm": 0.36328125, "learning_rate": 6.389000488926459e-06, "loss": 1.2227, "step": 851 }, { "epoch": 0.6469862363550071, "grad_norm": 0.279296875, "learning_rate": 6.364920294361701e-06, "loss": 1.0898, "step": 852 }, { "epoch": 0.6477456098718557, "grad_norm": 0.375, "learning_rate": 6.3408643628971585e-06, "loss": 1.1882, "step": 853 }, { "epoch": 0.6485049833887043, "grad_norm": 0.33984375, "learning_rate": 6.316832855099173e-06, "loss": 1.1572, "step": 854 }, { "epoch": 0.649264356905553, "grad_norm": 0.296875, "learning_rate": 6.292825931371075e-06, "loss": 1.1056, "step": 855 }, { "epoch": 0.6500237304224015, "grad_norm": 0.298828125, "learning_rate": 6.2688437519521e-06, "loss": 1.1232, "step": 856 }, { "epoch": 0.6507831039392501, "grad_norm": 0.373046875, "learning_rate": 6.244886476916325e-06, "loss": 1.1479, "step": 857 }, { "epoch": 0.6515424774560987, "grad_norm": 0.294921875, "learning_rate": 6.220954266171597e-06, "loss": 1.1355, "step": 858 }, { "epoch": 0.6523018509729474, "grad_norm": 0.357421875, "learning_rate": 6.197047279458459e-06, "loss": 1.185, "step": 859 }, { "epoch": 0.6530612244897959, "grad_norm": 0.341796875, "learning_rate": 6.173165676349103e-06, "loss": 1.141, "step": 860 }, { "epoch": 0.6538205980066445, "grad_norm": 0.314453125, "learning_rate": 6.149309616246285e-06, "loss": 1.129, "step": 861 }, { "epoch": 0.6545799715234931, "grad_norm": 0.34375, "learning_rate": 6.125479258382268e-06, "loss": 1.1517, "step": 862 }, { "epoch": 0.6553393450403417, "grad_norm": 0.326171875, "learning_rate": 6.101674761817769e-06, "loss": 1.0984, "step": 863 }, { "epoch": 0.6560987185571903, "grad_norm": 0.341796875, "learning_rate": 6.077896285440874e-06, "loss": 1.175, "step": 864 }, { "epoch": 0.6568580920740389, "grad_norm": 0.34375, "learning_rate": 6.054143987966001e-06, "loss": 1.1625, "step": 865 }, { "epoch": 0.6576174655908875, "grad_norm": 0.357421875, "learning_rate": 6.030418027932835e-06, "loss": 1.2025, "step": 866 }, { "epoch": 0.6583768391077361, "grad_norm": 0.3671875, "learning_rate": 6.006718563705258e-06, "loss": 1.1843, "step": 867 }, { "epoch": 0.6591362126245848, "grad_norm": 0.3671875, "learning_rate": 5.983045753470308e-06, "loss": 1.1775, "step": 868 }, { "epoch": 0.6598955861414333, "grad_norm": 0.3984375, "learning_rate": 5.959399755237103e-06, "loss": 1.1727, "step": 869 }, { "epoch": 0.6606549596582819, "grad_norm": 0.353515625, "learning_rate": 5.935780726835811e-06, "loss": 1.1502, "step": 870 }, { "epoch": 0.6614143331751305, "grad_norm": 0.3515625, "learning_rate": 5.91218882591658e-06, "loss": 1.1346, "step": 871 }, { "epoch": 0.6621737066919792, "grad_norm": 0.41796875, "learning_rate": 5.888624209948495e-06, "loss": 1.1899, "step": 872 }, { "epoch": 0.6629330802088277, "grad_norm": 0.345703125, "learning_rate": 5.865087036218504e-06, "loss": 1.1826, "step": 873 }, { "epoch": 0.6636924537256763, "grad_norm": 0.337890625, "learning_rate": 5.841577461830408e-06, "loss": 1.1627, "step": 874 }, { "epoch": 0.6644518272425249, "grad_norm": 0.33984375, "learning_rate": 5.818095643703779e-06, "loss": 1.1732, "step": 875 }, { "epoch": 0.6652112007593736, "grad_norm": 0.294921875, "learning_rate": 5.794641738572925e-06, "loss": 1.1294, "step": 876 }, { "epoch": 0.6659705742762221, "grad_norm": 0.271484375, "learning_rate": 5.771215902985848e-06, "loss": 1.1594, "step": 877 }, { "epoch": 0.6667299477930707, "grad_norm": 0.279296875, "learning_rate": 5.747818293303185e-06, "loss": 1.1273, "step": 878 }, { "epoch": 0.6674893213099193, "grad_norm": 0.3359375, "learning_rate": 5.724449065697182e-06, "loss": 1.1463, "step": 879 }, { "epoch": 0.668248694826768, "grad_norm": 0.333984375, "learning_rate": 5.701108376150635e-06, "loss": 1.1557, "step": 880 }, { "epoch": 0.6690080683436165, "grad_norm": 0.35546875, "learning_rate": 5.677796380455862e-06, "loss": 1.1537, "step": 881 }, { "epoch": 0.6697674418604651, "grad_norm": 0.30859375, "learning_rate": 5.654513234213663e-06, "loss": 1.1203, "step": 882 }, { "epoch": 0.6705268153773137, "grad_norm": 0.33203125, "learning_rate": 5.631259092832265e-06, "loss": 1.1744, "step": 883 }, { "epoch": 0.6712861888941624, "grad_norm": 0.35546875, "learning_rate": 5.608034111526298e-06, "loss": 1.1531, "step": 884 }, { "epoch": 0.6720455624110109, "grad_norm": 0.37109375, "learning_rate": 5.584838445315764e-06, "loss": 1.1989, "step": 885 }, { "epoch": 0.6728049359278595, "grad_norm": 0.39453125, "learning_rate": 5.561672249024988e-06, "loss": 1.2282, "step": 886 }, { "epoch": 0.6735643094447081, "grad_norm": 0.36328125, "learning_rate": 5.538535677281608e-06, "loss": 1.186, "step": 887 }, { "epoch": 0.6743236829615568, "grad_norm": 0.357421875, "learning_rate": 5.515428884515495e-06, "loss": 1.1552, "step": 888 }, { "epoch": 0.6750830564784053, "grad_norm": 0.349609375, "learning_rate": 5.492352024957781e-06, "loss": 1.1389, "step": 889 }, { "epoch": 0.6758424299952539, "grad_norm": 0.33984375, "learning_rate": 5.4693052526397965e-06, "loss": 1.133, "step": 890 }, { "epoch": 0.6766018035121025, "grad_norm": 0.365234375, "learning_rate": 5.446288721392048e-06, "loss": 1.2011, "step": 891 }, { "epoch": 0.6773611770289512, "grad_norm": 0.30859375, "learning_rate": 5.423302584843186e-06, "loss": 1.1344, "step": 892 }, { "epoch": 0.6781205505457997, "grad_norm": 0.328125, "learning_rate": 5.400346996418988e-06, "loss": 1.161, "step": 893 }, { "epoch": 0.6788799240626483, "grad_norm": 0.2734375, "learning_rate": 5.377422109341332e-06, "loss": 1.1067, "step": 894 }, { "epoch": 0.6796392975794969, "grad_norm": 0.306640625, "learning_rate": 5.354528076627185e-06, "loss": 1.1321, "step": 895 }, { "epoch": 0.6803986710963456, "grad_norm": 0.37109375, "learning_rate": 5.331665051087549e-06, "loss": 1.1952, "step": 896 }, { "epoch": 0.6811580446131941, "grad_norm": 0.267578125, "learning_rate": 5.308833185326472e-06, "loss": 1.1063, "step": 897 }, { "epoch": 0.6819174181300427, "grad_norm": 0.357421875, "learning_rate": 5.286032631740023e-06, "loss": 1.19, "step": 898 }, { "epoch": 0.6826767916468913, "grad_norm": 0.365234375, "learning_rate": 5.263263542515273e-06, "loss": 1.1727, "step": 899 }, { "epoch": 0.68343616516374, "grad_norm": 0.3046875, "learning_rate": 5.240526069629265e-06, "loss": 1.172, "step": 900 }, { "epoch": 0.6841955386805885, "grad_norm": 0.357421875, "learning_rate": 5.217820364848027e-06, "loss": 1.1787, "step": 901 }, { "epoch": 0.6849549121974371, "grad_norm": 0.390625, "learning_rate": 5.19514657972553e-06, "loss": 1.2442, "step": 902 }, { "epoch": 0.6857142857142857, "grad_norm": 0.337890625, "learning_rate": 5.172504865602701e-06, "loss": 1.1876, "step": 903 }, { "epoch": 0.6864736592311343, "grad_norm": 0.37109375, "learning_rate": 5.149895373606405e-06, "loss": 1.2092, "step": 904 }, { "epoch": 0.6872330327479829, "grad_norm": 0.265625, "learning_rate": 5.127318254648418e-06, "loss": 1.1086, "step": 905 }, { "epoch": 0.6879924062648315, "grad_norm": 0.328125, "learning_rate": 5.104773659424453e-06, "loss": 1.1276, "step": 906 }, { "epoch": 0.6887517797816801, "grad_norm": 0.369140625, "learning_rate": 5.082261738413124e-06, "loss": 1.2118, "step": 907 }, { "epoch": 0.6895111532985287, "grad_norm": 0.33203125, "learning_rate": 5.059782641874962e-06, "loss": 1.1634, "step": 908 }, { "epoch": 0.6902705268153773, "grad_norm": 0.33203125, "learning_rate": 5.037336519851397e-06, "loss": 1.1525, "step": 909 }, { "epoch": 0.6910299003322259, "grad_norm": 0.306640625, "learning_rate": 5.014923522163773e-06, "loss": 1.1586, "step": 910 }, { "epoch": 0.6917892738490745, "grad_norm": 0.318359375, "learning_rate": 4.992543798412327e-06, "loss": 1.185, "step": 911 }, { "epoch": 0.6925486473659231, "grad_norm": 0.328125, "learning_rate": 4.970197497975216e-06, "loss": 1.1233, "step": 912 }, { "epoch": 0.6933080208827717, "grad_norm": 0.337890625, "learning_rate": 4.947884770007491e-06, "loss": 1.1646, "step": 913 }, { "epoch": 0.6940673943996203, "grad_norm": 0.373046875, "learning_rate": 4.92560576344013e-06, "loss": 1.1766, "step": 914 }, { "epoch": 0.6948267679164689, "grad_norm": 0.337890625, "learning_rate": 4.903360626979026e-06, "loss": 1.1797, "step": 915 }, { "epoch": 0.6955861414333175, "grad_norm": 0.291015625, "learning_rate": 4.881149509103993e-06, "loss": 1.1327, "step": 916 }, { "epoch": 0.6963455149501662, "grad_norm": 0.3125, "learning_rate": 4.858972558067784e-06, "loss": 1.1353, "step": 917 }, { "epoch": 0.6971048884670147, "grad_norm": 0.33984375, "learning_rate": 4.836829921895103e-06, "loss": 1.1603, "step": 918 }, { "epoch": 0.6978642619838633, "grad_norm": 0.3359375, "learning_rate": 4.814721748381608e-06, "loss": 1.1768, "step": 919 }, { "epoch": 0.6986236355007119, "grad_norm": 0.349609375, "learning_rate": 4.7926481850929376e-06, "loss": 1.1515, "step": 920 }, { "epoch": 0.6993830090175606, "grad_norm": 0.380859375, "learning_rate": 4.770609379363694e-06, "loss": 1.2258, "step": 921 }, { "epoch": 0.7001423825344091, "grad_norm": 0.3515625, "learning_rate": 4.748605478296508e-06, "loss": 1.1553, "step": 922 }, { "epoch": 0.7009017560512577, "grad_norm": 0.380859375, "learning_rate": 4.726636628761018e-06, "loss": 1.1856, "step": 923 }, { "epoch": 0.7016611295681063, "grad_norm": 0.33203125, "learning_rate": 4.704702977392914e-06, "loss": 1.172, "step": 924 }, { "epoch": 0.702420503084955, "grad_norm": 0.318359375, "learning_rate": 4.682804670592937e-06, "loss": 1.145, "step": 925 }, { "epoch": 0.7031798766018035, "grad_norm": 0.341796875, "learning_rate": 4.660941854525917e-06, "loss": 1.1645, "step": 926 }, { "epoch": 0.7039392501186521, "grad_norm": 0.314453125, "learning_rate": 4.639114675119797e-06, "loss": 1.1369, "step": 927 }, { "epoch": 0.7046986236355007, "grad_norm": 0.291015625, "learning_rate": 4.617323278064657e-06, "loss": 1.1206, "step": 928 }, { "epoch": 0.7054579971523494, "grad_norm": 0.267578125, "learning_rate": 4.595567808811735e-06, "loss": 1.1056, "step": 929 }, { "epoch": 0.7062173706691979, "grad_norm": 0.40234375, "learning_rate": 4.573848412572458e-06, "loss": 1.1796, "step": 930 }, { "epoch": 0.7069767441860465, "grad_norm": 0.341796875, "learning_rate": 4.552165234317486e-06, "loss": 1.1623, "step": 931 }, { "epoch": 0.7077361177028951, "grad_norm": 0.345703125, "learning_rate": 4.530518418775734e-06, "loss": 1.1729, "step": 932 }, { "epoch": 0.7084954912197438, "grad_norm": 0.33984375, "learning_rate": 4.508908110433393e-06, "loss": 1.1316, "step": 933 }, { "epoch": 0.7092548647365923, "grad_norm": 0.3515625, "learning_rate": 4.487334453532998e-06, "loss": 1.198, "step": 934 }, { "epoch": 0.7100142382534409, "grad_norm": 0.369140625, "learning_rate": 4.465797592072428e-06, "loss": 1.2132, "step": 935 }, { "epoch": 0.7107736117702895, "grad_norm": 0.341796875, "learning_rate": 4.444297669803981e-06, "loss": 1.1731, "step": 936 }, { "epoch": 0.7115329852871382, "grad_norm": 0.298828125, "learning_rate": 4.422834830233378e-06, "loss": 1.119, "step": 937 }, { "epoch": 0.7122923588039867, "grad_norm": 0.29296875, "learning_rate": 4.4014092166188375e-06, "loss": 1.1435, "step": 938 }, { "epoch": 0.7130517323208353, "grad_norm": 0.3671875, "learning_rate": 4.3800209719701055e-06, "loss": 1.1884, "step": 939 }, { "epoch": 0.7138111058376839, "grad_norm": 0.369140625, "learning_rate": 4.35867023904749e-06, "loss": 1.1715, "step": 940 }, { "epoch": 0.7145704793545326, "grad_norm": 0.33203125, "learning_rate": 4.337357160360931e-06, "loss": 1.1819, "step": 941 }, { "epoch": 0.7153298528713811, "grad_norm": 0.326171875, "learning_rate": 4.3160818781690286e-06, "loss": 1.165, "step": 942 }, { "epoch": 0.7160892263882297, "grad_norm": 0.302734375, "learning_rate": 4.294844534478107e-06, "loss": 1.0917, "step": 943 }, { "epoch": 0.7168485999050783, "grad_norm": 0.322265625, "learning_rate": 4.2736452710412645e-06, "loss": 1.1302, "step": 944 }, { "epoch": 0.717607973421927, "grad_norm": 0.365234375, "learning_rate": 4.25248422935742e-06, "loss": 1.1528, "step": 945 }, { "epoch": 0.7183673469387755, "grad_norm": 0.341796875, "learning_rate": 4.2313615506703685e-06, "loss": 1.1557, "step": 946 }, { "epoch": 0.7191267204556241, "grad_norm": 0.37109375, "learning_rate": 4.210277375967855e-06, "loss": 1.2004, "step": 947 }, { "epoch": 0.7198860939724727, "grad_norm": 0.326171875, "learning_rate": 4.189231845980618e-06, "loss": 1.1886, "step": 948 }, { "epoch": 0.7206454674893213, "grad_norm": 0.33984375, "learning_rate": 4.168225101181449e-06, "loss": 1.1163, "step": 949 }, { "epoch": 0.7214048410061699, "grad_norm": 0.287109375, "learning_rate": 4.147257281784257e-06, "loss": 1.1078, "step": 950 }, { "epoch": 0.7221642145230185, "grad_norm": 0.306640625, "learning_rate": 4.1263285277431465e-06, "loss": 1.1385, "step": 951 }, { "epoch": 0.7229235880398671, "grad_norm": 0.3515625, "learning_rate": 4.105438978751465e-06, "loss": 1.1829, "step": 952 }, { "epoch": 0.7236829615567157, "grad_norm": 0.31640625, "learning_rate": 4.084588774240884e-06, "loss": 1.1458, "step": 953 }, { "epoch": 0.7244423350735643, "grad_norm": 0.31640625, "learning_rate": 4.063778053380446e-06, "loss": 1.1388, "step": 954 }, { "epoch": 0.7252017085904129, "grad_norm": 0.3125, "learning_rate": 4.043006955075667e-06, "loss": 1.1234, "step": 955 }, { "epoch": 0.7259610821072615, "grad_norm": 0.3359375, "learning_rate": 4.0222756179675915e-06, "loss": 1.171, "step": 956 }, { "epoch": 0.7267204556241101, "grad_norm": 0.30078125, "learning_rate": 4.001584180431869e-06, "loss": 1.1435, "step": 957 }, { "epoch": 0.7274798291409587, "grad_norm": 0.2578125, "learning_rate": 3.980932780577827e-06, "loss": 1.1021, "step": 958 }, { "epoch": 0.7282392026578073, "grad_norm": 0.357421875, "learning_rate": 3.960321556247552e-06, "loss": 1.1885, "step": 959 }, { "epoch": 0.7289985761746559, "grad_norm": 0.29296875, "learning_rate": 3.939750645014977e-06, "loss": 1.1244, "step": 960 }, { "epoch": 0.7297579496915045, "grad_norm": 0.3125, "learning_rate": 3.919220184184959e-06, "loss": 1.1245, "step": 961 }, { "epoch": 0.730517323208353, "grad_norm": 0.314453125, "learning_rate": 3.898730310792346e-06, "loss": 1.1353, "step": 962 }, { "epoch": 0.7312766967252017, "grad_norm": 0.29296875, "learning_rate": 3.878281161601094e-06, "loss": 1.1653, "step": 963 }, { "epoch": 0.7320360702420503, "grad_norm": 0.294921875, "learning_rate": 3.857872873103322e-06, "loss": 1.1238, "step": 964 }, { "epoch": 0.7327954437588989, "grad_norm": 0.380859375, "learning_rate": 3.837505581518429e-06, "loss": 1.1952, "step": 965 }, { "epoch": 0.7335548172757476, "grad_norm": 0.380859375, "learning_rate": 3.8171794227921585e-06, "loss": 1.2425, "step": 966 }, { "epoch": 0.7343141907925961, "grad_norm": 0.33203125, "learning_rate": 3.7968945325957175e-06, "loss": 1.099, "step": 967 }, { "epoch": 0.7350735643094447, "grad_norm": 0.35546875, "learning_rate": 3.776651046324843e-06, "loss": 1.151, "step": 968 }, { "epoch": 0.7358329378262933, "grad_norm": 0.287109375, "learning_rate": 3.7564490990989276e-06, "loss": 1.1206, "step": 969 }, { "epoch": 0.736592311343142, "grad_norm": 0.302734375, "learning_rate": 3.7362888257600894e-06, "loss": 1.1203, "step": 970 }, { "epoch": 0.7373516848599905, "grad_norm": 0.3671875, "learning_rate": 3.716170360872294e-06, "loss": 1.19, "step": 971 }, { "epoch": 0.7381110583768391, "grad_norm": 0.365234375, "learning_rate": 3.69609383872045e-06, "loss": 1.1872, "step": 972 }, { "epoch": 0.7388704318936877, "grad_norm": 0.32421875, "learning_rate": 3.676059393309499e-06, "loss": 1.1264, "step": 973 }, { "epoch": 0.7396298054105364, "grad_norm": 0.392578125, "learning_rate": 3.6560671583635467e-06, "loss": 1.1832, "step": 974 }, { "epoch": 0.7403891789273849, "grad_norm": 0.30859375, "learning_rate": 3.636117267324941e-06, "loss": 1.1855, "step": 975 }, { "epoch": 0.7411485524442335, "grad_norm": 0.373046875, "learning_rate": 3.6162098533534095e-06, "loss": 1.2236, "step": 976 }, { "epoch": 0.7419079259610821, "grad_norm": 0.30078125, "learning_rate": 3.5963450493251552e-06, "loss": 1.1248, "step": 977 }, { "epoch": 0.7426672994779308, "grad_norm": 0.283203125, "learning_rate": 3.576522987831965e-06, "loss": 1.0895, "step": 978 }, { "epoch": 0.7434266729947793, "grad_norm": 0.322265625, "learning_rate": 3.5567438011803356e-06, "loss": 1.1789, "step": 979 }, { "epoch": 0.7441860465116279, "grad_norm": 0.283203125, "learning_rate": 3.5370076213905904e-06, "loss": 1.1332, "step": 980 }, { "epoch": 0.7449454200284765, "grad_norm": 0.33203125, "learning_rate": 3.5173145801959942e-06, "loss": 1.1575, "step": 981 }, { "epoch": 0.7457047935453252, "grad_norm": 0.29296875, "learning_rate": 3.4976648090418685e-06, "loss": 1.1542, "step": 982 }, { "epoch": 0.7464641670621737, "grad_norm": 0.376953125, "learning_rate": 3.4780584390847193e-06, "loss": 1.2163, "step": 983 }, { "epoch": 0.7472235405790223, "grad_norm": 0.341796875, "learning_rate": 3.4584956011913693e-06, "loss": 1.1658, "step": 984 }, { "epoch": 0.7479829140958709, "grad_norm": 0.3125, "learning_rate": 3.4389764259380754e-06, "loss": 1.1344, "step": 985 }, { "epoch": 0.7487422876127195, "grad_norm": 0.3203125, "learning_rate": 3.4195010436096622e-06, "loss": 1.1608, "step": 986 }, { "epoch": 0.7495016611295681, "grad_norm": 0.38671875, "learning_rate": 3.400069584198633e-06, "loss": 1.2214, "step": 987 }, { "epoch": 0.7502610346464167, "grad_norm": 0.353515625, "learning_rate": 3.380682177404335e-06, "loss": 1.1724, "step": 988 }, { "epoch": 0.7510204081632653, "grad_norm": 0.333984375, "learning_rate": 3.361338952632074e-06, "loss": 1.1665, "step": 989 }, { "epoch": 0.751779781680114, "grad_norm": 0.375, "learning_rate": 3.3420400389922535e-06, "loss": 1.2119, "step": 990 }, { "epoch": 0.7525391551969625, "grad_norm": 0.296875, "learning_rate": 3.32278556529951e-06, "loss": 1.1508, "step": 991 }, { "epoch": 0.7532985287138111, "grad_norm": 0.328125, "learning_rate": 3.3035756600718515e-06, "loss": 1.1584, "step": 992 }, { "epoch": 0.7540579022306597, "grad_norm": 0.322265625, "learning_rate": 3.284410451529816e-06, "loss": 1.1329, "step": 993 }, { "epoch": 0.7548172757475083, "grad_norm": 0.3515625, "learning_rate": 3.2652900675956e-06, "loss": 1.1675, "step": 994 }, { "epoch": 0.7555766492643569, "grad_norm": 0.32421875, "learning_rate": 3.2462146358922033e-06, "loss": 1.1203, "step": 995 }, { "epoch": 0.7563360227812055, "grad_norm": 0.2890625, "learning_rate": 3.2271842837425917e-06, "loss": 1.1085, "step": 996 }, { "epoch": 0.7570953962980541, "grad_norm": 0.29296875, "learning_rate": 3.208199138168826e-06, "loss": 1.1281, "step": 997 }, { "epoch": 0.7578547698149027, "grad_norm": 0.375, "learning_rate": 3.1892593258912407e-06, "loss": 1.1927, "step": 998 }, { "epoch": 0.7586141433317513, "grad_norm": 0.34375, "learning_rate": 3.1703649733275697e-06, "loss": 1.1877, "step": 999 }, { "epoch": 0.7593735168485999, "grad_norm": 0.326171875, "learning_rate": 3.151516206592128e-06, "loss": 1.1486, "step": 1000 }, { "epoch": 0.7601328903654485, "grad_norm": 0.373046875, "learning_rate": 3.132713151494955e-06, "loss": 1.1856, "step": 1001 }, { "epoch": 0.7608922638822971, "grad_norm": 0.30859375, "learning_rate": 3.113955933540973e-06, "loss": 1.1627, "step": 1002 }, { "epoch": 0.7616516373991457, "grad_norm": 0.33203125, "learning_rate": 3.0952446779291577e-06, "loss": 1.1441, "step": 1003 }, { "epoch": 0.7624110109159943, "grad_norm": 0.33203125, "learning_rate": 3.0765795095517026e-06, "loss": 1.1066, "step": 1004 }, { "epoch": 0.7631703844328429, "grad_norm": 0.341796875, "learning_rate": 3.0579605529931832e-06, "loss": 1.1927, "step": 1005 }, { "epoch": 0.7639297579496915, "grad_norm": 0.34375, "learning_rate": 3.0393879325297136e-06, "loss": 1.1468, "step": 1006 }, { "epoch": 0.76468913146654, "grad_norm": 0.333984375, "learning_rate": 3.020861772128145e-06, "loss": 1.1106, "step": 1007 }, { "epoch": 0.7654485049833887, "grad_norm": 0.326171875, "learning_rate": 3.0023821954452036e-06, "loss": 1.1217, "step": 1008 }, { "epoch": 0.7662078785002373, "grad_norm": 0.318359375, "learning_rate": 2.983949325826696e-06, "loss": 1.156, "step": 1009 }, { "epoch": 0.7669672520170859, "grad_norm": 0.33984375, "learning_rate": 2.9655632863066696e-06, "loss": 1.1315, "step": 1010 }, { "epoch": 0.7677266255339346, "grad_norm": 0.328125, "learning_rate": 2.9472241996065897e-06, "loss": 1.1651, "step": 1011 }, { "epoch": 0.7684859990507831, "grad_norm": 0.291015625, "learning_rate": 2.9289321881345257e-06, "loss": 1.1209, "step": 1012 }, { "epoch": 0.7692453725676317, "grad_norm": 0.2890625, "learning_rate": 2.910687373984339e-06, "loss": 1.1137, "step": 1013 }, { "epoch": 0.7700047460844803, "grad_norm": 0.326171875, "learning_rate": 2.8924898789348645e-06, "loss": 1.1695, "step": 1014 }, { "epoch": 0.770764119601329, "grad_norm": 0.33984375, "learning_rate": 2.874339824449085e-06, "loss": 1.1603, "step": 1015 }, { "epoch": 0.7715234931181775, "grad_norm": 0.296875, "learning_rate": 2.856237331673336e-06, "loss": 1.1263, "step": 1016 }, { "epoch": 0.7722828666350261, "grad_norm": 0.30859375, "learning_rate": 2.838182521436498e-06, "loss": 1.1512, "step": 1017 }, { "epoch": 0.7730422401518747, "grad_norm": 0.40234375, "learning_rate": 2.8201755142491814e-06, "loss": 1.2103, "step": 1018 }, { "epoch": 0.7738016136687234, "grad_norm": 0.330078125, "learning_rate": 2.8022164303029186e-06, "loss": 1.1234, "step": 1019 }, { "epoch": 0.7745609871855719, "grad_norm": 0.296875, "learning_rate": 2.7843053894693805e-06, "loss": 1.1291, "step": 1020 }, { "epoch": 0.7753203607024205, "grad_norm": 0.3046875, "learning_rate": 2.76644251129955e-06, "loss": 1.1616, "step": 1021 }, { "epoch": 0.7760797342192691, "grad_norm": 0.31640625, "learning_rate": 2.74862791502295e-06, "loss": 1.1467, "step": 1022 }, { "epoch": 0.7768391077361178, "grad_norm": 0.314453125, "learning_rate": 2.7308617195468336e-06, "loss": 1.1435, "step": 1023 }, { "epoch": 0.7775984812529663, "grad_norm": 0.353515625, "learning_rate": 2.713144043455388e-06, "loss": 1.1323, "step": 1024 }, { "epoch": 0.7783578547698149, "grad_norm": 0.322265625, "learning_rate": 2.695475005008946e-06, "loss": 1.1765, "step": 1025 }, { "epoch": 0.7791172282866635, "grad_norm": 0.30859375, "learning_rate": 2.6778547221432063e-06, "loss": 1.1441, "step": 1026 }, { "epoch": 0.7798766018035121, "grad_norm": 0.31640625, "learning_rate": 2.660283312468438e-06, "loss": 1.1428, "step": 1027 }, { "epoch": 0.7806359753203607, "grad_norm": 0.298828125, "learning_rate": 2.642760893268684e-06, "loss": 1.1243, "step": 1028 }, { "epoch": 0.7813953488372093, "grad_norm": 0.349609375, "learning_rate": 2.625287581501006e-06, "loss": 1.1824, "step": 1029 }, { "epoch": 0.7821547223540579, "grad_norm": 0.359375, "learning_rate": 2.6078634937946724e-06, "loss": 1.1663, "step": 1030 }, { "epoch": 0.7829140958709065, "grad_norm": 0.365234375, "learning_rate": 2.5904887464504115e-06, "loss": 1.1911, "step": 1031 }, { "epoch": 0.7836734693877551, "grad_norm": 0.359375, "learning_rate": 2.573163455439601e-06, "loss": 1.1811, "step": 1032 }, { "epoch": 0.7844328429046037, "grad_norm": 0.37109375, "learning_rate": 2.5558877364035286e-06, "loss": 1.2266, "step": 1033 }, { "epoch": 0.7851922164214523, "grad_norm": 0.333984375, "learning_rate": 2.538661704652595e-06, "loss": 1.1456, "step": 1034 }, { "epoch": 0.7859515899383009, "grad_norm": 0.33203125, "learning_rate": 2.521485475165555e-06, "loss": 1.177, "step": 1035 }, { "epoch": 0.7867109634551495, "grad_norm": 0.341796875, "learning_rate": 2.504359162588741e-06, "loss": 1.18, "step": 1036 }, { "epoch": 0.7874703369719981, "grad_norm": 0.345703125, "learning_rate": 2.4872828812353146e-06, "loss": 1.1414, "step": 1037 }, { "epoch": 0.7882297104888467, "grad_norm": 0.384765625, "learning_rate": 2.470256745084488e-06, "loss": 1.1995, "step": 1038 }, { "epoch": 0.7889890840056953, "grad_norm": 0.349609375, "learning_rate": 2.4532808677807772e-06, "loss": 1.1283, "step": 1039 }, { "epoch": 0.7897484575225439, "grad_norm": 0.345703125, "learning_rate": 2.4363553626332157e-06, "loss": 1.1844, "step": 1040 }, { "epoch": 0.7905078310393925, "grad_norm": 0.369140625, "learning_rate": 2.419480342614635e-06, "loss": 1.1947, "step": 1041 }, { "epoch": 0.7912672045562411, "grad_norm": 0.35546875, "learning_rate": 2.402655920360889e-06, "loss": 1.1751, "step": 1042 }, { "epoch": 0.7920265780730897, "grad_norm": 0.365234375, "learning_rate": 2.385882208170106e-06, "loss": 1.1976, "step": 1043 }, { "epoch": 0.7927859515899383, "grad_norm": 0.36328125, "learning_rate": 2.369159318001937e-06, "loss": 1.1705, "step": 1044 }, { "epoch": 0.7935453251067869, "grad_norm": 0.30078125, "learning_rate": 2.3524873614768085e-06, "loss": 1.1149, "step": 1045 }, { "epoch": 0.7943046986236355, "grad_norm": 0.3203125, "learning_rate": 2.335866449875185e-06, "loss": 1.1556, "step": 1046 }, { "epoch": 0.7950640721404841, "grad_norm": 0.322265625, "learning_rate": 2.3192966941368247e-06, "loss": 1.1266, "step": 1047 }, { "epoch": 0.7958234456573327, "grad_norm": 0.28515625, "learning_rate": 2.3027782048600247e-06, "loss": 1.0954, "step": 1048 }, { "epoch": 0.7965828191741813, "grad_norm": 0.310546875, "learning_rate": 2.2863110923008958e-06, "loss": 1.1715, "step": 1049 }, { "epoch": 0.7973421926910299, "grad_norm": 0.40234375, "learning_rate": 2.26989546637263e-06, "loss": 1.2394, "step": 1050 }, { "epoch": 0.7981015662078785, "grad_norm": 0.37109375, "learning_rate": 2.2535314366447625e-06, "loss": 1.1812, "step": 1051 }, { "epoch": 0.798860939724727, "grad_norm": 0.330078125, "learning_rate": 2.237219112342426e-06, "loss": 1.146, "step": 1052 }, { "epoch": 0.7996203132415757, "grad_norm": 0.3046875, "learning_rate": 2.2209586023456495e-06, "loss": 1.1245, "step": 1053 }, { "epoch": 0.8003796867584243, "grad_norm": 0.3359375, "learning_rate": 2.2047500151886047e-06, "loss": 1.1608, "step": 1054 }, { "epoch": 0.8011390602752729, "grad_norm": 0.341796875, "learning_rate": 2.1885934590589008e-06, "loss": 1.1919, "step": 1055 }, { "epoch": 0.8018984337921214, "grad_norm": 0.314453125, "learning_rate": 2.172489041796856e-06, "loss": 1.1411, "step": 1056 }, { "epoch": 0.8026578073089701, "grad_norm": 0.3203125, "learning_rate": 2.156436870894767e-06, "loss": 1.1685, "step": 1057 }, { "epoch": 0.8034171808258187, "grad_norm": 0.341796875, "learning_rate": 2.140437053496214e-06, "loss": 1.1709, "step": 1058 }, { "epoch": 0.8041765543426673, "grad_norm": 0.353515625, "learning_rate": 2.124489696395321e-06, "loss": 1.1552, "step": 1059 }, { "epoch": 0.804935927859516, "grad_norm": 0.328125, "learning_rate": 2.1085949060360654e-06, "loss": 1.1587, "step": 1060 }, { "epoch": 0.8056953013763645, "grad_norm": 0.30859375, "learning_rate": 2.092752788511546e-06, "loss": 1.1752, "step": 1061 }, { "epoch": 0.8064546748932131, "grad_norm": 0.3125, "learning_rate": 2.0769634495632986e-06, "loss": 1.1594, "step": 1062 }, { "epoch": 0.8072140484100617, "grad_norm": 0.28515625, "learning_rate": 2.061226994580563e-06, "loss": 1.1164, "step": 1063 }, { "epoch": 0.8079734219269104, "grad_norm": 0.294921875, "learning_rate": 2.045543528599607e-06, "loss": 1.0982, "step": 1064 }, { "epoch": 0.8087327954437589, "grad_norm": 0.33984375, "learning_rate": 2.0299131563030016e-06, "loss": 1.1587, "step": 1065 }, { "epoch": 0.8094921689606075, "grad_norm": 0.388671875, "learning_rate": 2.0143359820189403e-06, "loss": 1.1613, "step": 1066 }, { "epoch": 0.8102515424774561, "grad_norm": 0.30078125, "learning_rate": 1.998812109720535e-06, "loss": 1.1486, "step": 1067 }, { "epoch": 0.8110109159943047, "grad_norm": 0.349609375, "learning_rate": 1.983341643025117e-06, "loss": 1.1652, "step": 1068 }, { "epoch": 0.8117702895111533, "grad_norm": 0.31640625, "learning_rate": 1.967924685193552e-06, "loss": 1.1593, "step": 1069 }, { "epoch": 0.8125296630280019, "grad_norm": 0.34375, "learning_rate": 1.952561339129554e-06, "loss": 1.1904, "step": 1070 }, { "epoch": 0.8132890365448505, "grad_norm": 0.32421875, "learning_rate": 1.93725170737899e-06, "loss": 1.151, "step": 1071 }, { "epoch": 0.8140484100616991, "grad_norm": 0.29296875, "learning_rate": 1.921995892129208e-06, "loss": 1.1097, "step": 1072 }, { "epoch": 0.8148077835785477, "grad_norm": 0.375, "learning_rate": 1.906793995208328e-06, "loss": 1.1875, "step": 1073 }, { "epoch": 0.8155671570953963, "grad_norm": 0.400390625, "learning_rate": 1.8916461180845968e-06, "loss": 1.2437, "step": 1074 }, { "epoch": 0.8163265306122449, "grad_norm": 0.375, "learning_rate": 1.8765523618656923e-06, "loss": 1.1949, "step": 1075 }, { "epoch": 0.8170859041290935, "grad_norm": 0.33203125, "learning_rate": 1.861512827298051e-06, "loss": 1.1321, "step": 1076 }, { "epoch": 0.8178452776459421, "grad_norm": 0.328125, "learning_rate": 1.8465276147661905e-06, "loss": 1.1811, "step": 1077 }, { "epoch": 0.8186046511627907, "grad_norm": 0.35546875, "learning_rate": 1.8315968242920446e-06, "loss": 1.2074, "step": 1078 }, { "epoch": 0.8193640246796393, "grad_norm": 0.345703125, "learning_rate": 1.8167205555343027e-06, "loss": 1.1378, "step": 1079 }, { "epoch": 0.8201233981964879, "grad_norm": 0.314453125, "learning_rate": 1.8018989077877368e-06, "loss": 1.1401, "step": 1080 }, { "epoch": 0.8208827717133365, "grad_norm": 0.3203125, "learning_rate": 1.7871319799825316e-06, "loss": 1.1455, "step": 1081 }, { "epoch": 0.8216421452301851, "grad_norm": 0.365234375, "learning_rate": 1.7724198706836372e-06, "loss": 1.1678, "step": 1082 }, { "epoch": 0.8224015187470337, "grad_norm": 0.447265625, "learning_rate": 1.757762678090107e-06, "loss": 1.1541, "step": 1083 }, { "epoch": 0.8231608922638823, "grad_norm": 0.365234375, "learning_rate": 1.743160500034443e-06, "loss": 1.1924, "step": 1084 }, { "epoch": 0.8239202657807309, "grad_norm": 0.30859375, "learning_rate": 1.7286134339819337e-06, "loss": 1.1414, "step": 1085 }, { "epoch": 0.8246796392975795, "grad_norm": 0.322265625, "learning_rate": 1.7141215770300202e-06, "loss": 1.1341, "step": 1086 }, { "epoch": 0.8254390128144281, "grad_norm": 0.359375, "learning_rate": 1.6996850259076303e-06, "loss": 1.1874, "step": 1087 }, { "epoch": 0.8261983863312767, "grad_norm": 0.3359375, "learning_rate": 1.6853038769745466e-06, "loss": 1.1982, "step": 1088 }, { "epoch": 0.8269577598481253, "grad_norm": 0.369140625, "learning_rate": 1.670978226220762e-06, "loss": 1.2065, "step": 1089 }, { "epoch": 0.8277171333649739, "grad_norm": 0.322265625, "learning_rate": 1.6567081692658238e-06, "loss": 1.148, "step": 1090 }, { "epoch": 0.8284765068818225, "grad_norm": 0.3046875, "learning_rate": 1.642493801358218e-06, "loss": 1.1179, "step": 1091 }, { "epoch": 0.8292358803986711, "grad_norm": 0.3359375, "learning_rate": 1.6283352173747148e-06, "loss": 1.1411, "step": 1092 }, { "epoch": 0.8299952539155196, "grad_norm": 0.369140625, "learning_rate": 1.6142325118197488e-06, "loss": 1.1431, "step": 1093 }, { "epoch": 0.8307546274323683, "grad_norm": 0.32421875, "learning_rate": 1.6001857788247755e-06, "loss": 1.1494, "step": 1094 }, { "epoch": 0.8315140009492169, "grad_norm": 0.365234375, "learning_rate": 1.5861951121476571e-06, "loss": 1.1864, "step": 1095 }, { "epoch": 0.8322733744660655, "grad_norm": 0.26171875, "learning_rate": 1.5722606051720268e-06, "loss": 1.1363, "step": 1096 }, { "epoch": 0.833032747982914, "grad_norm": 0.322265625, "learning_rate": 1.5583823509066665e-06, "loss": 1.1366, "step": 1097 }, { "epoch": 0.8337921214997627, "grad_norm": 0.275390625, "learning_rate": 1.5445604419848858e-06, "loss": 1.1422, "step": 1098 }, { "epoch": 0.8345514950166113, "grad_norm": 0.380859375, "learning_rate": 1.5307949706639114e-06, "loss": 1.1861, "step": 1099 }, { "epoch": 0.8353108685334599, "grad_norm": 0.408203125, "learning_rate": 1.5170860288242638e-06, "loss": 1.1732, "step": 1100 }, { "epoch": 0.8360702420503084, "grad_norm": 0.349609375, "learning_rate": 1.503433707969142e-06, "loss": 1.1638, "step": 1101 }, { "epoch": 0.8368296155671571, "grad_norm": 0.310546875, "learning_rate": 1.489838099223816e-06, "loss": 1.1235, "step": 1102 }, { "epoch": 0.8375889890840057, "grad_norm": 0.318359375, "learning_rate": 1.476299293335024e-06, "loss": 1.1356, "step": 1103 }, { "epoch": 0.8383483626008543, "grad_norm": 0.27734375, "learning_rate": 1.4628173806703594e-06, "loss": 1.1142, "step": 1104 }, { "epoch": 0.8391077361177028, "grad_norm": 0.30859375, "learning_rate": 1.4493924512176748e-06, "loss": 1.1373, "step": 1105 }, { "epoch": 0.8398671096345515, "grad_norm": 0.40625, "learning_rate": 1.436024594584461e-06, "loss": 1.2117, "step": 1106 }, { "epoch": 0.8406264831514001, "grad_norm": 0.248046875, "learning_rate": 1.4227138999972801e-06, "loss": 1.077, "step": 1107 }, { "epoch": 0.8413858566682487, "grad_norm": 0.353515625, "learning_rate": 1.409460456301147e-06, "loss": 1.1294, "step": 1108 }, { "epoch": 0.8421452301850973, "grad_norm": 0.31640625, "learning_rate": 1.3962643519589502e-06, "loss": 1.1354, "step": 1109 }, { "epoch": 0.8429046037019459, "grad_norm": 0.412109375, "learning_rate": 1.3831256750508449e-06, "loss": 1.1973, "step": 1110 }, { "epoch": 0.8436639772187945, "grad_norm": 0.30078125, "learning_rate": 1.3700445132736795e-06, "loss": 1.1396, "step": 1111 }, { "epoch": 0.8444233507356431, "grad_norm": 0.302734375, "learning_rate": 1.3570209539404067e-06, "loss": 1.1354, "step": 1112 }, { "epoch": 0.8451827242524917, "grad_norm": 0.322265625, "learning_rate": 1.3440550839795008e-06, "loss": 1.1847, "step": 1113 }, { "epoch": 0.8459420977693403, "grad_norm": 0.306640625, "learning_rate": 1.3311469899343698e-06, "loss": 1.1425, "step": 1114 }, { "epoch": 0.8467014712861889, "grad_norm": 0.298828125, "learning_rate": 1.3182967579627948e-06, "loss": 1.1266, "step": 1115 }, { "epoch": 0.8474608448030375, "grad_norm": 0.318359375, "learning_rate": 1.305504473836331e-06, "loss": 1.1409, "step": 1116 }, { "epoch": 0.8482202183198861, "grad_norm": 0.341796875, "learning_rate": 1.2927702229397633e-06, "loss": 1.1686, "step": 1117 }, { "epoch": 0.8489795918367347, "grad_norm": 0.36328125, "learning_rate": 1.2800940902705072e-06, "loss": 1.1655, "step": 1118 }, { "epoch": 0.8497389653535833, "grad_norm": 0.322265625, "learning_rate": 1.2674761604380692e-06, "loss": 1.1476, "step": 1119 }, { "epoch": 0.8504983388704319, "grad_norm": 0.388671875, "learning_rate": 1.2549165176634582e-06, "loss": 1.2241, "step": 1120 }, { "epoch": 0.8512577123872805, "grad_norm": 0.3203125, "learning_rate": 1.2424152457786408e-06, "loss": 1.1283, "step": 1121 }, { "epoch": 0.8520170859041291, "grad_norm": 0.330078125, "learning_rate": 1.2299724282259685e-06, "loss": 1.1519, "step": 1122 }, { "epoch": 0.8527764594209777, "grad_norm": 0.31640625, "learning_rate": 1.2175881480576347e-06, "loss": 1.1268, "step": 1123 }, { "epoch": 0.8535358329378263, "grad_norm": 0.30859375, "learning_rate": 1.2052624879351105e-06, "loss": 1.0941, "step": 1124 }, { "epoch": 0.8542952064546749, "grad_norm": 0.3359375, "learning_rate": 1.1929955301285889e-06, "loss": 1.1533, "step": 1125 }, { "epoch": 0.8550545799715235, "grad_norm": 0.365234375, "learning_rate": 1.1807873565164507e-06, "loss": 1.1927, "step": 1126 }, { "epoch": 0.8558139534883721, "grad_norm": 0.361328125, "learning_rate": 1.1686380485847027e-06, "loss": 1.1902, "step": 1127 }, { "epoch": 0.8565733270052207, "grad_norm": 0.287109375, "learning_rate": 1.1565476874264448e-06, "loss": 1.1152, "step": 1128 }, { "epoch": 0.8573327005220693, "grad_norm": 0.330078125, "learning_rate": 1.144516353741324e-06, "loss": 1.1328, "step": 1129 }, { "epoch": 0.8580920740389179, "grad_norm": 0.333984375, "learning_rate": 1.1325441278349935e-06, "loss": 1.1626, "step": 1130 }, { "epoch": 0.8588514475557665, "grad_norm": 0.3671875, "learning_rate": 1.120631089618579e-06, "loss": 1.1927, "step": 1131 }, { "epoch": 0.8596108210726151, "grad_norm": 0.365234375, "learning_rate": 1.1087773186081474e-06, "loss": 1.2139, "step": 1132 }, { "epoch": 0.8603701945894637, "grad_norm": 0.33984375, "learning_rate": 1.0969828939241779e-06, "loss": 1.1491, "step": 1133 }, { "epoch": 0.8611295681063122, "grad_norm": 0.341796875, "learning_rate": 1.0852478942910228e-06, "loss": 1.156, "step": 1134 }, { "epoch": 0.8618889416231609, "grad_norm": 0.33984375, "learning_rate": 1.0735723980363921e-06, "loss": 1.1736, "step": 1135 }, { "epoch": 0.8626483151400095, "grad_norm": 0.365234375, "learning_rate": 1.0619564830908303e-06, "loss": 1.1818, "step": 1136 }, { "epoch": 0.8634076886568581, "grad_norm": 0.3515625, "learning_rate": 1.0504002269871927e-06, "loss": 1.1886, "step": 1137 }, { "epoch": 0.8641670621737066, "grad_norm": 0.357421875, "learning_rate": 1.0389037068601325e-06, "loss": 1.2172, "step": 1138 }, { "epoch": 0.8649264356905553, "grad_norm": 0.302734375, "learning_rate": 1.027466999445572e-06, "loss": 1.1286, "step": 1139 }, { "epoch": 0.8656858092074039, "grad_norm": 0.32421875, "learning_rate": 1.0160901810802114e-06, "loss": 1.1688, "step": 1140 }, { "epoch": 0.8664451827242525, "grad_norm": 0.36328125, "learning_rate": 1.0047733277010064e-06, "loss": 1.2127, "step": 1141 }, { "epoch": 0.867204556241101, "grad_norm": 0.35546875, "learning_rate": 9.935165148446658e-07, "loss": 1.1628, "step": 1142 }, { "epoch": 0.8679639297579497, "grad_norm": 0.30859375, "learning_rate": 9.823198176471381e-07, "loss": 1.1454, "step": 1143 }, { "epoch": 0.8687233032747983, "grad_norm": 0.306640625, "learning_rate": 9.711833108431234e-07, "loss": 1.1546, "step": 1144 }, { "epoch": 0.8694826767916469, "grad_norm": 0.34765625, "learning_rate": 9.601070687655667e-07, "loss": 1.1958, "step": 1145 }, { "epoch": 0.8702420503084954, "grad_norm": 0.30859375, "learning_rate": 9.490911653451651e-07, "loss": 1.1511, "step": 1146 }, { "epoch": 0.8710014238253441, "grad_norm": 0.3125, "learning_rate": 9.381356741098702e-07, "loss": 1.148, "step": 1147 }, { "epoch": 0.8717607973421927, "grad_norm": 0.328125, "learning_rate": 9.272406681844015e-07, "loss": 1.1383, "step": 1148 }, { "epoch": 0.8725201708590413, "grad_norm": 0.345703125, "learning_rate": 9.164062202897539e-07, "loss": 1.137, "step": 1149 }, { "epoch": 0.8732795443758898, "grad_norm": 0.33984375, "learning_rate": 9.05632402742721e-07, "loss": 1.1381, "step": 1150 }, { "epoch": 0.8740389178927385, "grad_norm": 0.365234375, "learning_rate": 8.949192874553991e-07, "loss": 1.1854, "step": 1151 }, { "epoch": 0.8747982914095871, "grad_norm": 0.42578125, "learning_rate": 8.842669459347186e-07, "loss": 1.199, "step": 1152 }, { "epoch": 0.8755576649264357, "grad_norm": 0.35546875, "learning_rate": 8.736754492819655e-07, "loss": 1.1787, "step": 1153 }, { "epoch": 0.8763170384432842, "grad_norm": 0.36328125, "learning_rate": 8.631448681922994e-07, "loss": 1.1742, "step": 1154 }, { "epoch": 0.8770764119601329, "grad_norm": 0.3359375, "learning_rate": 8.526752729542831e-07, "loss": 1.1326, "step": 1155 }, { "epoch": 0.8778357854769815, "grad_norm": 0.365234375, "learning_rate": 8.42266733449425e-07, "loss": 1.1984, "step": 1156 }, { "epoch": 0.8785951589938301, "grad_norm": 0.296875, "learning_rate": 8.319193191517016e-07, "loss": 1.1403, "step": 1157 }, { "epoch": 0.8793545325106787, "grad_norm": 0.333984375, "learning_rate": 8.216330991270916e-07, "loss": 1.1532, "step": 1158 }, { "epoch": 0.8801139060275273, "grad_norm": 0.283203125, "learning_rate": 8.114081420331266e-07, "loss": 1.1398, "step": 1159 }, { "epoch": 0.8808732795443759, "grad_norm": 0.283203125, "learning_rate": 8.012445161184179e-07, "loss": 1.1201, "step": 1160 }, { "epoch": 0.8816326530612245, "grad_norm": 0.306640625, "learning_rate": 7.911422892222165e-07, "loss": 1.1367, "step": 1161 }, { "epoch": 0.8823920265780731, "grad_norm": 0.36328125, "learning_rate": 7.81101528773951e-07, "loss": 1.1888, "step": 1162 }, { "epoch": 0.8831514000949217, "grad_norm": 0.373046875, "learning_rate": 7.711223017927783e-07, "loss": 1.1283, "step": 1163 }, { "epoch": 0.8839107736117703, "grad_norm": 0.298828125, "learning_rate": 7.612046748871327e-07, "loss": 1.114, "step": 1164 }, { "epoch": 0.8846701471286189, "grad_norm": 0.330078125, "learning_rate": 7.513487142542941e-07, "loss": 1.1995, "step": 1165 }, { "epoch": 0.8854295206454675, "grad_norm": 0.302734375, "learning_rate": 7.415544856799362e-07, "loss": 1.1137, "step": 1166 }, { "epoch": 0.886188894162316, "grad_norm": 0.33984375, "learning_rate": 7.318220545376842e-07, "loss": 1.1919, "step": 1167 }, { "epoch": 0.8869482676791647, "grad_norm": 0.298828125, "learning_rate": 7.221514857886857e-07, "loss": 1.1217, "step": 1168 }, { "epoch": 0.8877076411960133, "grad_norm": 0.3046875, "learning_rate": 7.125428439811765e-07, "loss": 1.1266, "step": 1169 }, { "epoch": 0.8884670147128619, "grad_norm": 0.318359375, "learning_rate": 7.029961932500506e-07, "loss": 1.159, "step": 1170 }, { "epoch": 0.8892263882297105, "grad_norm": 0.33984375, "learning_rate": 6.935115973164208e-07, "loss": 1.1782, "step": 1171 }, { "epoch": 0.8899857617465591, "grad_norm": 0.3046875, "learning_rate": 6.840891194872112e-07, "loss": 1.109, "step": 1172 }, { "epoch": 0.8907451352634077, "grad_norm": 0.341796875, "learning_rate": 6.7472882265472e-07, "loss": 1.2068, "step": 1173 }, { "epoch": 0.8915045087802563, "grad_norm": 0.296875, "learning_rate": 6.65430769296207e-07, "loss": 1.1619, "step": 1174 }, { "epoch": 0.8922638822971048, "grad_norm": 0.306640625, "learning_rate": 6.56195021473478e-07, "loss": 1.1534, "step": 1175 }, { "epoch": 0.8930232558139535, "grad_norm": 0.384765625, "learning_rate": 6.470216408324626e-07, "loss": 1.1999, "step": 1176 }, { "epoch": 0.8937826293308021, "grad_norm": 0.3046875, "learning_rate": 6.379106886028086e-07, "loss": 1.1417, "step": 1177 }, { "epoch": 0.8945420028476507, "grad_norm": 0.328125, "learning_rate": 6.288622255974741e-07, "loss": 1.1552, "step": 1178 }, { "epoch": 0.8953013763644992, "grad_norm": 0.341796875, "learning_rate": 6.198763122123208e-07, "loss": 1.1639, "step": 1179 }, { "epoch": 0.8960607498813479, "grad_norm": 0.2890625, "learning_rate": 6.109530084257043e-07, "loss": 1.1234, "step": 1180 }, { "epoch": 0.8968201233981965, "grad_norm": 0.353515625, "learning_rate": 6.020923737980877e-07, "loss": 1.1633, "step": 1181 }, { "epoch": 0.8975794969150451, "grad_norm": 0.318359375, "learning_rate": 5.932944674716279e-07, "loss": 1.1606, "step": 1182 }, { "epoch": 0.8983388704318936, "grad_norm": 0.322265625, "learning_rate": 5.845593481697931e-07, "loss": 1.1113, "step": 1183 }, { "epoch": 0.8990982439487423, "grad_norm": 0.341796875, "learning_rate": 5.758870741969635e-07, "loss": 1.1429, "step": 1184 }, { "epoch": 0.8998576174655909, "grad_norm": 0.306640625, "learning_rate": 5.672777034380483e-07, "loss": 1.1521, "step": 1185 }, { "epoch": 0.9006169909824395, "grad_norm": 0.30078125, "learning_rate": 5.587312933580946e-07, "loss": 1.1341, "step": 1186 }, { "epoch": 0.901376364499288, "grad_norm": 0.318359375, "learning_rate": 5.502479010019046e-07, "loss": 1.143, "step": 1187 }, { "epoch": 0.9021357380161367, "grad_norm": 0.337890625, "learning_rate": 5.418275829936537e-07, "loss": 1.1586, "step": 1188 }, { "epoch": 0.9028951115329853, "grad_norm": 0.33203125, "learning_rate": 5.334703955365183e-07, "loss": 1.1349, "step": 1189 }, { "epoch": 0.9036544850498339, "grad_norm": 0.3671875, "learning_rate": 5.251763944122956e-07, "loss": 1.2187, "step": 1190 }, { "epoch": 0.9044138585666824, "grad_norm": 0.349609375, "learning_rate": 5.169456349810342e-07, "loss": 1.2073, "step": 1191 }, { "epoch": 0.9051732320835311, "grad_norm": 0.369140625, "learning_rate": 5.087781721806539e-07, "loss": 1.162, "step": 1192 }, { "epoch": 0.9059326056003797, "grad_norm": 0.36328125, "learning_rate": 5.00674060526598e-07, "loss": 1.1938, "step": 1193 }, { "epoch": 0.9066919791172283, "grad_norm": 0.345703125, "learning_rate": 4.926333541114558e-07, "loss": 1.1564, "step": 1194 }, { "epoch": 0.9074513526340768, "grad_norm": 0.412109375, "learning_rate": 4.846561066046063e-07, "loss": 1.2107, "step": 1195 }, { "epoch": 0.9082107261509255, "grad_norm": 0.380859375, "learning_rate": 4.7674237125185597e-07, "loss": 1.2019, "step": 1196 }, { "epoch": 0.9089700996677741, "grad_norm": 0.37109375, "learning_rate": 4.6889220087508514e-07, "loss": 1.1731, "step": 1197 }, { "epoch": 0.9097294731846227, "grad_norm": 0.341796875, "learning_rate": 4.611056478719023e-07, "loss": 1.1591, "step": 1198 }, { "epoch": 0.9104888467014712, "grad_norm": 0.349609375, "learning_rate": 4.5338276421528435e-07, "loss": 1.1698, "step": 1199 }, { "epoch": 0.9112482202183199, "grad_norm": 0.3359375, "learning_rate": 4.45723601453234e-07, "loss": 1.179, "step": 1200 }, { "epoch": 0.9120075937351685, "grad_norm": 0.35546875, "learning_rate": 4.3812821070843394e-07, "loss": 1.1383, "step": 1201 }, { "epoch": 0.9127669672520171, "grad_norm": 0.326171875, "learning_rate": 4.305966426779118e-07, "loss": 1.118, "step": 1202 }, { "epoch": 0.9135263407688657, "grad_norm": 0.267578125, "learning_rate": 4.2312894763269385e-07, "loss": 1.1147, "step": 1203 }, { "epoch": 0.9142857142857143, "grad_norm": 0.39453125, "learning_rate": 4.1572517541747294e-07, "loss": 1.2228, "step": 1204 }, { "epoch": 0.9150450878025629, "grad_norm": 0.326171875, "learning_rate": 4.0838537545027755e-07, "loss": 1.144, "step": 1205 }, { "epoch": 0.9158044613194115, "grad_norm": 0.32421875, "learning_rate": 4.0110959672213676e-07, "loss": 1.1403, "step": 1206 }, { "epoch": 0.9165638348362601, "grad_norm": 0.341796875, "learning_rate": 3.9389788779675806e-07, "loss": 1.1552, "step": 1207 }, { "epoch": 0.9173232083531087, "grad_norm": 0.349609375, "learning_rate": 3.867502968102055e-07, "loss": 1.1785, "step": 1208 }, { "epoch": 0.9180825818699573, "grad_norm": 0.357421875, "learning_rate": 3.7966687147056533e-07, "loss": 1.1487, "step": 1209 }, { "epoch": 0.9188419553868059, "grad_norm": 0.3125, "learning_rate": 3.7264765905764776e-07, "loss": 1.1304, "step": 1210 }, { "epoch": 0.9196013289036545, "grad_norm": 0.28515625, "learning_rate": 3.656927064226512e-07, "loss": 1.1109, "step": 1211 }, { "epoch": 0.920360702420503, "grad_norm": 0.298828125, "learning_rate": 3.588020599878639e-07, "loss": 1.148, "step": 1212 }, { "epoch": 0.9211200759373517, "grad_norm": 0.33984375, "learning_rate": 3.519757657463474e-07, "loss": 1.1745, "step": 1213 }, { "epoch": 0.9218794494542003, "grad_norm": 0.34765625, "learning_rate": 3.4521386926163134e-07, "loss": 1.1452, "step": 1214 }, { "epoch": 0.9226388229710489, "grad_norm": 0.330078125, "learning_rate": 3.3851641566740813e-07, "loss": 1.1598, "step": 1215 }, { "epoch": 0.9233981964878974, "grad_norm": 0.365234375, "learning_rate": 3.3188344966723516e-07, "loss": 1.1889, "step": 1216 }, { "epoch": 0.9241575700047461, "grad_norm": 0.345703125, "learning_rate": 3.2531501553422884e-07, "loss": 1.1822, "step": 1217 }, { "epoch": 0.9249169435215947, "grad_norm": 0.318359375, "learning_rate": 3.1881115711077994e-07, "loss": 1.1675, "step": 1218 }, { "epoch": 0.9256763170384433, "grad_norm": 0.294921875, "learning_rate": 3.123719178082529e-07, "loss": 1.1539, "step": 1219 }, { "epoch": 0.9264356905552918, "grad_norm": 0.3671875, "learning_rate": 3.059973406066963e-07, "loss": 1.1554, "step": 1220 }, { "epoch": 0.9271950640721405, "grad_norm": 0.361328125, "learning_rate": 2.996874680545603e-07, "loss": 1.1506, "step": 1221 }, { "epoch": 0.9279544375889891, "grad_norm": 0.34765625, "learning_rate": 2.9344234226840964e-07, "loss": 1.167, "step": 1222 }, { "epoch": 0.9287138111058377, "grad_norm": 0.302734375, "learning_rate": 2.872620049326436e-07, "loss": 1.1533, "step": 1223 }, { "epoch": 0.9294731846226862, "grad_norm": 0.345703125, "learning_rate": 2.811464972992195e-07, "loss": 1.1686, "step": 1224 }, { "epoch": 0.9302325581395349, "grad_norm": 0.361328125, "learning_rate": 2.7509586018736764e-07, "loss": 1.1638, "step": 1225 }, { "epoch": 0.9309919316563835, "grad_norm": 0.361328125, "learning_rate": 2.6911013398333464e-07, "loss": 1.1969, "step": 1226 }, { "epoch": 0.9317513051732321, "grad_norm": 0.333984375, "learning_rate": 2.6318935864010133e-07, "loss": 1.1527, "step": 1227 }, { "epoch": 0.9325106786900806, "grad_norm": 0.357421875, "learning_rate": 2.573335736771254e-07, "loss": 1.1725, "step": 1228 }, { "epoch": 0.9332700522069293, "grad_norm": 0.259765625, "learning_rate": 2.51542818180065e-07, "loss": 1.0826, "step": 1229 }, { "epoch": 0.9340294257237779, "grad_norm": 0.314453125, "learning_rate": 2.458171308005308e-07, "loss": 1.1372, "step": 1230 }, { "epoch": 0.9347887992406265, "grad_norm": 0.29296875, "learning_rate": 2.4015654975582225e-07, "loss": 1.1359, "step": 1231 }, { "epoch": 0.935548172757475, "grad_norm": 0.294921875, "learning_rate": 2.3456111282867178e-07, "loss": 1.1214, "step": 1232 }, { "epoch": 0.9363075462743237, "grad_norm": 0.28125, "learning_rate": 2.2903085736699414e-07, "loss": 1.0865, "step": 1233 }, { "epoch": 0.9370669197911723, "grad_norm": 0.3828125, "learning_rate": 2.2356582028363548e-07, "loss": 1.1849, "step": 1234 }, { "epoch": 0.9378262933080209, "grad_norm": 0.28125, "learning_rate": 2.1816603805613012e-07, "loss": 1.137, "step": 1235 }, { "epoch": 0.9385856668248694, "grad_norm": 0.30859375, "learning_rate": 2.1283154672645522e-07, "loss": 1.1179, "step": 1236 }, { "epoch": 0.9393450403417181, "grad_norm": 0.333984375, "learning_rate": 2.0756238190078991e-07, "loss": 1.1576, "step": 1237 }, { "epoch": 0.9401044138585667, "grad_norm": 0.3359375, "learning_rate": 2.0235857874927655e-07, "loss": 1.1685, "step": 1238 }, { "epoch": 0.9408637873754153, "grad_norm": 0.359375, "learning_rate": 1.9722017200578757e-07, "loss": 1.167, "step": 1239 }, { "epoch": 0.9416231608922638, "grad_norm": 0.302734375, "learning_rate": 1.921471959676957e-07, "loss": 1.0967, "step": 1240 }, { "epoch": 0.9423825344091125, "grad_norm": 0.35546875, "learning_rate": 1.8713968449564079e-07, "loss": 1.185, "step": 1241 }, { "epoch": 0.9431419079259611, "grad_norm": 0.265625, "learning_rate": 1.8219767101330442e-07, "loss": 1.1248, "step": 1242 }, { "epoch": 0.9439012814428097, "grad_norm": 0.353515625, "learning_rate": 1.7732118850719237e-07, "loss": 1.1056, "step": 1243 }, { "epoch": 0.9446606549596582, "grad_norm": 0.322265625, "learning_rate": 1.7251026952640583e-07, "loss": 1.1053, "step": 1244 }, { "epoch": 0.9454200284765069, "grad_norm": 0.328125, "learning_rate": 1.6776494618243156e-07, "loss": 1.1511, "step": 1245 }, { "epoch": 0.9461794019933555, "grad_norm": 0.341796875, "learning_rate": 1.6308525014892217e-07, "loss": 1.1568, "step": 1246 }, { "epoch": 0.9469387755102041, "grad_norm": 0.357421875, "learning_rate": 1.5847121266148847e-07, "loss": 1.1354, "step": 1247 }, { "epoch": 0.9476981490270526, "grad_norm": 0.345703125, "learning_rate": 1.539228645174895e-07, "loss": 1.2015, "step": 1248 }, { "epoch": 0.9484575225439013, "grad_norm": 0.29296875, "learning_rate": 1.4944023607582737e-07, "loss": 1.1045, "step": 1249 }, { "epoch": 0.9492168960607499, "grad_norm": 0.318359375, "learning_rate": 1.4502335725674165e-07, "loss": 1.1576, "step": 1250 }, { "epoch": 0.9499762695775985, "grad_norm": 0.32421875, "learning_rate": 1.406722575416164e-07, "loss": 1.1525, "step": 1251 }, { "epoch": 0.9507356430944471, "grad_norm": 0.384765625, "learning_rate": 1.3638696597277678e-07, "loss": 1.1828, "step": 1252 }, { "epoch": 0.9514950166112957, "grad_norm": 0.322265625, "learning_rate": 1.3216751115329718e-07, "loss": 1.1428, "step": 1253 }, { "epoch": 0.9522543901281443, "grad_norm": 0.294921875, "learning_rate": 1.2801392124681233e-07, "loss": 1.1528, "step": 1254 }, { "epoch": 0.9530137636449929, "grad_norm": 0.318359375, "learning_rate": 1.2392622397732756e-07, "loss": 1.1491, "step": 1255 }, { "epoch": 0.9537731371618415, "grad_norm": 0.326171875, "learning_rate": 1.1990444662903445e-07, "loss": 1.2012, "step": 1256 }, { "epoch": 0.95453251067869, "grad_norm": 0.275390625, "learning_rate": 1.159486160461265e-07, "loss": 1.1128, "step": 1257 }, { "epoch": 0.9552918841955387, "grad_norm": 0.33203125, "learning_rate": 1.1205875863262272e-07, "loss": 1.1725, "step": 1258 }, { "epoch": 0.9560512577123873, "grad_norm": 0.359375, "learning_rate": 1.0823490035218986e-07, "loss": 1.1942, "step": 1259 }, { "epoch": 0.9568106312292359, "grad_norm": 0.33984375, "learning_rate": 1.0447706672797264e-07, "loss": 1.1906, "step": 1260 }, { "epoch": 0.9575700047460844, "grad_norm": 0.3671875, "learning_rate": 1.0078528284241606e-07, "loss": 1.1831, "step": 1261 }, { "epoch": 0.9583293782629331, "grad_norm": 0.388671875, "learning_rate": 9.715957333710447e-08, "loss": 1.1504, "step": 1262 }, { "epoch": 0.9590887517797817, "grad_norm": 0.322265625, "learning_rate": 9.359996241259384e-08, "loss": 1.1406, "step": 1263 }, { "epoch": 0.9598481252966303, "grad_norm": 0.330078125, "learning_rate": 9.010647382825421e-08, "loss": 1.1464, "step": 1264 }, { "epoch": 0.9606074988134788, "grad_norm": 0.341796875, "learning_rate": 8.667913090210534e-08, "loss": 1.1418, "step": 1265 }, { "epoch": 0.9613668723303275, "grad_norm": 0.40234375, "learning_rate": 8.331795651066455e-08, "loss": 1.1785, "step": 1266 }, { "epoch": 0.9621262458471761, "grad_norm": 0.333984375, "learning_rate": 8.002297308879359e-08, "loss": 1.1703, "step": 1267 }, { "epoch": 0.9628856193640247, "grad_norm": 0.349609375, "learning_rate": 7.679420262954984e-08, "loss": 1.1569, "step": 1268 }, { "epoch": 0.9636449928808732, "grad_norm": 0.3125, "learning_rate": 7.363166668403643e-08, "loss": 1.1488, "step": 1269 }, { "epoch": 0.9644043663977219, "grad_norm": 0.37890625, "learning_rate": 7.053538636126123e-08, "loss": 1.1948, "step": 1270 }, { "epoch": 0.9651637399145705, "grad_norm": 0.341796875, "learning_rate": 6.750538232799586e-08, "loss": 1.1496, "step": 1271 }, { "epoch": 0.9659231134314191, "grad_norm": 0.330078125, "learning_rate": 6.454167480863694e-08, "loss": 1.1463, "step": 1272 }, { "epoch": 0.9666824869482676, "grad_norm": 0.302734375, "learning_rate": 6.164428358506947e-08, "loss": 1.1507, "step": 1273 }, { "epoch": 0.9674418604651163, "grad_norm": 0.369140625, "learning_rate": 5.881322799653699e-08, "loss": 1.1549, "step": 1274 }, { "epoch": 0.9682012339819649, "grad_norm": 0.3203125, "learning_rate": 5.6048526939512794e-08, "loss": 1.1406, "step": 1275 }, { "epoch": 0.9689606074988135, "grad_norm": 0.30859375, "learning_rate": 5.3350198867574424e-08, "loss": 1.1267, "step": 1276 }, { "epoch": 0.969719981015662, "grad_norm": 0.31640625, "learning_rate": 5.0718261791274924e-08, "loss": 1.147, "step": 1277 }, { "epoch": 0.9704793545325107, "grad_norm": 0.30078125, "learning_rate": 4.815273327803183e-08, "loss": 1.1504, "step": 1278 }, { "epoch": 0.9712387280493593, "grad_norm": 0.29296875, "learning_rate": 4.5653630451998335e-08, "loss": 1.1471, "step": 1279 }, { "epoch": 0.9719981015662079, "grad_norm": 0.3203125, "learning_rate": 4.32209699939623e-08, "loss": 1.1204, "step": 1280 }, { "epoch": 0.9727574750830564, "grad_norm": 0.353515625, "learning_rate": 4.085476814122413e-08, "loss": 1.1692, "step": 1281 }, { "epoch": 0.9735168485999051, "grad_norm": 0.310546875, "learning_rate": 3.8555040687493494e-08, "loss": 1.1089, "step": 1282 }, { "epoch": 0.9742762221167537, "grad_norm": 0.279296875, "learning_rate": 3.632180298278165e-08, "loss": 1.0833, "step": 1283 }, { "epoch": 0.9750355956336023, "grad_norm": 0.322265625, "learning_rate": 3.4155069933301535e-08, "loss": 1.1362, "step": 1284 }, { "epoch": 0.9757949691504508, "grad_norm": 0.365234375, "learning_rate": 3.2054856001366706e-08, "loss": 1.2, "step": 1285 }, { "epoch": 0.9765543426672995, "grad_norm": 0.439453125, "learning_rate": 3.0021175205294794e-08, "loss": 1.2642, "step": 1286 }, { "epoch": 0.9773137161841481, "grad_norm": 0.365234375, "learning_rate": 2.805404111931198e-08, "loss": 1.1712, "step": 1287 }, { "epoch": 0.9780730897009967, "grad_norm": 0.373046875, "learning_rate": 2.6153466873468646e-08, "loss": 1.1773, "step": 1288 }, { "epoch": 0.9788324632178452, "grad_norm": 0.314453125, "learning_rate": 2.4319465153543886e-08, "loss": 1.1556, "step": 1289 }, { "epoch": 0.9795918367346939, "grad_norm": 0.326171875, "learning_rate": 2.255204820096668e-08, "loss": 1.1467, "step": 1290 }, { "epoch": 0.9803512102515425, "grad_norm": 0.34375, "learning_rate": 2.0851227812731523e-08, "loss": 1.1793, "step": 1291 }, { "epoch": 0.9811105837683911, "grad_norm": 0.326171875, "learning_rate": 1.9217015341318478e-08, "loss": 1.1366, "step": 1292 }, { "epoch": 0.9818699572852396, "grad_norm": 0.33984375, "learning_rate": 1.764942169462325e-08, "loss": 1.1893, "step": 1293 }, { "epoch": 0.9826293308020883, "grad_norm": 0.291015625, "learning_rate": 1.6148457335876112e-08, "loss": 1.1308, "step": 1294 }, { "epoch": 0.9833887043189369, "grad_norm": 0.4453125, "learning_rate": 1.4714132283577543e-08, "loss": 1.2597, "step": 1295 }, { "epoch": 0.9841480778357855, "grad_norm": 0.294921875, "learning_rate": 1.3346456111430484e-08, "loss": 1.1048, "step": 1296 }, { "epoch": 0.984907451352634, "grad_norm": 0.30078125, "learning_rate": 1.2045437948275952e-08, "loss": 1.1165, "step": 1297 }, { "epoch": 0.9856668248694826, "grad_norm": 0.365234375, "learning_rate": 1.0811086478031973e-08, "loss": 1.1419, "step": 1298 }, { "epoch": 0.9864261983863313, "grad_norm": 0.330078125, "learning_rate": 9.643409939636972e-09, "loss": 1.1656, "step": 1299 }, { "epoch": 0.9871855719031799, "grad_norm": 0.318359375, "learning_rate": 8.542416126989805e-09, "loss": 1.1344, "step": 1300 }, { "epoch": 0.9879449454200285, "grad_norm": 0.34375, "learning_rate": 7.508112388905363e-09, "loss": 1.1509, "step": 1301 }, { "epoch": 0.988704318936877, "grad_norm": 0.365234375, "learning_rate": 6.540505629061278e-09, "loss": 1.1836, "step": 1302 }, { "epoch": 0.9894636924537257, "grad_norm": 0.3359375, "learning_rate": 5.639602305950176e-09, "loss": 1.1659, "step": 1303 }, { "epoch": 0.9902230659705743, "grad_norm": 0.32421875, "learning_rate": 4.80540843283972e-09, "loss": 1.1539, "step": 1304 }, { "epoch": 0.9909824394874229, "grad_norm": 0.298828125, "learning_rate": 4.037929577732636e-09, "loss": 1.1051, "step": 1305 }, { "epoch": 0.9917418130042714, "grad_norm": 0.3203125, "learning_rate": 3.3371708633267443e-09, "loss": 1.153, "step": 1306 }, { "epoch": 0.9925011865211201, "grad_norm": 0.3828125, "learning_rate": 2.7031369669816566e-09, "loss": 1.1997, "step": 1307 }, { "epoch": 0.9932605600379687, "grad_norm": 0.283203125, "learning_rate": 2.1358321206899067e-09, "loss": 1.1305, "step": 1308 }, { "epoch": 0.9940199335548173, "grad_norm": 0.265625, "learning_rate": 1.6352601110469768e-09, "loss": 1.0931, "step": 1309 }, { "epoch": 0.9947793070716658, "grad_norm": 0.333984375, "learning_rate": 1.20142427922465e-09, "loss": 1.1754, "step": 1310 }, { "epoch": 0.9955386805885145, "grad_norm": 0.39453125, "learning_rate": 8.343275209521384e-10, "loss": 1.2122, "step": 1311 }, { "epoch": 0.9962980541053631, "grad_norm": 0.345703125, "learning_rate": 5.339722864927677e-10, "loss": 1.1428, "step": 1312 }, { "epoch": 0.9970574276222117, "grad_norm": 0.326171875, "learning_rate": 3.003605806306542e-10, "loss": 1.1282, "step": 1313 }, { "epoch": 0.9978168011390602, "grad_norm": 0.322265625, "learning_rate": 1.3349396265516235e-10, "loss": 1.1608, "step": 1314 }, { "epoch": 0.9985761746559089, "grad_norm": 0.3203125, "learning_rate": 3.3373546353132614e-11, "loss": 1.1562, "step": 1315 }, { "epoch": 0.9993355481727575, "grad_norm": 0.28515625, "learning_rate": 0.0, "loss": 1.1401, "step": 1316 }, { "epoch": 0.9993355481727575, "eval_loss": 1.151589274406433, "eval_runtime": 640.5297, "eval_samples_per_second": 92.364, "eval_steps_per_second": 7.698, "step": 1316 } ], "logging_steps": 1, "max_steps": 1316, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.953779885289767e+18, "train_batch_size": 6, "trial_name": null, "trial_params": null }