{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5000154211516517, "eval_steps": 500, "global_step": 4053, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012336921321284274, "grad_norm": 0.5708394646644592, "learning_rate": 1e-05, "loss": 0.5519, "step": 1 }, { "epoch": 0.0002467384264256855, "grad_norm": 0.652758002281189, "learning_rate": 1e-05, "loss": 0.6052, "step": 2 }, { "epoch": 0.00037010763963852823, "grad_norm": 0.5942118167877197, "learning_rate": 1e-05, "loss": 0.6571, "step": 3 }, { "epoch": 0.000493476852851371, "grad_norm": 0.6410077810287476, "learning_rate": 1e-05, "loss": 0.5584, "step": 4 }, { "epoch": 0.0006168460660642137, "grad_norm": 0.5987005233764648, "learning_rate": 1e-05, "loss": 0.5711, "step": 5 }, { "epoch": 0.0007402152792770565, "grad_norm": 0.6235069632530212, "learning_rate": 1e-05, "loss": 0.6831, "step": 6 }, { "epoch": 0.0008635844924898992, "grad_norm": 0.6291019320487976, "learning_rate": 1e-05, "loss": 0.6423, "step": 7 }, { "epoch": 0.000986953705702742, "grad_norm": 0.6797958016395569, "learning_rate": 1e-05, "loss": 0.7397, "step": 8 }, { "epoch": 0.0011103229189155846, "grad_norm": 0.6556028723716736, "learning_rate": 1e-05, "loss": 0.7037, "step": 9 }, { "epoch": 0.0012336921321284273, "grad_norm": 0.6383762359619141, "learning_rate": 1e-05, "loss": 0.6838, "step": 10 }, { "epoch": 0.00135706134534127, "grad_norm": 0.5579755306243896, "learning_rate": 1e-05, "loss": 0.5355, "step": 11 }, { "epoch": 0.001480430558554113, "grad_norm": 0.6254903078079224, "learning_rate": 1e-05, "loss": 0.6917, "step": 12 }, { "epoch": 0.0016037997717669556, "grad_norm": 0.5984361171722412, "learning_rate": 1e-05, "loss": 0.7223, "step": 13 }, { "epoch": 0.0017271689849797983, "grad_norm": 0.6492391228675842, "learning_rate": 1e-05, "loss": 0.624, "step": 14 }, { "epoch": 0.001850538198192641, "grad_norm": 0.6888169050216675, "learning_rate": 1e-05, "loss": 0.7419, "step": 15 }, { "epoch": 0.001973907411405484, "grad_norm": 0.5801921486854553, "learning_rate": 1e-05, "loss": 0.6726, "step": 16 }, { "epoch": 0.0020972766246183266, "grad_norm": 0.5549818277359009, "learning_rate": 1e-05, "loss": 0.6741, "step": 17 }, { "epoch": 0.0022206458378311693, "grad_norm": 0.6217571496963501, "learning_rate": 1e-05, "loss": 0.6965, "step": 18 }, { "epoch": 0.002344015051044012, "grad_norm": 0.6669058799743652, "learning_rate": 1e-05, "loss": 0.6544, "step": 19 }, { "epoch": 0.0024673842642568547, "grad_norm": 0.6149454712867737, "learning_rate": 1e-05, "loss": 0.6388, "step": 20 }, { "epoch": 0.0025907534774696974, "grad_norm": 0.5576744675636292, "learning_rate": 1e-05, "loss": 0.6731, "step": 21 }, { "epoch": 0.00271412269068254, "grad_norm": 0.5753223896026611, "learning_rate": 1e-05, "loss": 0.6547, "step": 22 }, { "epoch": 0.0028374919038953827, "grad_norm": 0.5793362855911255, "learning_rate": 1e-05, "loss": 0.6996, "step": 23 }, { "epoch": 0.002960861117108226, "grad_norm": 0.6037251949310303, "learning_rate": 1e-05, "loss": 0.7113, "step": 24 }, { "epoch": 0.0030842303303210685, "grad_norm": 0.636669933795929, "learning_rate": 1e-05, "loss": 0.7104, "step": 25 }, { "epoch": 0.0032075995435339112, "grad_norm": 0.4990972578525543, "learning_rate": 1e-05, "loss": 0.4806, "step": 26 }, { "epoch": 0.003330968756746754, "grad_norm": 0.6469066143035889, "learning_rate": 1e-05, "loss": 0.5997, "step": 27 }, { "epoch": 0.0034543379699595966, "grad_norm": 0.5182448029518127, "learning_rate": 1e-05, "loss": 0.5442, "step": 28 }, { "epoch": 0.0035777071831724393, "grad_norm": 0.6044508814811707, "learning_rate": 1e-05, "loss": 0.7074, "step": 29 }, { "epoch": 0.003701076396385282, "grad_norm": 0.554010808467865, "learning_rate": 1e-05, "loss": 0.5459, "step": 30 }, { "epoch": 0.0038244456095981247, "grad_norm": 0.5933326482772827, "learning_rate": 1e-05, "loss": 0.5684, "step": 31 }, { "epoch": 0.003947814822810968, "grad_norm": 0.6417567729949951, "learning_rate": 1e-05, "loss": 0.7863, "step": 32 }, { "epoch": 0.00407118403602381, "grad_norm": 0.6513707041740417, "learning_rate": 1e-05, "loss": 0.7908, "step": 33 }, { "epoch": 0.004194553249236653, "grad_norm": 0.5619110465049744, "learning_rate": 1e-05, "loss": 0.564, "step": 34 }, { "epoch": 0.0043179224624494954, "grad_norm": 0.5823797583580017, "learning_rate": 1e-05, "loss": 0.634, "step": 35 }, { "epoch": 0.004441291675662339, "grad_norm": 0.5955469608306885, "learning_rate": 1e-05, "loss": 0.6681, "step": 36 }, { "epoch": 0.004564660888875181, "grad_norm": 0.7208210229873657, "learning_rate": 1e-05, "loss": 0.7503, "step": 37 }, { "epoch": 0.004688030102088024, "grad_norm": 0.5888612866401672, "learning_rate": 1e-05, "loss": 0.5931, "step": 38 }, { "epoch": 0.004811399315300867, "grad_norm": 0.5900120735168457, "learning_rate": 1e-05, "loss": 0.7058, "step": 39 }, { "epoch": 0.004934768528513709, "grad_norm": 0.5452994704246521, "learning_rate": 1e-05, "loss": 0.5409, "step": 40 }, { "epoch": 0.0050581377417265524, "grad_norm": 0.6478936672210693, "learning_rate": 1e-05, "loss": 0.697, "step": 41 }, { "epoch": 0.005181506954939395, "grad_norm": 0.5088549256324768, "learning_rate": 1e-05, "loss": 0.593, "step": 42 }, { "epoch": 0.005304876168152238, "grad_norm": 0.6397771835327148, "learning_rate": 1e-05, "loss": 0.7712, "step": 43 }, { "epoch": 0.00542824538136508, "grad_norm": 0.5561406016349792, "learning_rate": 1e-05, "loss": 0.6114, "step": 44 }, { "epoch": 0.005551614594577923, "grad_norm": 0.4871182143688202, "learning_rate": 1e-05, "loss": 0.5341, "step": 45 }, { "epoch": 0.0056749838077907655, "grad_norm": 0.662618100643158, "learning_rate": 1e-05, "loss": 0.6786, "step": 46 }, { "epoch": 0.005798353021003609, "grad_norm": 0.5779640078544617, "learning_rate": 1e-05, "loss": 0.6535, "step": 47 }, { "epoch": 0.005921722234216452, "grad_norm": 0.677401602268219, "learning_rate": 1e-05, "loss": 0.687, "step": 48 }, { "epoch": 0.006045091447429294, "grad_norm": 0.6334659457206726, "learning_rate": 1e-05, "loss": 0.7088, "step": 49 }, { "epoch": 0.006168460660642137, "grad_norm": 0.6141123175621033, "learning_rate": 1e-05, "loss": 0.6288, "step": 50 }, { "epoch": 0.006291829873854979, "grad_norm": 0.5883814096450806, "learning_rate": 1e-05, "loss": 0.6129, "step": 51 }, { "epoch": 0.0064151990870678225, "grad_norm": 0.6697550415992737, "learning_rate": 1e-05, "loss": 0.6774, "step": 52 }, { "epoch": 0.006538568300280665, "grad_norm": 0.5372730493545532, "learning_rate": 1e-05, "loss": 0.5729, "step": 53 }, { "epoch": 0.006661937513493508, "grad_norm": 0.5836222767829895, "learning_rate": 1e-05, "loss": 0.6195, "step": 54 }, { "epoch": 0.00678530672670635, "grad_norm": 0.5315343141555786, "learning_rate": 1e-05, "loss": 0.5996, "step": 55 }, { "epoch": 0.006908675939919193, "grad_norm": 0.689345121383667, "learning_rate": 1e-05, "loss": 0.6991, "step": 56 }, { "epoch": 0.0070320451531320355, "grad_norm": 0.4362063407897949, "learning_rate": 1e-05, "loss": 0.3912, "step": 57 }, { "epoch": 0.007155414366344879, "grad_norm": 0.5628198385238647, "learning_rate": 1e-05, "loss": 0.6227, "step": 58 }, { "epoch": 0.007278783579557722, "grad_norm": 0.5504934787750244, "learning_rate": 1e-05, "loss": 0.6033, "step": 59 }, { "epoch": 0.007402152792770564, "grad_norm": 0.6005948781967163, "learning_rate": 1e-05, "loss": 0.5706, "step": 60 }, { "epoch": 0.007525522005983407, "grad_norm": 0.5873802304267883, "learning_rate": 1e-05, "loss": 0.6394, "step": 61 }, { "epoch": 0.007648891219196249, "grad_norm": 0.5917214155197144, "learning_rate": 1e-05, "loss": 0.691, "step": 62 }, { "epoch": 0.0077722604324090925, "grad_norm": 0.6838746070861816, "learning_rate": 1e-05, "loss": 0.6416, "step": 63 }, { "epoch": 0.007895629645621936, "grad_norm": 0.5355747938156128, "learning_rate": 1e-05, "loss": 0.5619, "step": 64 }, { "epoch": 0.008018998858834777, "grad_norm": 0.6108894348144531, "learning_rate": 1e-05, "loss": 0.8224, "step": 65 }, { "epoch": 0.00814236807204762, "grad_norm": 0.5407583713531494, "learning_rate": 1e-05, "loss": 0.5794, "step": 66 }, { "epoch": 0.008265737285260463, "grad_norm": 0.5703739523887634, "learning_rate": 1e-05, "loss": 0.5785, "step": 67 }, { "epoch": 0.008389106498473306, "grad_norm": 0.601225733757019, "learning_rate": 1e-05, "loss": 0.7449, "step": 68 }, { "epoch": 0.00851247571168615, "grad_norm": 0.6098017692565918, "learning_rate": 1e-05, "loss": 0.7316, "step": 69 }, { "epoch": 0.008635844924898991, "grad_norm": 0.5609059929847717, "learning_rate": 1e-05, "loss": 0.6586, "step": 70 }, { "epoch": 0.008759214138111834, "grad_norm": 0.5858104825019836, "learning_rate": 1e-05, "loss": 0.7211, "step": 71 }, { "epoch": 0.008882583351324677, "grad_norm": 0.4963826537132263, "learning_rate": 1e-05, "loss": 0.5042, "step": 72 }, { "epoch": 0.00900595256453752, "grad_norm": 0.6202996969223022, "learning_rate": 1e-05, "loss": 0.6645, "step": 73 }, { "epoch": 0.009129321777750362, "grad_norm": 0.5266162753105164, "learning_rate": 1e-05, "loss": 0.5625, "step": 74 }, { "epoch": 0.009252690990963205, "grad_norm": 0.5166581869125366, "learning_rate": 1e-05, "loss": 0.5765, "step": 75 }, { "epoch": 0.009376060204176048, "grad_norm": 0.5235215425491333, "learning_rate": 1e-05, "loss": 0.5486, "step": 76 }, { "epoch": 0.009499429417388891, "grad_norm": 0.58769291639328, "learning_rate": 1e-05, "loss": 0.6965, "step": 77 }, { "epoch": 0.009622798630601734, "grad_norm": 0.6081017851829529, "learning_rate": 1e-05, "loss": 0.5883, "step": 78 }, { "epoch": 0.009746167843814576, "grad_norm": 0.658943235874176, "learning_rate": 1e-05, "loss": 0.7366, "step": 79 }, { "epoch": 0.009869537057027419, "grad_norm": 0.631324052810669, "learning_rate": 1e-05, "loss": 0.8138, "step": 80 }, { "epoch": 0.009992906270240262, "grad_norm": 0.525015115737915, "learning_rate": 1e-05, "loss": 0.5952, "step": 81 }, { "epoch": 0.010116275483453105, "grad_norm": 0.5456830859184265, "learning_rate": 1e-05, "loss": 0.6872, "step": 82 }, { "epoch": 0.010239644696665946, "grad_norm": 0.53813236951828, "learning_rate": 1e-05, "loss": 0.4917, "step": 83 }, { "epoch": 0.01036301390987879, "grad_norm": 0.5596858263015747, "learning_rate": 1e-05, "loss": 0.6175, "step": 84 }, { "epoch": 0.010486383123091633, "grad_norm": 0.5809046626091003, "learning_rate": 1e-05, "loss": 0.6507, "step": 85 }, { "epoch": 0.010609752336304476, "grad_norm": 0.5732558965682983, "learning_rate": 1e-05, "loss": 0.6568, "step": 86 }, { "epoch": 0.010733121549517319, "grad_norm": 0.5653652548789978, "learning_rate": 1e-05, "loss": 0.6219, "step": 87 }, { "epoch": 0.01085649076273016, "grad_norm": 0.606846272945404, "learning_rate": 1e-05, "loss": 0.5987, "step": 88 }, { "epoch": 0.010979859975943003, "grad_norm": 0.4569697976112366, "learning_rate": 1e-05, "loss": 0.5, "step": 89 }, { "epoch": 0.011103229189155846, "grad_norm": 0.560084879398346, "learning_rate": 1e-05, "loss": 0.6081, "step": 90 }, { "epoch": 0.01122659840236869, "grad_norm": 0.5746961832046509, "learning_rate": 1e-05, "loss": 0.529, "step": 91 }, { "epoch": 0.011349967615581531, "grad_norm": 0.5632975697517395, "learning_rate": 1e-05, "loss": 0.5536, "step": 92 }, { "epoch": 0.011473336828794374, "grad_norm": 0.5613595247268677, "learning_rate": 1e-05, "loss": 0.5575, "step": 93 }, { "epoch": 0.011596706042007217, "grad_norm": 0.6033868193626404, "learning_rate": 1e-05, "loss": 0.7109, "step": 94 }, { "epoch": 0.01172007525522006, "grad_norm": 0.569161593914032, "learning_rate": 1e-05, "loss": 0.5599, "step": 95 }, { "epoch": 0.011843444468432903, "grad_norm": 0.5337980389595032, "learning_rate": 1e-05, "loss": 0.5001, "step": 96 }, { "epoch": 0.011966813681645745, "grad_norm": 0.6555222272872925, "learning_rate": 1e-05, "loss": 0.7391, "step": 97 }, { "epoch": 0.012090182894858588, "grad_norm": 0.6289132833480835, "learning_rate": 1e-05, "loss": 0.6797, "step": 98 }, { "epoch": 0.012213552108071431, "grad_norm": 0.5588833689689636, "learning_rate": 1e-05, "loss": 0.6844, "step": 99 }, { "epoch": 0.012336921321284274, "grad_norm": 0.6128599047660828, "learning_rate": 1e-05, "loss": 0.5969, "step": 100 }, { "epoch": 0.012460290534497116, "grad_norm": 0.5467958450317383, "learning_rate": 1e-05, "loss": 0.5967, "step": 101 }, { "epoch": 0.012583659747709959, "grad_norm": 0.5846125483512878, "learning_rate": 1e-05, "loss": 0.5848, "step": 102 }, { "epoch": 0.012707028960922802, "grad_norm": 0.5527158975601196, "learning_rate": 1e-05, "loss": 0.5703, "step": 103 }, { "epoch": 0.012830398174135645, "grad_norm": 0.5385388135910034, "learning_rate": 1e-05, "loss": 0.5906, "step": 104 }, { "epoch": 0.012953767387348486, "grad_norm": 0.5790643692016602, "learning_rate": 1e-05, "loss": 0.5812, "step": 105 }, { "epoch": 0.01307713660056133, "grad_norm": 0.6639407277107239, "learning_rate": 1e-05, "loss": 0.5534, "step": 106 }, { "epoch": 0.013200505813774173, "grad_norm": 0.6241705417633057, "learning_rate": 1e-05, "loss": 0.7162, "step": 107 }, { "epoch": 0.013323875026987016, "grad_norm": 0.5378038883209229, "learning_rate": 1e-05, "loss": 0.6282, "step": 108 }, { "epoch": 0.013447244240199859, "grad_norm": 0.5880443453788757, "learning_rate": 1e-05, "loss": 0.7206, "step": 109 }, { "epoch": 0.0135706134534127, "grad_norm": 0.5968008041381836, "learning_rate": 1e-05, "loss": 0.701, "step": 110 }, { "epoch": 0.013693982666625543, "grad_norm": 0.592665433883667, "learning_rate": 1e-05, "loss": 0.6753, "step": 111 }, { "epoch": 0.013817351879838386, "grad_norm": 0.5912931561470032, "learning_rate": 1e-05, "loss": 0.596, "step": 112 }, { "epoch": 0.01394072109305123, "grad_norm": 0.7017009258270264, "learning_rate": 1e-05, "loss": 0.7683, "step": 113 }, { "epoch": 0.014064090306264071, "grad_norm": 0.6260656714439392, "learning_rate": 1e-05, "loss": 0.6268, "step": 114 }, { "epoch": 0.014187459519476914, "grad_norm": 0.5583328604698181, "learning_rate": 1e-05, "loss": 0.6216, "step": 115 }, { "epoch": 0.014310828732689757, "grad_norm": 0.6100834608078003, "learning_rate": 1e-05, "loss": 0.6072, "step": 116 }, { "epoch": 0.0144341979459026, "grad_norm": 0.5873109102249146, "learning_rate": 1e-05, "loss": 0.5862, "step": 117 }, { "epoch": 0.014557567159115443, "grad_norm": 0.5454428195953369, "learning_rate": 1e-05, "loss": 0.6142, "step": 118 }, { "epoch": 0.014680936372328285, "grad_norm": 0.5969226360321045, "learning_rate": 1e-05, "loss": 0.6402, "step": 119 }, { "epoch": 0.014804305585541128, "grad_norm": 0.5189606547355652, "learning_rate": 1e-05, "loss": 0.5085, "step": 120 }, { "epoch": 0.014927674798753971, "grad_norm": 0.6401957273483276, "learning_rate": 1e-05, "loss": 0.6756, "step": 121 }, { "epoch": 0.015051044011966814, "grad_norm": 0.5551162362098694, "learning_rate": 1e-05, "loss": 0.5869, "step": 122 }, { "epoch": 0.015174413225179656, "grad_norm": 0.5893512964248657, "learning_rate": 1e-05, "loss": 0.734, "step": 123 }, { "epoch": 0.015297782438392499, "grad_norm": 0.5715296864509583, "learning_rate": 1e-05, "loss": 0.5942, "step": 124 }, { "epoch": 0.015421151651605342, "grad_norm": 0.5547999143600464, "learning_rate": 1e-05, "loss": 0.5378, "step": 125 }, { "epoch": 0.015544520864818185, "grad_norm": 0.5720294117927551, "learning_rate": 1e-05, "loss": 0.6104, "step": 126 }, { "epoch": 0.015667890078031026, "grad_norm": 0.5747402906417847, "learning_rate": 1e-05, "loss": 0.6763, "step": 127 }, { "epoch": 0.01579125929124387, "grad_norm": 0.5378895998001099, "learning_rate": 1e-05, "loss": 0.59, "step": 128 }, { "epoch": 0.015914628504456713, "grad_norm": 0.5735446810722351, "learning_rate": 1e-05, "loss": 0.66, "step": 129 }, { "epoch": 0.016037997717669554, "grad_norm": 0.5525258183479309, "learning_rate": 1e-05, "loss": 0.6793, "step": 130 }, { "epoch": 0.0161613669308824, "grad_norm": 0.5083274841308594, "learning_rate": 1e-05, "loss": 0.4978, "step": 131 }, { "epoch": 0.01628473614409524, "grad_norm": 0.6094419360160828, "learning_rate": 1e-05, "loss": 0.6444, "step": 132 }, { "epoch": 0.016408105357308085, "grad_norm": 0.719428300857544, "learning_rate": 1e-05, "loss": 0.7471, "step": 133 }, { "epoch": 0.016531474570520927, "grad_norm": 0.5654233694076538, "learning_rate": 1e-05, "loss": 0.5313, "step": 134 }, { "epoch": 0.016654843783733768, "grad_norm": 0.6386243104934692, "learning_rate": 1e-05, "loss": 0.7482, "step": 135 }, { "epoch": 0.016778212996946613, "grad_norm": 0.5899190306663513, "learning_rate": 1e-05, "loss": 0.6234, "step": 136 }, { "epoch": 0.016901582210159454, "grad_norm": 0.6327792406082153, "learning_rate": 1e-05, "loss": 0.6533, "step": 137 }, { "epoch": 0.0170249514233723, "grad_norm": 0.5674371123313904, "learning_rate": 1e-05, "loss": 0.5673, "step": 138 }, { "epoch": 0.01714832063658514, "grad_norm": 0.563936710357666, "learning_rate": 1e-05, "loss": 0.5979, "step": 139 }, { "epoch": 0.017271689849797982, "grad_norm": 0.5822043418884277, "learning_rate": 1e-05, "loss": 0.6277, "step": 140 }, { "epoch": 0.017395059063010827, "grad_norm": 0.5762021541595459, "learning_rate": 1e-05, "loss": 0.6198, "step": 141 }, { "epoch": 0.017518428276223668, "grad_norm": 0.5850471258163452, "learning_rate": 1e-05, "loss": 0.5076, "step": 142 }, { "epoch": 0.01764179748943651, "grad_norm": 0.5591238141059875, "learning_rate": 1e-05, "loss": 0.5995, "step": 143 }, { "epoch": 0.017765166702649354, "grad_norm": 0.5941254496574402, "learning_rate": 1e-05, "loss": 0.6562, "step": 144 }, { "epoch": 0.017888535915862196, "grad_norm": 0.7493178248405457, "learning_rate": 1e-05, "loss": 0.698, "step": 145 }, { "epoch": 0.01801190512907504, "grad_norm": 0.5551193952560425, "learning_rate": 1e-05, "loss": 0.6403, "step": 146 }, { "epoch": 0.018135274342287882, "grad_norm": 0.6121170520782471, "learning_rate": 1e-05, "loss": 0.6585, "step": 147 }, { "epoch": 0.018258643555500723, "grad_norm": 0.5266489386558533, "learning_rate": 1e-05, "loss": 0.5593, "step": 148 }, { "epoch": 0.018382012768713568, "grad_norm": 0.6231229901313782, "learning_rate": 1e-05, "loss": 0.6517, "step": 149 }, { "epoch": 0.01850538198192641, "grad_norm": 0.552106499671936, "learning_rate": 1e-05, "loss": 0.6302, "step": 150 }, { "epoch": 0.018628751195139254, "grad_norm": 0.614233136177063, "learning_rate": 1e-05, "loss": 0.7563, "step": 151 }, { "epoch": 0.018752120408352096, "grad_norm": 0.5100459456443787, "learning_rate": 1e-05, "loss": 0.4947, "step": 152 }, { "epoch": 0.018875489621564937, "grad_norm": 0.6191093921661377, "learning_rate": 1e-05, "loss": 0.6563, "step": 153 }, { "epoch": 0.018998858834777782, "grad_norm": 0.6372489333152771, "learning_rate": 1e-05, "loss": 0.7284, "step": 154 }, { "epoch": 0.019122228047990623, "grad_norm": 0.563498854637146, "learning_rate": 1e-05, "loss": 0.6549, "step": 155 }, { "epoch": 0.01924559726120347, "grad_norm": 0.5828518271446228, "learning_rate": 1e-05, "loss": 0.5862, "step": 156 }, { "epoch": 0.01936896647441631, "grad_norm": 0.5483551025390625, "learning_rate": 1e-05, "loss": 0.548, "step": 157 }, { "epoch": 0.01949233568762915, "grad_norm": 0.6161664128303528, "learning_rate": 1e-05, "loss": 0.7008, "step": 158 }, { "epoch": 0.019615704900841996, "grad_norm": 0.5531285405158997, "learning_rate": 1e-05, "loss": 0.6282, "step": 159 }, { "epoch": 0.019739074114054837, "grad_norm": 0.5916920304298401, "learning_rate": 1e-05, "loss": 0.5897, "step": 160 }, { "epoch": 0.01986244332726768, "grad_norm": 0.8015884160995483, "learning_rate": 1e-05, "loss": 0.6767, "step": 161 }, { "epoch": 0.019985812540480524, "grad_norm": 0.6055735349655151, "learning_rate": 1e-05, "loss": 0.6711, "step": 162 }, { "epoch": 0.020109181753693365, "grad_norm": 0.5961018800735474, "learning_rate": 1e-05, "loss": 0.5742, "step": 163 }, { "epoch": 0.02023255096690621, "grad_norm": 0.5990149974822998, "learning_rate": 1e-05, "loss": 0.6861, "step": 164 }, { "epoch": 0.02035592018011905, "grad_norm": 0.5862070322036743, "learning_rate": 1e-05, "loss": 0.6301, "step": 165 }, { "epoch": 0.020479289393331893, "grad_norm": 0.5805770754814148, "learning_rate": 1e-05, "loss": 0.6832, "step": 166 }, { "epoch": 0.020602658606544737, "grad_norm": 0.6087386012077332, "learning_rate": 1e-05, "loss": 0.6481, "step": 167 }, { "epoch": 0.02072602781975758, "grad_norm": 0.5373278260231018, "learning_rate": 1e-05, "loss": 0.5543, "step": 168 }, { "epoch": 0.020849397032970424, "grad_norm": 0.6400042176246643, "learning_rate": 1e-05, "loss": 0.6831, "step": 169 }, { "epoch": 0.020972766246183265, "grad_norm": 0.697137176990509, "learning_rate": 1e-05, "loss": 0.6544, "step": 170 }, { "epoch": 0.021096135459396106, "grad_norm": 0.5560331344604492, "learning_rate": 1e-05, "loss": 0.6638, "step": 171 }, { "epoch": 0.02121950467260895, "grad_norm": 0.5600760579109192, "learning_rate": 1e-05, "loss": 0.5681, "step": 172 }, { "epoch": 0.021342873885821793, "grad_norm": 0.5722455382347107, "learning_rate": 1e-05, "loss": 0.5335, "step": 173 }, { "epoch": 0.021466243099034638, "grad_norm": 0.5308880805969238, "learning_rate": 1e-05, "loss": 0.5625, "step": 174 }, { "epoch": 0.02158961231224748, "grad_norm": 0.5936367511749268, "learning_rate": 1e-05, "loss": 0.6573, "step": 175 }, { "epoch": 0.02171298152546032, "grad_norm": 0.5943384766578674, "learning_rate": 1e-05, "loss": 0.6819, "step": 176 }, { "epoch": 0.021836350738673165, "grad_norm": 0.5944376587867737, "learning_rate": 1e-05, "loss": 0.6215, "step": 177 }, { "epoch": 0.021959719951886007, "grad_norm": 0.5907491445541382, "learning_rate": 1e-05, "loss": 0.6685, "step": 178 }, { "epoch": 0.022083089165098848, "grad_norm": 0.5320159792900085, "learning_rate": 1e-05, "loss": 0.5858, "step": 179 }, { "epoch": 0.022206458378311693, "grad_norm": 0.5429009199142456, "learning_rate": 1e-05, "loss": 0.6159, "step": 180 }, { "epoch": 0.022329827591524534, "grad_norm": 0.6202471852302551, "learning_rate": 1e-05, "loss": 0.6778, "step": 181 }, { "epoch": 0.02245319680473738, "grad_norm": 0.59328693151474, "learning_rate": 1e-05, "loss": 0.677, "step": 182 }, { "epoch": 0.02257656601795022, "grad_norm": 0.6199949979782104, "learning_rate": 1e-05, "loss": 0.6578, "step": 183 }, { "epoch": 0.022699935231163062, "grad_norm": 0.5761519074440002, "learning_rate": 1e-05, "loss": 0.5794, "step": 184 }, { "epoch": 0.022823304444375907, "grad_norm": 0.5884418487548828, "learning_rate": 1e-05, "loss": 0.6648, "step": 185 }, { "epoch": 0.022946673657588748, "grad_norm": 0.5799095630645752, "learning_rate": 1e-05, "loss": 0.6195, "step": 186 }, { "epoch": 0.023070042870801593, "grad_norm": 0.5862032175064087, "learning_rate": 1e-05, "loss": 0.6427, "step": 187 }, { "epoch": 0.023193412084014434, "grad_norm": 0.6258410811424255, "learning_rate": 1e-05, "loss": 0.6867, "step": 188 }, { "epoch": 0.023316781297227276, "grad_norm": 0.5608225464820862, "learning_rate": 1e-05, "loss": 0.5527, "step": 189 }, { "epoch": 0.02344015051044012, "grad_norm": 0.5726523995399475, "learning_rate": 1e-05, "loss": 0.6769, "step": 190 }, { "epoch": 0.023563519723652962, "grad_norm": 0.6037228107452393, "learning_rate": 1e-05, "loss": 0.6142, "step": 191 }, { "epoch": 0.023686888936865807, "grad_norm": 0.6269227266311646, "learning_rate": 1e-05, "loss": 0.5747, "step": 192 }, { "epoch": 0.023810258150078648, "grad_norm": 0.5679430365562439, "learning_rate": 1e-05, "loss": 0.6547, "step": 193 }, { "epoch": 0.02393362736329149, "grad_norm": 0.5680457949638367, "learning_rate": 1e-05, "loss": 0.5943, "step": 194 }, { "epoch": 0.024056996576504334, "grad_norm": 0.5862423181533813, "learning_rate": 1e-05, "loss": 0.6615, "step": 195 }, { "epoch": 0.024180365789717176, "grad_norm": 0.6395202279090881, "learning_rate": 1e-05, "loss": 0.7539, "step": 196 }, { "epoch": 0.024303735002930017, "grad_norm": 0.5718379616737366, "learning_rate": 1e-05, "loss": 0.6562, "step": 197 }, { "epoch": 0.024427104216142862, "grad_norm": 0.6343801617622375, "learning_rate": 1e-05, "loss": 0.6622, "step": 198 }, { "epoch": 0.024550473429355704, "grad_norm": 0.5968673825263977, "learning_rate": 1e-05, "loss": 0.6374, "step": 199 }, { "epoch": 0.02467384264256855, "grad_norm": 0.5228399038314819, "learning_rate": 1e-05, "loss": 0.529, "step": 200 }, { "epoch": 0.02479721185578139, "grad_norm": 0.5803059339523315, "learning_rate": 1e-05, "loss": 0.4909, "step": 201 }, { "epoch": 0.02492058106899423, "grad_norm": 0.6518123745918274, "learning_rate": 1e-05, "loss": 0.6797, "step": 202 }, { "epoch": 0.025043950282207076, "grad_norm": 0.5939789414405823, "learning_rate": 1e-05, "loss": 0.6542, "step": 203 }, { "epoch": 0.025167319495419917, "grad_norm": 0.5913963317871094, "learning_rate": 1e-05, "loss": 0.6721, "step": 204 }, { "epoch": 0.025290688708632762, "grad_norm": 0.6423107385635376, "learning_rate": 1e-05, "loss": 0.6587, "step": 205 }, { "epoch": 0.025414057921845604, "grad_norm": 0.5642558932304382, "learning_rate": 1e-05, "loss": 0.5977, "step": 206 }, { "epoch": 0.025537427135058445, "grad_norm": 0.5707324743270874, "learning_rate": 1e-05, "loss": 0.6467, "step": 207 }, { "epoch": 0.02566079634827129, "grad_norm": 0.5053280591964722, "learning_rate": 1e-05, "loss": 0.574, "step": 208 }, { "epoch": 0.02578416556148413, "grad_norm": 0.6808720231056213, "learning_rate": 1e-05, "loss": 0.6392, "step": 209 }, { "epoch": 0.025907534774696973, "grad_norm": 0.5990216135978699, "learning_rate": 1e-05, "loss": 0.6934, "step": 210 }, { "epoch": 0.026030903987909818, "grad_norm": 0.629202663898468, "learning_rate": 1e-05, "loss": 0.6684, "step": 211 }, { "epoch": 0.02615427320112266, "grad_norm": 0.6516503691673279, "learning_rate": 1e-05, "loss": 0.7344, "step": 212 }, { "epoch": 0.026277642414335504, "grad_norm": 0.5921689867973328, "learning_rate": 1e-05, "loss": 0.7307, "step": 213 }, { "epoch": 0.026401011627548345, "grad_norm": 0.6024486422538757, "learning_rate": 1e-05, "loss": 0.6307, "step": 214 }, { "epoch": 0.026524380840761187, "grad_norm": 0.5615506172180176, "learning_rate": 1e-05, "loss": 0.594, "step": 215 }, { "epoch": 0.02664775005397403, "grad_norm": 0.6178188920021057, "learning_rate": 1e-05, "loss": 0.646, "step": 216 }, { "epoch": 0.026771119267186873, "grad_norm": 0.5966131687164307, "learning_rate": 1e-05, "loss": 0.607, "step": 217 }, { "epoch": 0.026894488480399718, "grad_norm": 0.6009190678596497, "learning_rate": 1e-05, "loss": 0.6326, "step": 218 }, { "epoch": 0.02701785769361256, "grad_norm": 0.6034905314445496, "learning_rate": 1e-05, "loss": 0.5831, "step": 219 }, { "epoch": 0.0271412269068254, "grad_norm": 0.6364794969558716, "learning_rate": 1e-05, "loss": 0.584, "step": 220 }, { "epoch": 0.027264596120038245, "grad_norm": 0.5923623442649841, "learning_rate": 1e-05, "loss": 0.5561, "step": 221 }, { "epoch": 0.027387965333251087, "grad_norm": 0.6238660216331482, "learning_rate": 1e-05, "loss": 0.6762, "step": 222 }, { "epoch": 0.02751133454646393, "grad_norm": 0.5562133193016052, "learning_rate": 1e-05, "loss": 0.5697, "step": 223 }, { "epoch": 0.027634703759676773, "grad_norm": 0.5467679500579834, "learning_rate": 1e-05, "loss": 0.525, "step": 224 }, { "epoch": 0.027758072972889614, "grad_norm": 0.6195748448371887, "learning_rate": 1e-05, "loss": 0.6971, "step": 225 }, { "epoch": 0.02788144218610246, "grad_norm": 0.6155471801757812, "learning_rate": 1e-05, "loss": 0.6196, "step": 226 }, { "epoch": 0.0280048113993153, "grad_norm": 0.5307950973510742, "learning_rate": 1e-05, "loss": 0.581, "step": 227 }, { "epoch": 0.028128180612528142, "grad_norm": 0.6357945799827576, "learning_rate": 1e-05, "loss": 0.679, "step": 228 }, { "epoch": 0.028251549825740987, "grad_norm": 0.5388103723526001, "learning_rate": 1e-05, "loss": 0.5716, "step": 229 }, { "epoch": 0.028374919038953828, "grad_norm": 0.6029490232467651, "learning_rate": 1e-05, "loss": 0.6345, "step": 230 }, { "epoch": 0.028498288252166673, "grad_norm": 0.5770479440689087, "learning_rate": 1e-05, "loss": 0.6288, "step": 231 }, { "epoch": 0.028621657465379514, "grad_norm": 0.5430986285209656, "learning_rate": 1e-05, "loss": 0.5016, "step": 232 }, { "epoch": 0.028745026678592356, "grad_norm": 0.683306097984314, "learning_rate": 1e-05, "loss": 0.6667, "step": 233 }, { "epoch": 0.0288683958918052, "grad_norm": 0.5612610578536987, "learning_rate": 1e-05, "loss": 0.5909, "step": 234 }, { "epoch": 0.028991765105018042, "grad_norm": 0.6114622950553894, "learning_rate": 1e-05, "loss": 0.7281, "step": 235 }, { "epoch": 0.029115134318230887, "grad_norm": 0.5673463940620422, "learning_rate": 1e-05, "loss": 0.6286, "step": 236 }, { "epoch": 0.02923850353144373, "grad_norm": 0.5651963949203491, "learning_rate": 1e-05, "loss": 0.5694, "step": 237 }, { "epoch": 0.02936187274465657, "grad_norm": 0.596037745475769, "learning_rate": 1e-05, "loss": 0.6796, "step": 238 }, { "epoch": 0.029485241957869415, "grad_norm": 0.607941746711731, "learning_rate": 1e-05, "loss": 0.6246, "step": 239 }, { "epoch": 0.029608611171082256, "grad_norm": 0.6492370963096619, "learning_rate": 1e-05, "loss": 0.7431, "step": 240 }, { "epoch": 0.0297319803842951, "grad_norm": 0.6176502704620361, "learning_rate": 1e-05, "loss": 0.6731, "step": 241 }, { "epoch": 0.029855349597507942, "grad_norm": 0.581643283367157, "learning_rate": 1e-05, "loss": 0.6236, "step": 242 }, { "epoch": 0.029978718810720784, "grad_norm": 0.7083696126937866, "learning_rate": 1e-05, "loss": 0.6476, "step": 243 }, { "epoch": 0.03010208802393363, "grad_norm": 0.6669256091117859, "learning_rate": 1e-05, "loss": 0.6983, "step": 244 }, { "epoch": 0.03022545723714647, "grad_norm": 0.578224241733551, "learning_rate": 1e-05, "loss": 0.6019, "step": 245 }, { "epoch": 0.03034882645035931, "grad_norm": 0.6006497144699097, "learning_rate": 1e-05, "loss": 0.6354, "step": 246 }, { "epoch": 0.030472195663572156, "grad_norm": 0.5446798205375671, "learning_rate": 1e-05, "loss": 0.5624, "step": 247 }, { "epoch": 0.030595564876784997, "grad_norm": 0.5593910217285156, "learning_rate": 1e-05, "loss": 0.6052, "step": 248 }, { "epoch": 0.030718934089997842, "grad_norm": 0.5567761063575745, "learning_rate": 1e-05, "loss": 0.5862, "step": 249 }, { "epoch": 0.030842303303210684, "grad_norm": 0.6027501225471497, "learning_rate": 1e-05, "loss": 0.5989, "step": 250 }, { "epoch": 0.030965672516423525, "grad_norm": 0.6850523352622986, "learning_rate": 1e-05, "loss": 0.6629, "step": 251 }, { "epoch": 0.03108904172963637, "grad_norm": 0.5129157304763794, "learning_rate": 1e-05, "loss": 0.5328, "step": 252 }, { "epoch": 0.03121241094284921, "grad_norm": 0.671238362789154, "learning_rate": 1e-05, "loss": 0.5901, "step": 253 }, { "epoch": 0.03133578015606205, "grad_norm": 0.5219271779060364, "learning_rate": 1e-05, "loss": 0.5413, "step": 254 }, { "epoch": 0.031459149369274894, "grad_norm": 0.5424880385398865, "learning_rate": 1e-05, "loss": 0.5251, "step": 255 }, { "epoch": 0.03158251858248774, "grad_norm": 0.6205207109451294, "learning_rate": 1e-05, "loss": 0.6714, "step": 256 }, { "epoch": 0.031705887795700584, "grad_norm": 0.7265514135360718, "learning_rate": 1e-05, "loss": 0.7479, "step": 257 }, { "epoch": 0.031829257008913425, "grad_norm": 0.6716250777244568, "learning_rate": 1e-05, "loss": 0.7515, "step": 258 }, { "epoch": 0.03195262622212627, "grad_norm": 0.5673280358314514, "learning_rate": 1e-05, "loss": 0.6492, "step": 259 }, { "epoch": 0.03207599543533911, "grad_norm": 0.576602041721344, "learning_rate": 1e-05, "loss": 0.6164, "step": 260 }, { "epoch": 0.032199364648551956, "grad_norm": 0.569580078125, "learning_rate": 1e-05, "loss": 0.5508, "step": 261 }, { "epoch": 0.0323227338617648, "grad_norm": 0.5972859859466553, "learning_rate": 1e-05, "loss": 0.6045, "step": 262 }, { "epoch": 0.03244610307497764, "grad_norm": 0.5597037076950073, "learning_rate": 1e-05, "loss": 0.5456, "step": 263 }, { "epoch": 0.03256947228819048, "grad_norm": 0.6447186470031738, "learning_rate": 1e-05, "loss": 0.6959, "step": 264 }, { "epoch": 0.03269284150140332, "grad_norm": 0.6534111499786377, "learning_rate": 1e-05, "loss": 0.6679, "step": 265 }, { "epoch": 0.03281621071461617, "grad_norm": 0.5485796332359314, "learning_rate": 1e-05, "loss": 0.5496, "step": 266 }, { "epoch": 0.03293957992782901, "grad_norm": 0.5354092121124268, "learning_rate": 1e-05, "loss": 0.5897, "step": 267 }, { "epoch": 0.03306294914104185, "grad_norm": 0.5783458948135376, "learning_rate": 1e-05, "loss": 0.6219, "step": 268 }, { "epoch": 0.033186318354254694, "grad_norm": 0.5340442061424255, "learning_rate": 1e-05, "loss": 0.556, "step": 269 }, { "epoch": 0.033309687567467536, "grad_norm": 0.6423029899597168, "learning_rate": 1e-05, "loss": 0.7766, "step": 270 }, { "epoch": 0.033433056780680384, "grad_norm": 0.5541422367095947, "learning_rate": 1e-05, "loss": 0.626, "step": 271 }, { "epoch": 0.033556425993893226, "grad_norm": 0.5196041464805603, "learning_rate": 1e-05, "loss": 0.5522, "step": 272 }, { "epoch": 0.03367979520710607, "grad_norm": 0.6330777406692505, "learning_rate": 1e-05, "loss": 0.6411, "step": 273 }, { "epoch": 0.03380316442031891, "grad_norm": 0.5406191349029541, "learning_rate": 1e-05, "loss": 0.5572, "step": 274 }, { "epoch": 0.03392653363353175, "grad_norm": 0.5991582870483398, "learning_rate": 1e-05, "loss": 0.7318, "step": 275 }, { "epoch": 0.0340499028467446, "grad_norm": 0.5921244025230408, "learning_rate": 1e-05, "loss": 0.5899, "step": 276 }, { "epoch": 0.03417327205995744, "grad_norm": 0.6615673899650574, "learning_rate": 1e-05, "loss": 0.5725, "step": 277 }, { "epoch": 0.03429664127317028, "grad_norm": 0.6490935683250427, "learning_rate": 1e-05, "loss": 0.7332, "step": 278 }, { "epoch": 0.03442001048638312, "grad_norm": 0.6020835041999817, "learning_rate": 1e-05, "loss": 0.643, "step": 279 }, { "epoch": 0.034543379699595964, "grad_norm": 0.6053187847137451, "learning_rate": 1e-05, "loss": 0.6089, "step": 280 }, { "epoch": 0.03466674891280881, "grad_norm": 0.5809504389762878, "learning_rate": 1e-05, "loss": 0.565, "step": 281 }, { "epoch": 0.03479011812602165, "grad_norm": 0.5404882431030273, "learning_rate": 1e-05, "loss": 0.5663, "step": 282 }, { "epoch": 0.034913487339234495, "grad_norm": 0.5558423399925232, "learning_rate": 1e-05, "loss": 0.5958, "step": 283 }, { "epoch": 0.035036856552447336, "grad_norm": 0.6342650651931763, "learning_rate": 1e-05, "loss": 0.677, "step": 284 }, { "epoch": 0.03516022576566018, "grad_norm": 0.6394723653793335, "learning_rate": 1e-05, "loss": 0.5651, "step": 285 }, { "epoch": 0.03528359497887302, "grad_norm": 0.5893279314041138, "learning_rate": 1e-05, "loss": 0.6535, "step": 286 }, { "epoch": 0.03540696419208587, "grad_norm": 0.5819500088691711, "learning_rate": 1e-05, "loss": 0.5935, "step": 287 }, { "epoch": 0.03553033340529871, "grad_norm": 0.5716069340705872, "learning_rate": 1e-05, "loss": 0.6779, "step": 288 }, { "epoch": 0.03565370261851155, "grad_norm": 0.5281563997268677, "learning_rate": 1e-05, "loss": 0.5349, "step": 289 }, { "epoch": 0.03577707183172439, "grad_norm": 0.5883553624153137, "learning_rate": 1e-05, "loss": 0.5819, "step": 290 }, { "epoch": 0.03590044104493723, "grad_norm": 0.5492230653762817, "learning_rate": 1e-05, "loss": 0.524, "step": 291 }, { "epoch": 0.03602381025815008, "grad_norm": 0.710842490196228, "learning_rate": 1e-05, "loss": 0.6936, "step": 292 }, { "epoch": 0.03614717947136292, "grad_norm": 0.5684595108032227, "learning_rate": 1e-05, "loss": 0.5835, "step": 293 }, { "epoch": 0.036270548684575764, "grad_norm": 0.7371294498443604, "learning_rate": 1e-05, "loss": 0.7509, "step": 294 }, { "epoch": 0.036393917897788605, "grad_norm": 0.6202942728996277, "learning_rate": 1e-05, "loss": 0.6358, "step": 295 }, { "epoch": 0.03651728711100145, "grad_norm": 0.6661975383758545, "learning_rate": 1e-05, "loss": 0.6465, "step": 296 }, { "epoch": 0.036640656324214295, "grad_norm": 0.5703844428062439, "learning_rate": 1e-05, "loss": 0.6766, "step": 297 }, { "epoch": 0.036764025537427136, "grad_norm": 0.5634049773216248, "learning_rate": 1e-05, "loss": 0.6183, "step": 298 }, { "epoch": 0.03688739475063998, "grad_norm": 0.5908878445625305, "learning_rate": 1e-05, "loss": 0.5536, "step": 299 }, { "epoch": 0.03701076396385282, "grad_norm": 0.6173368692398071, "learning_rate": 1e-05, "loss": 0.6864, "step": 300 }, { "epoch": 0.03713413317706566, "grad_norm": 0.5930769443511963, "learning_rate": 1e-05, "loss": 0.4998, "step": 301 }, { "epoch": 0.03725750239027851, "grad_norm": 0.5778042078018188, "learning_rate": 1e-05, "loss": 0.6087, "step": 302 }, { "epoch": 0.03738087160349135, "grad_norm": 0.5327207446098328, "learning_rate": 1e-05, "loss": 0.524, "step": 303 }, { "epoch": 0.03750424081670419, "grad_norm": 0.6208285689353943, "learning_rate": 1e-05, "loss": 0.6835, "step": 304 }, { "epoch": 0.03762761002991703, "grad_norm": 0.5417303442955017, "learning_rate": 1e-05, "loss": 0.5396, "step": 305 }, { "epoch": 0.037750979243129874, "grad_norm": 0.6230452060699463, "learning_rate": 1e-05, "loss": 0.7082, "step": 306 }, { "epoch": 0.03787434845634272, "grad_norm": 0.6040364503860474, "learning_rate": 1e-05, "loss": 0.6338, "step": 307 }, { "epoch": 0.037997717669555564, "grad_norm": 0.5657635927200317, "learning_rate": 1e-05, "loss": 0.6099, "step": 308 }, { "epoch": 0.038121086882768405, "grad_norm": 0.648148775100708, "learning_rate": 1e-05, "loss": 0.5927, "step": 309 }, { "epoch": 0.03824445609598125, "grad_norm": 0.5988162159919739, "learning_rate": 1e-05, "loss": 0.7265, "step": 310 }, { "epoch": 0.03836782530919409, "grad_norm": 0.5939094424247742, "learning_rate": 1e-05, "loss": 0.6298, "step": 311 }, { "epoch": 0.03849119452240694, "grad_norm": 0.5849359035491943, "learning_rate": 1e-05, "loss": 0.6208, "step": 312 }, { "epoch": 0.03861456373561978, "grad_norm": 0.5816283822059631, "learning_rate": 1e-05, "loss": 0.6261, "step": 313 }, { "epoch": 0.03873793294883262, "grad_norm": 0.6535449624061584, "learning_rate": 1e-05, "loss": 0.5754, "step": 314 }, { "epoch": 0.03886130216204546, "grad_norm": 0.629932701587677, "learning_rate": 1e-05, "loss": 0.6815, "step": 315 }, { "epoch": 0.0389846713752583, "grad_norm": 0.6139140725135803, "learning_rate": 1e-05, "loss": 0.6694, "step": 316 }, { "epoch": 0.03910804058847115, "grad_norm": 0.5019538402557373, "learning_rate": 1e-05, "loss": 0.3949, "step": 317 }, { "epoch": 0.03923140980168399, "grad_norm": 0.6502938866615295, "learning_rate": 1e-05, "loss": 0.6384, "step": 318 }, { "epoch": 0.03935477901489683, "grad_norm": 0.5397500991821289, "learning_rate": 1e-05, "loss": 0.5571, "step": 319 }, { "epoch": 0.039478148228109675, "grad_norm": 0.6082845330238342, "learning_rate": 1e-05, "loss": 0.6272, "step": 320 }, { "epoch": 0.039601517441322516, "grad_norm": 0.6630246639251709, "learning_rate": 1e-05, "loss": 0.7485, "step": 321 }, { "epoch": 0.03972488665453536, "grad_norm": 0.5529509782791138, "learning_rate": 1e-05, "loss": 0.6275, "step": 322 }, { "epoch": 0.039848255867748206, "grad_norm": 0.5813581943511963, "learning_rate": 1e-05, "loss": 0.5999, "step": 323 }, { "epoch": 0.03997162508096105, "grad_norm": 0.6735071539878845, "learning_rate": 1e-05, "loss": 0.6804, "step": 324 }, { "epoch": 0.04009499429417389, "grad_norm": 0.6429665684700012, "learning_rate": 1e-05, "loss": 0.6941, "step": 325 }, { "epoch": 0.04021836350738673, "grad_norm": 0.5941157937049866, "learning_rate": 1e-05, "loss": 0.631, "step": 326 }, { "epoch": 0.04034173272059957, "grad_norm": 0.5352389216423035, "learning_rate": 1e-05, "loss": 0.5861, "step": 327 }, { "epoch": 0.04046510193381242, "grad_norm": 0.6680461168289185, "learning_rate": 1e-05, "loss": 0.7146, "step": 328 }, { "epoch": 0.04058847114702526, "grad_norm": 0.5385650992393494, "learning_rate": 1e-05, "loss": 0.5382, "step": 329 }, { "epoch": 0.0407118403602381, "grad_norm": 0.590452253818512, "learning_rate": 1e-05, "loss": 0.6556, "step": 330 }, { "epoch": 0.040835209573450944, "grad_norm": 0.5661354064941406, "learning_rate": 1e-05, "loss": 0.5729, "step": 331 }, { "epoch": 0.040958578786663785, "grad_norm": 0.6310044527053833, "learning_rate": 1e-05, "loss": 0.6374, "step": 332 }, { "epoch": 0.041081947999876633, "grad_norm": 0.5309809446334839, "learning_rate": 1e-05, "loss": 0.5382, "step": 333 }, { "epoch": 0.041205317213089475, "grad_norm": 0.6374737024307251, "learning_rate": 1e-05, "loss": 0.6327, "step": 334 }, { "epoch": 0.041328686426302316, "grad_norm": 0.6647066473960876, "learning_rate": 1e-05, "loss": 0.6187, "step": 335 }, { "epoch": 0.04145205563951516, "grad_norm": 0.5255587697029114, "learning_rate": 1e-05, "loss": 0.4921, "step": 336 }, { "epoch": 0.041575424852728, "grad_norm": 0.6037887930870056, "learning_rate": 1e-05, "loss": 0.5601, "step": 337 }, { "epoch": 0.04169879406594085, "grad_norm": 0.5194093585014343, "learning_rate": 1e-05, "loss": 0.5005, "step": 338 }, { "epoch": 0.04182216327915369, "grad_norm": 0.5889206528663635, "learning_rate": 1e-05, "loss": 0.6385, "step": 339 }, { "epoch": 0.04194553249236653, "grad_norm": 0.6714861989021301, "learning_rate": 1e-05, "loss": 0.6912, "step": 340 }, { "epoch": 0.04206890170557937, "grad_norm": 0.7682051658630371, "learning_rate": 1e-05, "loss": 0.798, "step": 341 }, { "epoch": 0.04219227091879221, "grad_norm": 0.5450900793075562, "learning_rate": 1e-05, "loss": 0.5332, "step": 342 }, { "epoch": 0.04231564013200506, "grad_norm": 0.5635772943496704, "learning_rate": 1e-05, "loss": 0.6037, "step": 343 }, { "epoch": 0.0424390093452179, "grad_norm": 0.6017500758171082, "learning_rate": 1e-05, "loss": 0.6477, "step": 344 }, { "epoch": 0.042562378558430744, "grad_norm": 0.5904042720794678, "learning_rate": 1e-05, "loss": 0.5943, "step": 345 }, { "epoch": 0.042685747771643585, "grad_norm": 0.5959513783454895, "learning_rate": 1e-05, "loss": 0.7207, "step": 346 }, { "epoch": 0.04280911698485643, "grad_norm": 0.48673805594444275, "learning_rate": 1e-05, "loss": 0.4775, "step": 347 }, { "epoch": 0.042932486198069275, "grad_norm": 0.6182994842529297, "learning_rate": 1e-05, "loss": 0.7066, "step": 348 }, { "epoch": 0.043055855411282117, "grad_norm": 0.6454131007194519, "learning_rate": 1e-05, "loss": 0.6488, "step": 349 }, { "epoch": 0.04317922462449496, "grad_norm": 0.5664829015731812, "learning_rate": 1e-05, "loss": 0.692, "step": 350 }, { "epoch": 0.0433025938377078, "grad_norm": 0.6065468192100525, "learning_rate": 1e-05, "loss": 0.6534, "step": 351 }, { "epoch": 0.04342596305092064, "grad_norm": 0.643217921257019, "learning_rate": 1e-05, "loss": 0.6036, "step": 352 }, { "epoch": 0.04354933226413348, "grad_norm": 0.6327873468399048, "learning_rate": 1e-05, "loss": 0.6101, "step": 353 }, { "epoch": 0.04367270147734633, "grad_norm": 0.6768671274185181, "learning_rate": 1e-05, "loss": 0.6414, "step": 354 }, { "epoch": 0.04379607069055917, "grad_norm": 0.562089741230011, "learning_rate": 1e-05, "loss": 0.6004, "step": 355 }, { "epoch": 0.04391943990377201, "grad_norm": 0.5322239398956299, "learning_rate": 1e-05, "loss": 0.6106, "step": 356 }, { "epoch": 0.044042809116984855, "grad_norm": 0.558059811592102, "learning_rate": 1e-05, "loss": 0.5666, "step": 357 }, { "epoch": 0.044166178330197696, "grad_norm": 0.5862721800804138, "learning_rate": 1e-05, "loss": 0.5744, "step": 358 }, { "epoch": 0.044289547543410544, "grad_norm": 0.5982164144515991, "learning_rate": 1e-05, "loss": 0.63, "step": 359 }, { "epoch": 0.044412916756623386, "grad_norm": 0.4922582507133484, "learning_rate": 1e-05, "loss": 0.5324, "step": 360 }, { "epoch": 0.04453628596983623, "grad_norm": 0.6716896891593933, "learning_rate": 1e-05, "loss": 0.6486, "step": 361 }, { "epoch": 0.04465965518304907, "grad_norm": 0.5501975417137146, "learning_rate": 1e-05, "loss": 0.5811, "step": 362 }, { "epoch": 0.04478302439626191, "grad_norm": 0.5372856259346008, "learning_rate": 1e-05, "loss": 0.5275, "step": 363 }, { "epoch": 0.04490639360947476, "grad_norm": 0.5713937282562256, "learning_rate": 1e-05, "loss": 0.5875, "step": 364 }, { "epoch": 0.0450297628226876, "grad_norm": 0.6461490988731384, "learning_rate": 1e-05, "loss": 0.6486, "step": 365 }, { "epoch": 0.04515313203590044, "grad_norm": 0.6221438646316528, "learning_rate": 1e-05, "loss": 0.6243, "step": 366 }, { "epoch": 0.04527650124911328, "grad_norm": 0.5221573114395142, "learning_rate": 1e-05, "loss": 0.5513, "step": 367 }, { "epoch": 0.045399870462326124, "grad_norm": 0.6303828954696655, "learning_rate": 1e-05, "loss": 0.7099, "step": 368 }, { "epoch": 0.04552323967553897, "grad_norm": 0.5502330660820007, "learning_rate": 1e-05, "loss": 0.5257, "step": 369 }, { "epoch": 0.04564660888875181, "grad_norm": 0.5902296304702759, "learning_rate": 1e-05, "loss": 0.6005, "step": 370 }, { "epoch": 0.045769978101964655, "grad_norm": 0.6029537320137024, "learning_rate": 1e-05, "loss": 0.6658, "step": 371 }, { "epoch": 0.045893347315177496, "grad_norm": 0.5691503286361694, "learning_rate": 1e-05, "loss": 0.6246, "step": 372 }, { "epoch": 0.04601671652839034, "grad_norm": 0.5419954657554626, "learning_rate": 1e-05, "loss": 0.5682, "step": 373 }, { "epoch": 0.046140085741603186, "grad_norm": 0.5704677700996399, "learning_rate": 1e-05, "loss": 0.6362, "step": 374 }, { "epoch": 0.04626345495481603, "grad_norm": 0.5814294815063477, "learning_rate": 1e-05, "loss": 0.6738, "step": 375 }, { "epoch": 0.04638682416802887, "grad_norm": 0.5909560918807983, "learning_rate": 1e-05, "loss": 0.6517, "step": 376 }, { "epoch": 0.04651019338124171, "grad_norm": 0.6322766542434692, "learning_rate": 1e-05, "loss": 0.6672, "step": 377 }, { "epoch": 0.04663356259445455, "grad_norm": 0.5584964752197266, "learning_rate": 1e-05, "loss": 0.4707, "step": 378 }, { "epoch": 0.0467569318076674, "grad_norm": 0.544264018535614, "learning_rate": 1e-05, "loss": 0.4621, "step": 379 }, { "epoch": 0.04688030102088024, "grad_norm": 0.6485415101051331, "learning_rate": 1e-05, "loss": 0.6376, "step": 380 }, { "epoch": 0.04700367023409308, "grad_norm": 0.5916298031806946, "learning_rate": 1e-05, "loss": 0.6612, "step": 381 }, { "epoch": 0.047127039447305924, "grad_norm": 0.654310941696167, "learning_rate": 1e-05, "loss": 0.6897, "step": 382 }, { "epoch": 0.047250408660518765, "grad_norm": 0.5355238914489746, "learning_rate": 1e-05, "loss": 0.5671, "step": 383 }, { "epoch": 0.047373777873731614, "grad_norm": 0.6356751322746277, "learning_rate": 1e-05, "loss": 0.7891, "step": 384 }, { "epoch": 0.047497147086944455, "grad_norm": 0.5226166844367981, "learning_rate": 1e-05, "loss": 0.5095, "step": 385 }, { "epoch": 0.047620516300157296, "grad_norm": 0.561014711856842, "learning_rate": 1e-05, "loss": 0.5981, "step": 386 }, { "epoch": 0.04774388551337014, "grad_norm": 0.5137313008308411, "learning_rate": 1e-05, "loss": 0.5349, "step": 387 }, { "epoch": 0.04786725472658298, "grad_norm": 0.5543534755706787, "learning_rate": 1e-05, "loss": 0.599, "step": 388 }, { "epoch": 0.04799062393979582, "grad_norm": 0.6052325963973999, "learning_rate": 1e-05, "loss": 0.601, "step": 389 }, { "epoch": 0.04811399315300867, "grad_norm": 0.5758461952209473, "learning_rate": 1e-05, "loss": 0.5439, "step": 390 }, { "epoch": 0.04823736236622151, "grad_norm": 0.6031063795089722, "learning_rate": 1e-05, "loss": 0.6565, "step": 391 }, { "epoch": 0.04836073157943435, "grad_norm": 0.5908290147781372, "learning_rate": 1e-05, "loss": 0.6293, "step": 392 }, { "epoch": 0.04848410079264719, "grad_norm": 0.7875754237174988, "learning_rate": 1e-05, "loss": 0.7383, "step": 393 }, { "epoch": 0.048607470005860035, "grad_norm": 0.5767374038696289, "learning_rate": 1e-05, "loss": 0.5502, "step": 394 }, { "epoch": 0.04873083921907288, "grad_norm": 0.5793061852455139, "learning_rate": 1e-05, "loss": 0.5961, "step": 395 }, { "epoch": 0.048854208432285724, "grad_norm": 0.5366004705429077, "learning_rate": 1e-05, "loss": 0.5526, "step": 396 }, { "epoch": 0.048977577645498566, "grad_norm": 0.5803956985473633, "learning_rate": 1e-05, "loss": 0.6801, "step": 397 }, { "epoch": 0.04910094685871141, "grad_norm": 0.5981411337852478, "learning_rate": 1e-05, "loss": 0.5282, "step": 398 }, { "epoch": 0.04922431607192425, "grad_norm": 0.5937778949737549, "learning_rate": 1e-05, "loss": 0.6272, "step": 399 }, { "epoch": 0.0493476852851371, "grad_norm": 0.621304988861084, "learning_rate": 1e-05, "loss": 0.6534, "step": 400 }, { "epoch": 0.04947105449834994, "grad_norm": 0.6524643898010254, "learning_rate": 1e-05, "loss": 0.6962, "step": 401 }, { "epoch": 0.04959442371156278, "grad_norm": 0.6389184594154358, "learning_rate": 1e-05, "loss": 0.6656, "step": 402 }, { "epoch": 0.04971779292477562, "grad_norm": 0.5638105273246765, "learning_rate": 1e-05, "loss": 0.5256, "step": 403 }, { "epoch": 0.04984116213798846, "grad_norm": 0.6041951179504395, "learning_rate": 1e-05, "loss": 0.6589, "step": 404 }, { "epoch": 0.04996453135120131, "grad_norm": 0.5777479410171509, "learning_rate": 1e-05, "loss": 0.5971, "step": 405 }, { "epoch": 0.05008790056441415, "grad_norm": 0.6643062233924866, "learning_rate": 1e-05, "loss": 0.7212, "step": 406 }, { "epoch": 0.05021126977762699, "grad_norm": 0.6408272385597229, "learning_rate": 1e-05, "loss": 0.7188, "step": 407 }, { "epoch": 0.050334638990839835, "grad_norm": 0.5566601753234863, "learning_rate": 1e-05, "loss": 0.5511, "step": 408 }, { "epoch": 0.050458008204052676, "grad_norm": 0.5759593844413757, "learning_rate": 1e-05, "loss": 0.6435, "step": 409 }, { "epoch": 0.050581377417265524, "grad_norm": 0.6173961758613586, "learning_rate": 1e-05, "loss": 0.6956, "step": 410 }, { "epoch": 0.050704746630478366, "grad_norm": 0.6883727312088013, "learning_rate": 1e-05, "loss": 0.7373, "step": 411 }, { "epoch": 0.05082811584369121, "grad_norm": 0.6257954239845276, "learning_rate": 1e-05, "loss": 0.6965, "step": 412 }, { "epoch": 0.05095148505690405, "grad_norm": 0.5806434750556946, "learning_rate": 1e-05, "loss": 0.5707, "step": 413 }, { "epoch": 0.05107485427011689, "grad_norm": 0.6409105062484741, "learning_rate": 1e-05, "loss": 0.6172, "step": 414 }, { "epoch": 0.05119822348332974, "grad_norm": 0.5113802552223206, "learning_rate": 1e-05, "loss": 0.4141, "step": 415 }, { "epoch": 0.05132159269654258, "grad_norm": 0.6368800401687622, "learning_rate": 1e-05, "loss": 0.826, "step": 416 }, { "epoch": 0.05144496190975542, "grad_norm": 0.6727871894836426, "learning_rate": 1e-05, "loss": 0.7637, "step": 417 }, { "epoch": 0.05156833112296826, "grad_norm": 0.6684110760688782, "learning_rate": 1e-05, "loss": 0.6739, "step": 418 }, { "epoch": 0.051691700336181104, "grad_norm": 0.549552857875824, "learning_rate": 1e-05, "loss": 0.4583, "step": 419 }, { "epoch": 0.051815069549393945, "grad_norm": 0.5883033871650696, "learning_rate": 1e-05, "loss": 0.6447, "step": 420 }, { "epoch": 0.051938438762606794, "grad_norm": 0.589646577835083, "learning_rate": 1e-05, "loss": 0.6103, "step": 421 }, { "epoch": 0.052061807975819635, "grad_norm": 0.5620612502098083, "learning_rate": 1e-05, "loss": 0.4912, "step": 422 }, { "epoch": 0.052185177189032476, "grad_norm": 0.6898766756057739, "learning_rate": 1e-05, "loss": 0.7386, "step": 423 }, { "epoch": 0.05230854640224532, "grad_norm": 0.5639111399650574, "learning_rate": 1e-05, "loss": 0.5373, "step": 424 }, { "epoch": 0.05243191561545816, "grad_norm": 0.6049554944038391, "learning_rate": 1e-05, "loss": 0.6843, "step": 425 }, { "epoch": 0.05255528482867101, "grad_norm": 0.5774723291397095, "learning_rate": 1e-05, "loss": 0.5619, "step": 426 }, { "epoch": 0.05267865404188385, "grad_norm": 0.6372997760772705, "learning_rate": 1e-05, "loss": 0.6635, "step": 427 }, { "epoch": 0.05280202325509669, "grad_norm": 0.5646178126335144, "learning_rate": 1e-05, "loss": 0.5728, "step": 428 }, { "epoch": 0.05292539246830953, "grad_norm": 0.6157800555229187, "learning_rate": 1e-05, "loss": 0.5739, "step": 429 }, { "epoch": 0.05304876168152237, "grad_norm": 0.5814571976661682, "learning_rate": 1e-05, "loss": 0.5625, "step": 430 }, { "epoch": 0.05317213089473522, "grad_norm": 0.5953289866447449, "learning_rate": 1e-05, "loss": 0.6159, "step": 431 }, { "epoch": 0.05329550010794806, "grad_norm": 0.6119066476821899, "learning_rate": 1e-05, "loss": 0.5119, "step": 432 }, { "epoch": 0.053418869321160904, "grad_norm": 0.6363236904144287, "learning_rate": 1e-05, "loss": 0.6198, "step": 433 }, { "epoch": 0.053542238534373746, "grad_norm": 0.5507789850234985, "learning_rate": 1e-05, "loss": 0.5768, "step": 434 }, { "epoch": 0.05366560774758659, "grad_norm": 0.5638236999511719, "learning_rate": 1e-05, "loss": 0.4738, "step": 435 }, { "epoch": 0.053788976960799435, "grad_norm": 0.5991834998130798, "learning_rate": 1e-05, "loss": 0.6096, "step": 436 }, { "epoch": 0.05391234617401228, "grad_norm": 0.5425814390182495, "learning_rate": 1e-05, "loss": 0.5589, "step": 437 }, { "epoch": 0.05403571538722512, "grad_norm": 0.554955244064331, "learning_rate": 1e-05, "loss": 0.5045, "step": 438 }, { "epoch": 0.05415908460043796, "grad_norm": 0.6257026195526123, "learning_rate": 1e-05, "loss": 0.5947, "step": 439 }, { "epoch": 0.0542824538136508, "grad_norm": 0.5397578477859497, "learning_rate": 1e-05, "loss": 0.5414, "step": 440 }, { "epoch": 0.05440582302686365, "grad_norm": 0.6008768081665039, "learning_rate": 1e-05, "loss": 0.5923, "step": 441 }, { "epoch": 0.05452919224007649, "grad_norm": 0.703492283821106, "learning_rate": 1e-05, "loss": 0.738, "step": 442 }, { "epoch": 0.05465256145328933, "grad_norm": 0.5832856297492981, "learning_rate": 1e-05, "loss": 0.6407, "step": 443 }, { "epoch": 0.05477593066650217, "grad_norm": 0.5485468506813049, "learning_rate": 1e-05, "loss": 0.5852, "step": 444 }, { "epoch": 0.054899299879715015, "grad_norm": 0.5635490417480469, "learning_rate": 1e-05, "loss": 0.5809, "step": 445 }, { "epoch": 0.05502266909292786, "grad_norm": 0.589340329170227, "learning_rate": 1e-05, "loss": 0.6004, "step": 446 }, { "epoch": 0.055146038306140704, "grad_norm": 0.6161998510360718, "learning_rate": 1e-05, "loss": 0.6062, "step": 447 }, { "epoch": 0.055269407519353546, "grad_norm": 0.5740360021591187, "learning_rate": 1e-05, "loss": 0.5761, "step": 448 }, { "epoch": 0.05539277673256639, "grad_norm": 0.5875034928321838, "learning_rate": 1e-05, "loss": 0.6116, "step": 449 }, { "epoch": 0.05551614594577923, "grad_norm": 0.639455258846283, "learning_rate": 1e-05, "loss": 0.7121, "step": 450 }, { "epoch": 0.05563951515899208, "grad_norm": 0.5710155367851257, "learning_rate": 1e-05, "loss": 0.5884, "step": 451 }, { "epoch": 0.05576288437220492, "grad_norm": 0.6221250295639038, "learning_rate": 1e-05, "loss": 0.6036, "step": 452 }, { "epoch": 0.05588625358541776, "grad_norm": 0.5742440819740295, "learning_rate": 1e-05, "loss": 0.482, "step": 453 }, { "epoch": 0.0560096227986306, "grad_norm": 0.6367557644844055, "learning_rate": 1e-05, "loss": 0.7003, "step": 454 }, { "epoch": 0.05613299201184344, "grad_norm": 0.6009160280227661, "learning_rate": 1e-05, "loss": 0.583, "step": 455 }, { "epoch": 0.056256361225056284, "grad_norm": 0.529914379119873, "learning_rate": 1e-05, "loss": 0.4647, "step": 456 }, { "epoch": 0.05637973043826913, "grad_norm": 0.6715623736381531, "learning_rate": 1e-05, "loss": 0.6581, "step": 457 }, { "epoch": 0.056503099651481974, "grad_norm": 0.5389369130134583, "learning_rate": 1e-05, "loss": 0.611, "step": 458 }, { "epoch": 0.056626468864694815, "grad_norm": 0.6869989633560181, "learning_rate": 1e-05, "loss": 0.7436, "step": 459 }, { "epoch": 0.056749838077907656, "grad_norm": 0.5909601449966431, "learning_rate": 1e-05, "loss": 0.663, "step": 460 }, { "epoch": 0.0568732072911205, "grad_norm": 0.5552419424057007, "learning_rate": 1e-05, "loss": 0.5181, "step": 461 }, { "epoch": 0.056996576504333346, "grad_norm": 0.5607795119285583, "learning_rate": 1e-05, "loss": 0.5767, "step": 462 }, { "epoch": 0.05711994571754619, "grad_norm": 0.6363530158996582, "learning_rate": 1e-05, "loss": 0.7986, "step": 463 }, { "epoch": 0.05724331493075903, "grad_norm": 0.5592305064201355, "learning_rate": 1e-05, "loss": 0.5189, "step": 464 }, { "epoch": 0.05736668414397187, "grad_norm": 0.5768619775772095, "learning_rate": 1e-05, "loss": 0.5535, "step": 465 }, { "epoch": 0.05749005335718471, "grad_norm": 0.648094654083252, "learning_rate": 1e-05, "loss": 0.6669, "step": 466 }, { "epoch": 0.05761342257039756, "grad_norm": 0.5667260885238647, "learning_rate": 1e-05, "loss": 0.5925, "step": 467 }, { "epoch": 0.0577367917836104, "grad_norm": 0.5700448155403137, "learning_rate": 1e-05, "loss": 0.5547, "step": 468 }, { "epoch": 0.05786016099682324, "grad_norm": 0.6365095376968384, "learning_rate": 1e-05, "loss": 0.6106, "step": 469 }, { "epoch": 0.057983530210036084, "grad_norm": 0.6138272881507874, "learning_rate": 1e-05, "loss": 0.6627, "step": 470 }, { "epoch": 0.058106899423248926, "grad_norm": 0.6145910620689392, "learning_rate": 1e-05, "loss": 0.6221, "step": 471 }, { "epoch": 0.058230268636461774, "grad_norm": 0.6133964657783508, "learning_rate": 1e-05, "loss": 0.618, "step": 472 }, { "epoch": 0.058353637849674615, "grad_norm": 0.6263993382453918, "learning_rate": 1e-05, "loss": 0.682, "step": 473 }, { "epoch": 0.05847700706288746, "grad_norm": 0.5974031686782837, "learning_rate": 1e-05, "loss": 0.6214, "step": 474 }, { "epoch": 0.0586003762761003, "grad_norm": 0.5427314043045044, "learning_rate": 1e-05, "loss": 0.5361, "step": 475 }, { "epoch": 0.05872374548931314, "grad_norm": 0.6430461406707764, "learning_rate": 1e-05, "loss": 0.7062, "step": 476 }, { "epoch": 0.05884711470252599, "grad_norm": 0.6237983107566833, "learning_rate": 1e-05, "loss": 0.6427, "step": 477 }, { "epoch": 0.05897048391573883, "grad_norm": 0.5494363307952881, "learning_rate": 1e-05, "loss": 0.5326, "step": 478 }, { "epoch": 0.05909385312895167, "grad_norm": 0.5423187613487244, "learning_rate": 1e-05, "loss": 0.4912, "step": 479 }, { "epoch": 0.05921722234216451, "grad_norm": 0.6514203548431396, "learning_rate": 1e-05, "loss": 0.6215, "step": 480 }, { "epoch": 0.05934059155537735, "grad_norm": 0.6010270714759827, "learning_rate": 1e-05, "loss": 0.5681, "step": 481 }, { "epoch": 0.0594639607685902, "grad_norm": 0.5607581734657288, "learning_rate": 1e-05, "loss": 0.5485, "step": 482 }, { "epoch": 0.05958732998180304, "grad_norm": 0.6297522783279419, "learning_rate": 1e-05, "loss": 0.6121, "step": 483 }, { "epoch": 0.059710699195015884, "grad_norm": 0.6542215347290039, "learning_rate": 1e-05, "loss": 0.6011, "step": 484 }, { "epoch": 0.059834068408228726, "grad_norm": 0.6814959645271301, "learning_rate": 1e-05, "loss": 0.6484, "step": 485 }, { "epoch": 0.05995743762144157, "grad_norm": 0.5913944840431213, "learning_rate": 1e-05, "loss": 0.625, "step": 486 }, { "epoch": 0.06008080683465441, "grad_norm": 0.6125743389129639, "learning_rate": 1e-05, "loss": 0.5606, "step": 487 }, { "epoch": 0.06020417604786726, "grad_norm": 0.5797486305236816, "learning_rate": 1e-05, "loss": 0.5753, "step": 488 }, { "epoch": 0.0603275452610801, "grad_norm": 0.547673225402832, "learning_rate": 1e-05, "loss": 0.584, "step": 489 }, { "epoch": 0.06045091447429294, "grad_norm": 0.5680713057518005, "learning_rate": 1e-05, "loss": 0.5822, "step": 490 }, { "epoch": 0.06057428368750578, "grad_norm": 0.5213432908058167, "learning_rate": 1e-05, "loss": 0.5343, "step": 491 }, { "epoch": 0.06069765290071862, "grad_norm": 0.6417579650878906, "learning_rate": 1e-05, "loss": 0.6319, "step": 492 }, { "epoch": 0.06082102211393147, "grad_norm": 0.6559723615646362, "learning_rate": 1e-05, "loss": 0.6848, "step": 493 }, { "epoch": 0.06094439132714431, "grad_norm": 0.6805989146232605, "learning_rate": 1e-05, "loss": 0.6202, "step": 494 }, { "epoch": 0.061067760540357154, "grad_norm": 0.6389230489730835, "learning_rate": 1e-05, "loss": 0.6612, "step": 495 }, { "epoch": 0.061191129753569995, "grad_norm": 0.6134034395217896, "learning_rate": 1e-05, "loss": 0.6738, "step": 496 }, { "epoch": 0.061314498966782836, "grad_norm": 0.548075795173645, "learning_rate": 1e-05, "loss": 0.6036, "step": 497 }, { "epoch": 0.061437868179995685, "grad_norm": 0.6290404796600342, "learning_rate": 1e-05, "loss": 0.5785, "step": 498 }, { "epoch": 0.061561237393208526, "grad_norm": 0.6083202362060547, "learning_rate": 1e-05, "loss": 0.5934, "step": 499 }, { "epoch": 0.06168460660642137, "grad_norm": 0.5644240975379944, "learning_rate": 1e-05, "loss": 0.5457, "step": 500 }, { "epoch": 0.06180797581963421, "grad_norm": 0.6183356642723083, "learning_rate": 1e-05, "loss": 0.6986, "step": 501 }, { "epoch": 0.06193134503284705, "grad_norm": 0.5695188641548157, "learning_rate": 1e-05, "loss": 0.5888, "step": 502 }, { "epoch": 0.0620547142460599, "grad_norm": 0.6169439554214478, "learning_rate": 1e-05, "loss": 0.5644, "step": 503 }, { "epoch": 0.06217808345927274, "grad_norm": 0.536048948764801, "learning_rate": 1e-05, "loss": 0.5738, "step": 504 }, { "epoch": 0.06230145267248558, "grad_norm": 0.5114624500274658, "learning_rate": 1e-05, "loss": 0.5248, "step": 505 }, { "epoch": 0.06242482188569842, "grad_norm": 0.5888549089431763, "learning_rate": 1e-05, "loss": 0.5977, "step": 506 }, { "epoch": 0.06254819109891127, "grad_norm": 0.6106228828430176, "learning_rate": 1e-05, "loss": 0.6134, "step": 507 }, { "epoch": 0.0626715603121241, "grad_norm": 0.5512991547584534, "learning_rate": 1e-05, "loss": 0.4905, "step": 508 }, { "epoch": 0.06279492952533695, "grad_norm": 0.6036090850830078, "learning_rate": 1e-05, "loss": 0.6586, "step": 509 }, { "epoch": 0.06291829873854979, "grad_norm": 0.5778861045837402, "learning_rate": 1e-05, "loss": 0.6157, "step": 510 }, { "epoch": 0.06304166795176264, "grad_norm": 0.5897467732429504, "learning_rate": 1e-05, "loss": 0.4827, "step": 511 }, { "epoch": 0.06316503716497548, "grad_norm": 0.7208260297775269, "learning_rate": 1e-05, "loss": 0.6855, "step": 512 }, { "epoch": 0.06328840637818832, "grad_norm": 0.6609594821929932, "learning_rate": 1e-05, "loss": 0.6637, "step": 513 }, { "epoch": 0.06341177559140117, "grad_norm": 0.5631861090660095, "learning_rate": 1e-05, "loss": 0.5655, "step": 514 }, { "epoch": 0.063535144804614, "grad_norm": 0.5032969117164612, "learning_rate": 1e-05, "loss": 0.4668, "step": 515 }, { "epoch": 0.06365851401782685, "grad_norm": 0.649137556552887, "learning_rate": 1e-05, "loss": 0.6133, "step": 516 }, { "epoch": 0.0637818832310397, "grad_norm": 0.5955870747566223, "learning_rate": 1e-05, "loss": 0.6051, "step": 517 }, { "epoch": 0.06390525244425253, "grad_norm": 0.5907684564590454, "learning_rate": 1e-05, "loss": 0.6501, "step": 518 }, { "epoch": 0.06402862165746538, "grad_norm": 0.5742161273956299, "learning_rate": 1e-05, "loss": 0.5848, "step": 519 }, { "epoch": 0.06415199087067822, "grad_norm": 0.5666871070861816, "learning_rate": 1e-05, "loss": 0.585, "step": 520 }, { "epoch": 0.06427536008389106, "grad_norm": 0.5899717807769775, "learning_rate": 1e-05, "loss": 0.5815, "step": 521 }, { "epoch": 0.06439872929710391, "grad_norm": 0.6287236213684082, "learning_rate": 1e-05, "loss": 0.6369, "step": 522 }, { "epoch": 0.06452209851031675, "grad_norm": 0.5284711718559265, "learning_rate": 1e-05, "loss": 0.5077, "step": 523 }, { "epoch": 0.0646454677235296, "grad_norm": 0.5855197906494141, "learning_rate": 1e-05, "loss": 0.6139, "step": 524 }, { "epoch": 0.06476883693674243, "grad_norm": 0.542837381362915, "learning_rate": 1e-05, "loss": 0.6014, "step": 525 }, { "epoch": 0.06489220614995528, "grad_norm": 0.6514118313789368, "learning_rate": 1e-05, "loss": 0.6981, "step": 526 }, { "epoch": 0.06501557536316813, "grad_norm": 0.586581826210022, "learning_rate": 1e-05, "loss": 0.5892, "step": 527 }, { "epoch": 0.06513894457638096, "grad_norm": 0.6185070276260376, "learning_rate": 1e-05, "loss": 0.5592, "step": 528 }, { "epoch": 0.06526231378959381, "grad_norm": 0.6097654104232788, "learning_rate": 1e-05, "loss": 0.6593, "step": 529 }, { "epoch": 0.06538568300280664, "grad_norm": 0.5564464926719666, "learning_rate": 1e-05, "loss": 0.5915, "step": 530 }, { "epoch": 0.06550905221601949, "grad_norm": 0.5844548940658569, "learning_rate": 1e-05, "loss": 0.5715, "step": 531 }, { "epoch": 0.06563242142923234, "grad_norm": 0.5787303447723389, "learning_rate": 1e-05, "loss": 0.6092, "step": 532 }, { "epoch": 0.06575579064244517, "grad_norm": 0.5522416830062866, "learning_rate": 1e-05, "loss": 0.6101, "step": 533 }, { "epoch": 0.06587915985565802, "grad_norm": 0.6233509182929993, "learning_rate": 1e-05, "loss": 0.6794, "step": 534 }, { "epoch": 0.06600252906887086, "grad_norm": 0.6257951259613037, "learning_rate": 1e-05, "loss": 0.6526, "step": 535 }, { "epoch": 0.0661258982820837, "grad_norm": 0.6403011083602905, "learning_rate": 1e-05, "loss": 0.6523, "step": 536 }, { "epoch": 0.06624926749529655, "grad_norm": 0.618105947971344, "learning_rate": 1e-05, "loss": 0.6708, "step": 537 }, { "epoch": 0.06637263670850939, "grad_norm": 0.6575707197189331, "learning_rate": 1e-05, "loss": 0.6173, "step": 538 }, { "epoch": 0.06649600592172224, "grad_norm": 0.6284185647964478, "learning_rate": 1e-05, "loss": 0.6704, "step": 539 }, { "epoch": 0.06661937513493507, "grad_norm": 0.5916041731834412, "learning_rate": 1e-05, "loss": 0.6185, "step": 540 }, { "epoch": 0.06674274434814792, "grad_norm": 0.5972467064857483, "learning_rate": 1e-05, "loss": 0.5771, "step": 541 }, { "epoch": 0.06686611356136077, "grad_norm": 0.5867637991905212, "learning_rate": 1e-05, "loss": 0.6153, "step": 542 }, { "epoch": 0.0669894827745736, "grad_norm": 0.6211584806442261, "learning_rate": 1e-05, "loss": 0.6214, "step": 543 }, { "epoch": 0.06711285198778645, "grad_norm": 0.6831349730491638, "learning_rate": 1e-05, "loss": 0.74, "step": 544 }, { "epoch": 0.06723622120099929, "grad_norm": 0.5687799453735352, "learning_rate": 1e-05, "loss": 0.635, "step": 545 }, { "epoch": 0.06735959041421213, "grad_norm": 0.668211817741394, "learning_rate": 1e-05, "loss": 0.6793, "step": 546 }, { "epoch": 0.06748295962742498, "grad_norm": 0.588466465473175, "learning_rate": 1e-05, "loss": 0.663, "step": 547 }, { "epoch": 0.06760632884063782, "grad_norm": 0.6479374170303345, "learning_rate": 1e-05, "loss": 0.6799, "step": 548 }, { "epoch": 0.06772969805385066, "grad_norm": 0.6971288919448853, "learning_rate": 1e-05, "loss": 0.726, "step": 549 }, { "epoch": 0.0678530672670635, "grad_norm": 0.573499858379364, "learning_rate": 1e-05, "loss": 0.6516, "step": 550 }, { "epoch": 0.06797643648027635, "grad_norm": 0.5689913034439087, "learning_rate": 1e-05, "loss": 0.5506, "step": 551 }, { "epoch": 0.0680998056934892, "grad_norm": 0.6492893695831299, "learning_rate": 1e-05, "loss": 0.6655, "step": 552 }, { "epoch": 0.06822317490670203, "grad_norm": 0.5753166079521179, "learning_rate": 1e-05, "loss": 0.5672, "step": 553 }, { "epoch": 0.06834654411991488, "grad_norm": 0.62542325258255, "learning_rate": 1e-05, "loss": 0.6577, "step": 554 }, { "epoch": 0.06846991333312771, "grad_norm": 0.5876516103744507, "learning_rate": 1e-05, "loss": 0.637, "step": 555 }, { "epoch": 0.06859328254634056, "grad_norm": 0.5698797106742859, "learning_rate": 1e-05, "loss": 0.5469, "step": 556 }, { "epoch": 0.06871665175955341, "grad_norm": 0.5861631631851196, "learning_rate": 1e-05, "loss": 0.5783, "step": 557 }, { "epoch": 0.06884002097276624, "grad_norm": 0.5518393516540527, "learning_rate": 1e-05, "loss": 0.6117, "step": 558 }, { "epoch": 0.06896339018597909, "grad_norm": 0.5484036803245544, "learning_rate": 1e-05, "loss": 0.5747, "step": 559 }, { "epoch": 0.06908675939919193, "grad_norm": 0.6680779457092285, "learning_rate": 1e-05, "loss": 0.6615, "step": 560 }, { "epoch": 0.06921012861240478, "grad_norm": 0.5571660399436951, "learning_rate": 1e-05, "loss": 0.6108, "step": 561 }, { "epoch": 0.06933349782561762, "grad_norm": 0.5886970162391663, "learning_rate": 1e-05, "loss": 0.5613, "step": 562 }, { "epoch": 0.06945686703883046, "grad_norm": 0.5548239350318909, "learning_rate": 1e-05, "loss": 0.5188, "step": 563 }, { "epoch": 0.0695802362520433, "grad_norm": 0.6944438219070435, "learning_rate": 1e-05, "loss": 0.8668, "step": 564 }, { "epoch": 0.06970360546525614, "grad_norm": 0.5893566012382507, "learning_rate": 1e-05, "loss": 0.5939, "step": 565 }, { "epoch": 0.06982697467846899, "grad_norm": 0.5982200503349304, "learning_rate": 1e-05, "loss": 0.5891, "step": 566 }, { "epoch": 0.06995034389168184, "grad_norm": 0.5256538987159729, "learning_rate": 1e-05, "loss": 0.4832, "step": 567 }, { "epoch": 0.07007371310489467, "grad_norm": 0.6923106908798218, "learning_rate": 1e-05, "loss": 0.7155, "step": 568 }, { "epoch": 0.07019708231810752, "grad_norm": 0.5877748131752014, "learning_rate": 1e-05, "loss": 0.6436, "step": 569 }, { "epoch": 0.07032045153132035, "grad_norm": 0.5676082968711853, "learning_rate": 1e-05, "loss": 0.5453, "step": 570 }, { "epoch": 0.0704438207445332, "grad_norm": 0.7360358834266663, "learning_rate": 1e-05, "loss": 0.721, "step": 571 }, { "epoch": 0.07056718995774604, "grad_norm": 0.588007390499115, "learning_rate": 1e-05, "loss": 0.6209, "step": 572 }, { "epoch": 0.07069055917095889, "grad_norm": 0.5384306311607361, "learning_rate": 1e-05, "loss": 0.5076, "step": 573 }, { "epoch": 0.07081392838417173, "grad_norm": 0.6339231729507446, "learning_rate": 1e-05, "loss": 0.642, "step": 574 }, { "epoch": 0.07093729759738457, "grad_norm": 0.548627495765686, "learning_rate": 1e-05, "loss": 0.5702, "step": 575 }, { "epoch": 0.07106066681059742, "grad_norm": 0.5945917367935181, "learning_rate": 1e-05, "loss": 0.6715, "step": 576 }, { "epoch": 0.07118403602381025, "grad_norm": 0.5856098532676697, "learning_rate": 1e-05, "loss": 0.5767, "step": 577 }, { "epoch": 0.0713074052370231, "grad_norm": 0.5592419505119324, "learning_rate": 1e-05, "loss": 0.6471, "step": 578 }, { "epoch": 0.07143077445023595, "grad_norm": 0.560768187046051, "learning_rate": 1e-05, "loss": 0.586, "step": 579 }, { "epoch": 0.07155414366344878, "grad_norm": 0.5930595397949219, "learning_rate": 1e-05, "loss": 0.6178, "step": 580 }, { "epoch": 0.07167751287666163, "grad_norm": 0.6343058347702026, "learning_rate": 1e-05, "loss": 0.6924, "step": 581 }, { "epoch": 0.07180088208987447, "grad_norm": 0.5597347021102905, "learning_rate": 1e-05, "loss": 0.5637, "step": 582 }, { "epoch": 0.07192425130308731, "grad_norm": 0.6443220973014832, "learning_rate": 1e-05, "loss": 0.7048, "step": 583 }, { "epoch": 0.07204762051630016, "grad_norm": 0.5480485558509827, "learning_rate": 1e-05, "loss": 0.5677, "step": 584 }, { "epoch": 0.072170989729513, "grad_norm": 0.5937134623527527, "learning_rate": 1e-05, "loss": 0.6582, "step": 585 }, { "epoch": 0.07229435894272584, "grad_norm": 0.5461417436599731, "learning_rate": 1e-05, "loss": 0.546, "step": 586 }, { "epoch": 0.07241772815593868, "grad_norm": 0.6008235812187195, "learning_rate": 1e-05, "loss": 0.5525, "step": 587 }, { "epoch": 0.07254109736915153, "grad_norm": 0.5734827518463135, "learning_rate": 1e-05, "loss": 0.579, "step": 588 }, { "epoch": 0.07266446658236438, "grad_norm": 0.6431339979171753, "learning_rate": 1e-05, "loss": 0.6787, "step": 589 }, { "epoch": 0.07278783579557721, "grad_norm": 0.5991437435150146, "learning_rate": 1e-05, "loss": 0.6195, "step": 590 }, { "epoch": 0.07291120500879006, "grad_norm": 0.5817320942878723, "learning_rate": 1e-05, "loss": 0.728, "step": 591 }, { "epoch": 0.0730345742220029, "grad_norm": 0.5802493691444397, "learning_rate": 1e-05, "loss": 0.6363, "step": 592 }, { "epoch": 0.07315794343521574, "grad_norm": 0.5973821878433228, "learning_rate": 1e-05, "loss": 0.6175, "step": 593 }, { "epoch": 0.07328131264842859, "grad_norm": 0.6116293668746948, "learning_rate": 1e-05, "loss": 0.6082, "step": 594 }, { "epoch": 0.07340468186164142, "grad_norm": 0.5912976861000061, "learning_rate": 1e-05, "loss": 0.5355, "step": 595 }, { "epoch": 0.07352805107485427, "grad_norm": 0.5989688634872437, "learning_rate": 1e-05, "loss": 0.6872, "step": 596 }, { "epoch": 0.07365142028806711, "grad_norm": 0.5541938543319702, "learning_rate": 1e-05, "loss": 0.5482, "step": 597 }, { "epoch": 0.07377478950127996, "grad_norm": 0.643241822719574, "learning_rate": 1e-05, "loss": 0.7039, "step": 598 }, { "epoch": 0.0738981587144928, "grad_norm": 0.5476577281951904, "learning_rate": 1e-05, "loss": 0.5689, "step": 599 }, { "epoch": 0.07402152792770564, "grad_norm": 0.6018733978271484, "learning_rate": 1e-05, "loss": 0.5722, "step": 600 }, { "epoch": 0.07414489714091849, "grad_norm": 0.6177645325660706, "learning_rate": 1e-05, "loss": 0.7022, "step": 601 }, { "epoch": 0.07426826635413132, "grad_norm": 0.6192421913146973, "learning_rate": 1e-05, "loss": 0.6203, "step": 602 }, { "epoch": 0.07439163556734417, "grad_norm": 0.5606722235679626, "learning_rate": 1e-05, "loss": 0.5353, "step": 603 }, { "epoch": 0.07451500478055702, "grad_norm": 0.6411992907524109, "learning_rate": 1e-05, "loss": 0.6746, "step": 604 }, { "epoch": 0.07463837399376985, "grad_norm": 0.5681835412979126, "learning_rate": 1e-05, "loss": 0.5461, "step": 605 }, { "epoch": 0.0747617432069827, "grad_norm": 0.6031200885772705, "learning_rate": 1e-05, "loss": 0.5449, "step": 606 }, { "epoch": 0.07488511242019553, "grad_norm": 0.5831257104873657, "learning_rate": 1e-05, "loss": 0.6314, "step": 607 }, { "epoch": 0.07500848163340838, "grad_norm": 0.5519315004348755, "learning_rate": 1e-05, "loss": 0.5801, "step": 608 }, { "epoch": 0.07513185084662123, "grad_norm": 0.5735523104667664, "learning_rate": 1e-05, "loss": 0.5945, "step": 609 }, { "epoch": 0.07525522005983407, "grad_norm": 0.5499991774559021, "learning_rate": 1e-05, "loss": 0.6212, "step": 610 }, { "epoch": 0.07537858927304691, "grad_norm": 0.5344164371490479, "learning_rate": 1e-05, "loss": 0.5381, "step": 611 }, { "epoch": 0.07550195848625975, "grad_norm": 0.6408299803733826, "learning_rate": 1e-05, "loss": 0.54, "step": 612 }, { "epoch": 0.0756253276994726, "grad_norm": 0.6531967520713806, "learning_rate": 1e-05, "loss": 0.6282, "step": 613 }, { "epoch": 0.07574869691268545, "grad_norm": 0.5882022976875305, "learning_rate": 1e-05, "loss": 0.5757, "step": 614 }, { "epoch": 0.07587206612589828, "grad_norm": 0.5525880455970764, "learning_rate": 1e-05, "loss": 0.6258, "step": 615 }, { "epoch": 0.07599543533911113, "grad_norm": 0.6078624725341797, "learning_rate": 1e-05, "loss": 0.5531, "step": 616 }, { "epoch": 0.07611880455232396, "grad_norm": 0.721813976764679, "learning_rate": 1e-05, "loss": 0.7744, "step": 617 }, { "epoch": 0.07624217376553681, "grad_norm": 0.5249733328819275, "learning_rate": 1e-05, "loss": 0.5207, "step": 618 }, { "epoch": 0.07636554297874966, "grad_norm": 0.6329979300498962, "learning_rate": 1e-05, "loss": 0.6812, "step": 619 }, { "epoch": 0.0764889121919625, "grad_norm": 0.6271072030067444, "learning_rate": 1e-05, "loss": 0.6948, "step": 620 }, { "epoch": 0.07661228140517534, "grad_norm": 0.5209800004959106, "learning_rate": 1e-05, "loss": 0.5252, "step": 621 }, { "epoch": 0.07673565061838818, "grad_norm": 0.6072545647621155, "learning_rate": 1e-05, "loss": 0.5921, "step": 622 }, { "epoch": 0.07685901983160102, "grad_norm": 0.5913287997245789, "learning_rate": 1e-05, "loss": 0.5551, "step": 623 }, { "epoch": 0.07698238904481387, "grad_norm": 0.5636013150215149, "learning_rate": 1e-05, "loss": 0.5749, "step": 624 }, { "epoch": 0.07710575825802671, "grad_norm": 0.5855698585510254, "learning_rate": 1e-05, "loss": 0.579, "step": 625 }, { "epoch": 0.07722912747123956, "grad_norm": 0.6431542634963989, "learning_rate": 1e-05, "loss": 0.5412, "step": 626 }, { "epoch": 0.07735249668445239, "grad_norm": 0.5876504182815552, "learning_rate": 1e-05, "loss": 0.6018, "step": 627 }, { "epoch": 0.07747586589766524, "grad_norm": 0.5829225182533264, "learning_rate": 1e-05, "loss": 0.5911, "step": 628 }, { "epoch": 0.07759923511087809, "grad_norm": 0.6078658699989319, "learning_rate": 1e-05, "loss": 0.6116, "step": 629 }, { "epoch": 0.07772260432409092, "grad_norm": 0.5801270604133606, "learning_rate": 1e-05, "loss": 0.5638, "step": 630 }, { "epoch": 0.07784597353730377, "grad_norm": 0.6018309593200684, "learning_rate": 1e-05, "loss": 0.5718, "step": 631 }, { "epoch": 0.0779693427505166, "grad_norm": 0.5305702090263367, "learning_rate": 1e-05, "loss": 0.5401, "step": 632 }, { "epoch": 0.07809271196372945, "grad_norm": 0.5505676865577698, "learning_rate": 1e-05, "loss": 0.5975, "step": 633 }, { "epoch": 0.0782160811769423, "grad_norm": 0.6111274361610413, "learning_rate": 1e-05, "loss": 0.6552, "step": 634 }, { "epoch": 0.07833945039015514, "grad_norm": 0.619265615940094, "learning_rate": 1e-05, "loss": 0.5602, "step": 635 }, { "epoch": 0.07846281960336798, "grad_norm": 0.5853444933891296, "learning_rate": 1e-05, "loss": 0.5589, "step": 636 }, { "epoch": 0.07858618881658082, "grad_norm": 0.6206921935081482, "learning_rate": 1e-05, "loss": 0.6425, "step": 637 }, { "epoch": 0.07870955802979367, "grad_norm": 0.613926351070404, "learning_rate": 1e-05, "loss": 0.5959, "step": 638 }, { "epoch": 0.0788329272430065, "grad_norm": 0.5937105417251587, "learning_rate": 1e-05, "loss": 0.5833, "step": 639 }, { "epoch": 0.07895629645621935, "grad_norm": 0.5491421818733215, "learning_rate": 1e-05, "loss": 0.5347, "step": 640 }, { "epoch": 0.0790796656694322, "grad_norm": 0.6089139580726624, "learning_rate": 1e-05, "loss": 0.697, "step": 641 }, { "epoch": 0.07920303488264503, "grad_norm": 0.5694493055343628, "learning_rate": 1e-05, "loss": 0.6366, "step": 642 }, { "epoch": 0.07932640409585788, "grad_norm": 0.6560913324356079, "learning_rate": 1e-05, "loss": 0.6728, "step": 643 }, { "epoch": 0.07944977330907071, "grad_norm": 0.6001807451248169, "learning_rate": 1e-05, "loss": 0.5622, "step": 644 }, { "epoch": 0.07957314252228356, "grad_norm": 0.5621477961540222, "learning_rate": 1e-05, "loss": 0.5754, "step": 645 }, { "epoch": 0.07969651173549641, "grad_norm": 0.6148655414581299, "learning_rate": 1e-05, "loss": 0.6841, "step": 646 }, { "epoch": 0.07981988094870925, "grad_norm": 0.5724589228630066, "learning_rate": 1e-05, "loss": 0.595, "step": 647 }, { "epoch": 0.0799432501619221, "grad_norm": 0.5899855494499207, "learning_rate": 1e-05, "loss": 0.6054, "step": 648 }, { "epoch": 0.08006661937513493, "grad_norm": 0.6030654907226562, "learning_rate": 1e-05, "loss": 0.5416, "step": 649 }, { "epoch": 0.08018998858834778, "grad_norm": 0.6599435806274414, "learning_rate": 1e-05, "loss": 0.6673, "step": 650 }, { "epoch": 0.08031335780156063, "grad_norm": 0.6236377358436584, "learning_rate": 1e-05, "loss": 0.6597, "step": 651 }, { "epoch": 0.08043672701477346, "grad_norm": 0.6875399947166443, "learning_rate": 1e-05, "loss": 0.6948, "step": 652 }, { "epoch": 0.08056009622798631, "grad_norm": 0.5077522993087769, "learning_rate": 1e-05, "loss": 0.4523, "step": 653 }, { "epoch": 0.08068346544119914, "grad_norm": 0.6527911424636841, "learning_rate": 1e-05, "loss": 0.6682, "step": 654 }, { "epoch": 0.08080683465441199, "grad_norm": 0.5701897144317627, "learning_rate": 1e-05, "loss": 0.5875, "step": 655 }, { "epoch": 0.08093020386762484, "grad_norm": 0.6189697980880737, "learning_rate": 1e-05, "loss": 0.6449, "step": 656 }, { "epoch": 0.08105357308083767, "grad_norm": 0.6126153469085693, "learning_rate": 1e-05, "loss": 0.6245, "step": 657 }, { "epoch": 0.08117694229405052, "grad_norm": 0.6946678161621094, "learning_rate": 1e-05, "loss": 0.6589, "step": 658 }, { "epoch": 0.08130031150726336, "grad_norm": 0.5692234635353088, "learning_rate": 1e-05, "loss": 0.5708, "step": 659 }, { "epoch": 0.0814236807204762, "grad_norm": 0.5885841250419617, "learning_rate": 1e-05, "loss": 0.5281, "step": 660 }, { "epoch": 0.08154704993368905, "grad_norm": 0.6205266118049622, "learning_rate": 1e-05, "loss": 0.5003, "step": 661 }, { "epoch": 0.08167041914690189, "grad_norm": 0.5708321332931519, "learning_rate": 1e-05, "loss": 0.5762, "step": 662 }, { "epoch": 0.08179378836011474, "grad_norm": 0.5459261536598206, "learning_rate": 1e-05, "loss": 0.5389, "step": 663 }, { "epoch": 0.08191715757332757, "grad_norm": 0.5930382013320923, "learning_rate": 1e-05, "loss": 0.5828, "step": 664 }, { "epoch": 0.08204052678654042, "grad_norm": 0.5375136733055115, "learning_rate": 1e-05, "loss": 0.6234, "step": 665 }, { "epoch": 0.08216389599975327, "grad_norm": 0.6988787651062012, "learning_rate": 1e-05, "loss": 0.6365, "step": 666 }, { "epoch": 0.0822872652129661, "grad_norm": 0.6314388513565063, "learning_rate": 1e-05, "loss": 0.7038, "step": 667 }, { "epoch": 0.08241063442617895, "grad_norm": 0.583640992641449, "learning_rate": 1e-05, "loss": 0.6387, "step": 668 }, { "epoch": 0.08253400363939178, "grad_norm": 0.6306531429290771, "learning_rate": 1e-05, "loss": 0.6412, "step": 669 }, { "epoch": 0.08265737285260463, "grad_norm": 0.7175393104553223, "learning_rate": 1e-05, "loss": 0.6948, "step": 670 }, { "epoch": 0.08278074206581748, "grad_norm": 0.6012209057807922, "learning_rate": 1e-05, "loss": 0.5295, "step": 671 }, { "epoch": 0.08290411127903032, "grad_norm": 0.5297471284866333, "learning_rate": 1e-05, "loss": 0.5505, "step": 672 }, { "epoch": 0.08302748049224316, "grad_norm": 0.6037614941596985, "learning_rate": 1e-05, "loss": 0.5414, "step": 673 }, { "epoch": 0.083150849705456, "grad_norm": 0.5912021398544312, "learning_rate": 1e-05, "loss": 0.6041, "step": 674 }, { "epoch": 0.08327421891866885, "grad_norm": 0.5735694766044617, "learning_rate": 1e-05, "loss": 0.5689, "step": 675 }, { "epoch": 0.0833975881318817, "grad_norm": 0.5949263572692871, "learning_rate": 1e-05, "loss": 0.61, "step": 676 }, { "epoch": 0.08352095734509453, "grad_norm": 0.5633903741836548, "learning_rate": 1e-05, "loss": 0.5531, "step": 677 }, { "epoch": 0.08364432655830738, "grad_norm": 0.601646900177002, "learning_rate": 1e-05, "loss": 0.6096, "step": 678 }, { "epoch": 0.08376769577152021, "grad_norm": 0.5840855240821838, "learning_rate": 1e-05, "loss": 0.5937, "step": 679 }, { "epoch": 0.08389106498473306, "grad_norm": 0.6945694088935852, "learning_rate": 1e-05, "loss": 0.6854, "step": 680 }, { "epoch": 0.08401443419794591, "grad_norm": 0.6327170729637146, "learning_rate": 1e-05, "loss": 0.6127, "step": 681 }, { "epoch": 0.08413780341115874, "grad_norm": 0.5525254011154175, "learning_rate": 1e-05, "loss": 0.5597, "step": 682 }, { "epoch": 0.08426117262437159, "grad_norm": 0.584862232208252, "learning_rate": 1e-05, "loss": 0.5915, "step": 683 }, { "epoch": 0.08438454183758443, "grad_norm": 0.630394697189331, "learning_rate": 1e-05, "loss": 0.7061, "step": 684 }, { "epoch": 0.08450791105079727, "grad_norm": 0.6141555905342102, "learning_rate": 1e-05, "loss": 0.5946, "step": 685 }, { "epoch": 0.08463128026401012, "grad_norm": 0.6037793755531311, "learning_rate": 1e-05, "loss": 0.6543, "step": 686 }, { "epoch": 0.08475464947722296, "grad_norm": 0.6608875393867493, "learning_rate": 1e-05, "loss": 0.6815, "step": 687 }, { "epoch": 0.0848780186904358, "grad_norm": 0.6874929070472717, "learning_rate": 1e-05, "loss": 0.6745, "step": 688 }, { "epoch": 0.08500138790364864, "grad_norm": 0.5844847559928894, "learning_rate": 1e-05, "loss": 0.546, "step": 689 }, { "epoch": 0.08512475711686149, "grad_norm": 0.5287410616874695, "learning_rate": 1e-05, "loss": 0.5332, "step": 690 }, { "epoch": 0.08524812633007434, "grad_norm": 0.5067606568336487, "learning_rate": 1e-05, "loss": 0.4955, "step": 691 }, { "epoch": 0.08537149554328717, "grad_norm": 0.5880190134048462, "learning_rate": 1e-05, "loss": 0.5984, "step": 692 }, { "epoch": 0.08549486475650002, "grad_norm": 0.6293211579322815, "learning_rate": 1e-05, "loss": 0.6271, "step": 693 }, { "epoch": 0.08561823396971285, "grad_norm": 0.5515981912612915, "learning_rate": 1e-05, "loss": 0.5254, "step": 694 }, { "epoch": 0.0857416031829257, "grad_norm": 0.6555203199386597, "learning_rate": 1e-05, "loss": 0.6847, "step": 695 }, { "epoch": 0.08586497239613855, "grad_norm": 0.678251326084137, "learning_rate": 1e-05, "loss": 0.7853, "step": 696 }, { "epoch": 0.08598834160935138, "grad_norm": 0.5612874627113342, "learning_rate": 1e-05, "loss": 0.6155, "step": 697 }, { "epoch": 0.08611171082256423, "grad_norm": 0.5569027662277222, "learning_rate": 1e-05, "loss": 0.5397, "step": 698 }, { "epoch": 0.08623508003577707, "grad_norm": 0.5492419600486755, "learning_rate": 1e-05, "loss": 0.5378, "step": 699 }, { "epoch": 0.08635844924898992, "grad_norm": 0.6159240007400513, "learning_rate": 1e-05, "loss": 0.5682, "step": 700 }, { "epoch": 0.08648181846220276, "grad_norm": 0.5254884362220764, "learning_rate": 1e-05, "loss": 0.5072, "step": 701 }, { "epoch": 0.0866051876754156, "grad_norm": 0.6125096082687378, "learning_rate": 1e-05, "loss": 0.5901, "step": 702 }, { "epoch": 0.08672855688862845, "grad_norm": 0.5549134612083435, "learning_rate": 1e-05, "loss": 0.5678, "step": 703 }, { "epoch": 0.08685192610184128, "grad_norm": 0.6323354244232178, "learning_rate": 1e-05, "loss": 0.5844, "step": 704 }, { "epoch": 0.08697529531505413, "grad_norm": 0.6141095161437988, "learning_rate": 1e-05, "loss": 0.6665, "step": 705 }, { "epoch": 0.08709866452826696, "grad_norm": 0.6008206009864807, "learning_rate": 1e-05, "loss": 0.5269, "step": 706 }, { "epoch": 0.08722203374147981, "grad_norm": 0.5797525644302368, "learning_rate": 1e-05, "loss": 0.5499, "step": 707 }, { "epoch": 0.08734540295469266, "grad_norm": 0.5477797985076904, "learning_rate": 1e-05, "loss": 0.5183, "step": 708 }, { "epoch": 0.0874687721679055, "grad_norm": 0.6207008957862854, "learning_rate": 1e-05, "loss": 0.6708, "step": 709 }, { "epoch": 0.08759214138111834, "grad_norm": 0.6822627186775208, "learning_rate": 1e-05, "loss": 0.5398, "step": 710 }, { "epoch": 0.08771551059433118, "grad_norm": 0.6982694268226624, "learning_rate": 1e-05, "loss": 0.7406, "step": 711 }, { "epoch": 0.08783887980754403, "grad_norm": 0.6586421728134155, "learning_rate": 1e-05, "loss": 0.6473, "step": 712 }, { "epoch": 0.08796224902075687, "grad_norm": 0.5956459641456604, "learning_rate": 1e-05, "loss": 0.5719, "step": 713 }, { "epoch": 0.08808561823396971, "grad_norm": 0.6696299314498901, "learning_rate": 1e-05, "loss": 0.6299, "step": 714 }, { "epoch": 0.08820898744718256, "grad_norm": 0.6080349087715149, "learning_rate": 1e-05, "loss": 0.5023, "step": 715 }, { "epoch": 0.08833235666039539, "grad_norm": 0.5688741207122803, "learning_rate": 1e-05, "loss": 0.542, "step": 716 }, { "epoch": 0.08845572587360824, "grad_norm": 0.6608965992927551, "learning_rate": 1e-05, "loss": 0.5977, "step": 717 }, { "epoch": 0.08857909508682109, "grad_norm": 0.6230865716934204, "learning_rate": 1e-05, "loss": 0.5727, "step": 718 }, { "epoch": 0.08870246430003392, "grad_norm": 0.6461156010627747, "learning_rate": 1e-05, "loss": 0.7045, "step": 719 }, { "epoch": 0.08882583351324677, "grad_norm": 0.619427502155304, "learning_rate": 1e-05, "loss": 0.7185, "step": 720 }, { "epoch": 0.0889492027264596, "grad_norm": 0.6230922341346741, "learning_rate": 1e-05, "loss": 0.6876, "step": 721 }, { "epoch": 0.08907257193967245, "grad_norm": 0.6335686445236206, "learning_rate": 1e-05, "loss": 0.5627, "step": 722 }, { "epoch": 0.0891959411528853, "grad_norm": 0.5229864716529846, "learning_rate": 1e-05, "loss": 0.5058, "step": 723 }, { "epoch": 0.08931931036609814, "grad_norm": 0.5650851130485535, "learning_rate": 1e-05, "loss": 0.5151, "step": 724 }, { "epoch": 0.08944267957931099, "grad_norm": 0.6599282622337341, "learning_rate": 1e-05, "loss": 0.6455, "step": 725 }, { "epoch": 0.08956604879252382, "grad_norm": 0.622596800327301, "learning_rate": 1e-05, "loss": 0.651, "step": 726 }, { "epoch": 0.08968941800573667, "grad_norm": 0.5408441424369812, "learning_rate": 1e-05, "loss": 0.6374, "step": 727 }, { "epoch": 0.08981278721894952, "grad_norm": 0.6182026863098145, "learning_rate": 1e-05, "loss": 0.6238, "step": 728 }, { "epoch": 0.08993615643216235, "grad_norm": 0.5875959992408752, "learning_rate": 1e-05, "loss": 0.5721, "step": 729 }, { "epoch": 0.0900595256453752, "grad_norm": 0.5507059097290039, "learning_rate": 1e-05, "loss": 0.609, "step": 730 }, { "epoch": 0.09018289485858803, "grad_norm": 0.530573844909668, "learning_rate": 1e-05, "loss": 0.4713, "step": 731 }, { "epoch": 0.09030626407180088, "grad_norm": 0.5410104393959045, "learning_rate": 1e-05, "loss": 0.5368, "step": 732 }, { "epoch": 0.09042963328501373, "grad_norm": 0.5872105956077576, "learning_rate": 1e-05, "loss": 0.5832, "step": 733 }, { "epoch": 0.09055300249822656, "grad_norm": 0.5455322265625, "learning_rate": 1e-05, "loss": 0.534, "step": 734 }, { "epoch": 0.09067637171143941, "grad_norm": 0.5801419019699097, "learning_rate": 1e-05, "loss": 0.6327, "step": 735 }, { "epoch": 0.09079974092465225, "grad_norm": 0.5480952858924866, "learning_rate": 1e-05, "loss": 0.5201, "step": 736 }, { "epoch": 0.0909231101378651, "grad_norm": 0.6108790636062622, "learning_rate": 1e-05, "loss": 0.5899, "step": 737 }, { "epoch": 0.09104647935107794, "grad_norm": 0.6489862203598022, "learning_rate": 1e-05, "loss": 0.6967, "step": 738 }, { "epoch": 0.09116984856429078, "grad_norm": 0.5327072739601135, "learning_rate": 1e-05, "loss": 0.4779, "step": 739 }, { "epoch": 0.09129321777750363, "grad_norm": 0.635067343711853, "learning_rate": 1e-05, "loss": 0.6132, "step": 740 }, { "epoch": 0.09141658699071646, "grad_norm": 0.5768219828605652, "learning_rate": 1e-05, "loss": 0.6318, "step": 741 }, { "epoch": 0.09153995620392931, "grad_norm": 0.6005128026008606, "learning_rate": 1e-05, "loss": 0.6816, "step": 742 }, { "epoch": 0.09166332541714216, "grad_norm": 0.5563594102859497, "learning_rate": 1e-05, "loss": 0.5586, "step": 743 }, { "epoch": 0.09178669463035499, "grad_norm": 0.6517925262451172, "learning_rate": 1e-05, "loss": 0.647, "step": 744 }, { "epoch": 0.09191006384356784, "grad_norm": 0.6177546381950378, "learning_rate": 1e-05, "loss": 0.707, "step": 745 }, { "epoch": 0.09203343305678068, "grad_norm": 0.5998350381851196, "learning_rate": 1e-05, "loss": 0.6506, "step": 746 }, { "epoch": 0.09215680226999352, "grad_norm": 0.5591738224029541, "learning_rate": 1e-05, "loss": 0.5281, "step": 747 }, { "epoch": 0.09228017148320637, "grad_norm": 0.5970093011856079, "learning_rate": 1e-05, "loss": 0.5709, "step": 748 }, { "epoch": 0.0924035406964192, "grad_norm": 0.6590249538421631, "learning_rate": 1e-05, "loss": 0.6963, "step": 749 }, { "epoch": 0.09252690990963205, "grad_norm": 0.5259758234024048, "learning_rate": 1e-05, "loss": 0.5273, "step": 750 }, { "epoch": 0.09265027912284489, "grad_norm": 0.6334772109985352, "learning_rate": 1e-05, "loss": 0.6814, "step": 751 }, { "epoch": 0.09277364833605774, "grad_norm": 0.5974390506744385, "learning_rate": 1e-05, "loss": 0.716, "step": 752 }, { "epoch": 0.09289701754927059, "grad_norm": 0.7123695611953735, "learning_rate": 1e-05, "loss": 0.7189, "step": 753 }, { "epoch": 0.09302038676248342, "grad_norm": 0.6688754558563232, "learning_rate": 1e-05, "loss": 0.6208, "step": 754 }, { "epoch": 0.09314375597569627, "grad_norm": 0.5322921872138977, "learning_rate": 1e-05, "loss": 0.5619, "step": 755 }, { "epoch": 0.0932671251889091, "grad_norm": 0.6672219038009644, "learning_rate": 1e-05, "loss": 0.6341, "step": 756 }, { "epoch": 0.09339049440212195, "grad_norm": 0.5465327501296997, "learning_rate": 1e-05, "loss": 0.4856, "step": 757 }, { "epoch": 0.0935138636153348, "grad_norm": 0.66756671667099, "learning_rate": 1e-05, "loss": 0.7571, "step": 758 }, { "epoch": 0.09363723282854763, "grad_norm": 0.6068812608718872, "learning_rate": 1e-05, "loss": 0.6831, "step": 759 }, { "epoch": 0.09376060204176048, "grad_norm": 0.5622346997261047, "learning_rate": 1e-05, "loss": 0.5859, "step": 760 }, { "epoch": 0.09388397125497332, "grad_norm": 0.5368627905845642, "learning_rate": 1e-05, "loss": 0.5027, "step": 761 }, { "epoch": 0.09400734046818617, "grad_norm": 0.5501405000686646, "learning_rate": 1e-05, "loss": 0.5837, "step": 762 }, { "epoch": 0.09413070968139901, "grad_norm": 0.6154775023460388, "learning_rate": 1e-05, "loss": 0.628, "step": 763 }, { "epoch": 0.09425407889461185, "grad_norm": 0.4899398386478424, "learning_rate": 1e-05, "loss": 0.478, "step": 764 }, { "epoch": 0.0943774481078247, "grad_norm": 0.6209059357643127, "learning_rate": 1e-05, "loss": 0.6351, "step": 765 }, { "epoch": 0.09450081732103753, "grad_norm": 0.5760708451271057, "learning_rate": 1e-05, "loss": 0.6165, "step": 766 }, { "epoch": 0.09462418653425038, "grad_norm": 0.626979410648346, "learning_rate": 1e-05, "loss": 0.6562, "step": 767 }, { "epoch": 0.09474755574746323, "grad_norm": 0.6689379215240479, "learning_rate": 1e-05, "loss": 0.6581, "step": 768 }, { "epoch": 0.09487092496067606, "grad_norm": 0.5340315699577332, "learning_rate": 1e-05, "loss": 0.5538, "step": 769 }, { "epoch": 0.09499429417388891, "grad_norm": 0.577362596988678, "learning_rate": 1e-05, "loss": 0.5751, "step": 770 }, { "epoch": 0.09511766338710174, "grad_norm": 0.640321671962738, "learning_rate": 1e-05, "loss": 0.6969, "step": 771 }, { "epoch": 0.09524103260031459, "grad_norm": 0.7233144044876099, "learning_rate": 1e-05, "loss": 0.6273, "step": 772 }, { "epoch": 0.09536440181352743, "grad_norm": 0.5687932372093201, "learning_rate": 1e-05, "loss": 0.6103, "step": 773 }, { "epoch": 0.09548777102674028, "grad_norm": 0.5581019520759583, "learning_rate": 1e-05, "loss": 0.5887, "step": 774 }, { "epoch": 0.09561114023995312, "grad_norm": 0.6546663045883179, "learning_rate": 1e-05, "loss": 0.6757, "step": 775 }, { "epoch": 0.09573450945316596, "grad_norm": 0.6614390015602112, "learning_rate": 1e-05, "loss": 0.707, "step": 776 }, { "epoch": 0.0958578786663788, "grad_norm": 0.5513828992843628, "learning_rate": 1e-05, "loss": 0.5675, "step": 777 }, { "epoch": 0.09598124787959164, "grad_norm": 0.6027662754058838, "learning_rate": 1e-05, "loss": 0.5963, "step": 778 }, { "epoch": 0.09610461709280449, "grad_norm": 0.6994633674621582, "learning_rate": 1e-05, "loss": 0.7247, "step": 779 }, { "epoch": 0.09622798630601734, "grad_norm": 0.5471053123474121, "learning_rate": 1e-05, "loss": 0.5837, "step": 780 }, { "epoch": 0.09635135551923017, "grad_norm": 0.5646206140518188, "learning_rate": 1e-05, "loss": 0.5615, "step": 781 }, { "epoch": 0.09647472473244302, "grad_norm": 0.6041614413261414, "learning_rate": 1e-05, "loss": 0.6156, "step": 782 }, { "epoch": 0.09659809394565586, "grad_norm": 0.5765410661697388, "learning_rate": 1e-05, "loss": 0.6377, "step": 783 }, { "epoch": 0.0967214631588687, "grad_norm": 0.6108306050300598, "learning_rate": 1e-05, "loss": 0.6169, "step": 784 }, { "epoch": 0.09684483237208155, "grad_norm": 0.5354897379875183, "learning_rate": 1e-05, "loss": 0.5724, "step": 785 }, { "epoch": 0.09696820158529439, "grad_norm": 0.5637685060501099, "learning_rate": 1e-05, "loss": 0.5823, "step": 786 }, { "epoch": 0.09709157079850723, "grad_norm": 0.5444669723510742, "learning_rate": 1e-05, "loss": 0.5901, "step": 787 }, { "epoch": 0.09721494001172007, "grad_norm": 0.5757175087928772, "learning_rate": 1e-05, "loss": 0.6181, "step": 788 }, { "epoch": 0.09733830922493292, "grad_norm": 0.6197813153266907, "learning_rate": 1e-05, "loss": 0.5527, "step": 789 }, { "epoch": 0.09746167843814577, "grad_norm": 0.5797435641288757, "learning_rate": 1e-05, "loss": 0.5787, "step": 790 }, { "epoch": 0.0975850476513586, "grad_norm": 0.6014904975891113, "learning_rate": 1e-05, "loss": 0.6212, "step": 791 }, { "epoch": 0.09770841686457145, "grad_norm": 0.6408337950706482, "learning_rate": 1e-05, "loss": 0.7036, "step": 792 }, { "epoch": 0.09783178607778428, "grad_norm": 0.6363214254379272, "learning_rate": 1e-05, "loss": 0.6425, "step": 793 }, { "epoch": 0.09795515529099713, "grad_norm": 0.5610533952713013, "learning_rate": 1e-05, "loss": 0.6137, "step": 794 }, { "epoch": 0.09807852450420998, "grad_norm": 0.7234058380126953, "learning_rate": 1e-05, "loss": 0.6933, "step": 795 }, { "epoch": 0.09820189371742281, "grad_norm": 0.6144707798957825, "learning_rate": 1e-05, "loss": 0.6906, "step": 796 }, { "epoch": 0.09832526293063566, "grad_norm": 0.5454790592193604, "learning_rate": 1e-05, "loss": 0.5769, "step": 797 }, { "epoch": 0.0984486321438485, "grad_norm": 0.5839847326278687, "learning_rate": 1e-05, "loss": 0.6982, "step": 798 }, { "epoch": 0.09857200135706135, "grad_norm": 0.5895339846611023, "learning_rate": 1e-05, "loss": 0.6543, "step": 799 }, { "epoch": 0.0986953705702742, "grad_norm": 0.5322195291519165, "learning_rate": 1e-05, "loss": 0.5751, "step": 800 }, { "epoch": 0.09881873978348703, "grad_norm": 0.580163836479187, "learning_rate": 1e-05, "loss": 0.6287, "step": 801 }, { "epoch": 0.09894210899669988, "grad_norm": 0.5280058979988098, "learning_rate": 1e-05, "loss": 0.4853, "step": 802 }, { "epoch": 0.09906547820991271, "grad_norm": 0.572113037109375, "learning_rate": 1e-05, "loss": 0.5965, "step": 803 }, { "epoch": 0.09918884742312556, "grad_norm": 0.5329837203025818, "learning_rate": 1e-05, "loss": 0.5207, "step": 804 }, { "epoch": 0.09931221663633841, "grad_norm": 0.5440502762794495, "learning_rate": 1e-05, "loss": 0.561, "step": 805 }, { "epoch": 0.09943558584955124, "grad_norm": 0.6423857808113098, "learning_rate": 1e-05, "loss": 0.6626, "step": 806 }, { "epoch": 0.09955895506276409, "grad_norm": 0.5931563377380371, "learning_rate": 1e-05, "loss": 0.4935, "step": 807 }, { "epoch": 0.09968232427597692, "grad_norm": 0.6067647933959961, "learning_rate": 1e-05, "loss": 0.5822, "step": 808 }, { "epoch": 0.09980569348918977, "grad_norm": 0.5776715874671936, "learning_rate": 1e-05, "loss": 0.5667, "step": 809 }, { "epoch": 0.09992906270240262, "grad_norm": 0.6376082897186279, "learning_rate": 1e-05, "loss": 0.7262, "step": 810 }, { "epoch": 0.10005243191561546, "grad_norm": 0.7682474851608276, "learning_rate": 1e-05, "loss": 0.8356, "step": 811 }, { "epoch": 0.1001758011288283, "grad_norm": 0.5724799036979675, "learning_rate": 1e-05, "loss": 0.5947, "step": 812 }, { "epoch": 0.10029917034204114, "grad_norm": 0.5180864334106445, "learning_rate": 1e-05, "loss": 0.5472, "step": 813 }, { "epoch": 0.10042253955525399, "grad_norm": 0.573675274848938, "learning_rate": 1e-05, "loss": 0.6194, "step": 814 }, { "epoch": 0.10054590876846684, "grad_norm": 0.5456639528274536, "learning_rate": 1e-05, "loss": 0.4954, "step": 815 }, { "epoch": 0.10066927798167967, "grad_norm": 0.6414303183555603, "learning_rate": 1e-05, "loss": 0.6738, "step": 816 }, { "epoch": 0.10079264719489252, "grad_norm": 0.5449044108390808, "learning_rate": 1e-05, "loss": 0.5651, "step": 817 }, { "epoch": 0.10091601640810535, "grad_norm": 0.5167319774627686, "learning_rate": 1e-05, "loss": 0.52, "step": 818 }, { "epoch": 0.1010393856213182, "grad_norm": 0.6151095032691956, "learning_rate": 1e-05, "loss": 0.5925, "step": 819 }, { "epoch": 0.10116275483453105, "grad_norm": 0.6013595461845398, "learning_rate": 1e-05, "loss": 0.5201, "step": 820 }, { "epoch": 0.10128612404774388, "grad_norm": 0.5855554938316345, "learning_rate": 1e-05, "loss": 0.613, "step": 821 }, { "epoch": 0.10140949326095673, "grad_norm": 0.5782818794250488, "learning_rate": 1e-05, "loss": 0.6896, "step": 822 }, { "epoch": 0.10153286247416957, "grad_norm": 0.5760400891304016, "learning_rate": 1e-05, "loss": 0.4756, "step": 823 }, { "epoch": 0.10165623168738241, "grad_norm": 0.515282928943634, "learning_rate": 1e-05, "loss": 0.4925, "step": 824 }, { "epoch": 0.10177960090059526, "grad_norm": 0.6007592678070068, "learning_rate": 1e-05, "loss": 0.6716, "step": 825 }, { "epoch": 0.1019029701138081, "grad_norm": 0.6083133220672607, "learning_rate": 1e-05, "loss": 0.6406, "step": 826 }, { "epoch": 0.10202633932702095, "grad_norm": 0.5428780317306519, "learning_rate": 1e-05, "loss": 0.5247, "step": 827 }, { "epoch": 0.10214970854023378, "grad_norm": 0.6058477163314819, "learning_rate": 1e-05, "loss": 0.5622, "step": 828 }, { "epoch": 0.10227307775344663, "grad_norm": 0.5984196066856384, "learning_rate": 1e-05, "loss": 0.562, "step": 829 }, { "epoch": 0.10239644696665948, "grad_norm": 0.623558521270752, "learning_rate": 1e-05, "loss": 0.7266, "step": 830 }, { "epoch": 0.10251981617987231, "grad_norm": 0.6086697578430176, "learning_rate": 1e-05, "loss": 0.5748, "step": 831 }, { "epoch": 0.10264318539308516, "grad_norm": 0.6274095773696899, "learning_rate": 1e-05, "loss": 0.5783, "step": 832 }, { "epoch": 0.102766554606298, "grad_norm": 0.5972983837127686, "learning_rate": 1e-05, "loss": 0.5823, "step": 833 }, { "epoch": 0.10288992381951084, "grad_norm": 0.58349609375, "learning_rate": 1e-05, "loss": 0.5939, "step": 834 }, { "epoch": 0.10301329303272369, "grad_norm": 0.5649619102478027, "learning_rate": 1e-05, "loss": 0.5467, "step": 835 }, { "epoch": 0.10313666224593653, "grad_norm": 0.5884172320365906, "learning_rate": 1e-05, "loss": 0.5807, "step": 836 }, { "epoch": 0.10326003145914937, "grad_norm": 0.6378473043441772, "learning_rate": 1e-05, "loss": 0.6455, "step": 837 }, { "epoch": 0.10338340067236221, "grad_norm": 0.7278244495391846, "learning_rate": 1e-05, "loss": 0.7253, "step": 838 }, { "epoch": 0.10350676988557506, "grad_norm": 0.6098231673240662, "learning_rate": 1e-05, "loss": 0.5841, "step": 839 }, { "epoch": 0.10363013909878789, "grad_norm": 0.5758348703384399, "learning_rate": 1e-05, "loss": 0.5735, "step": 840 }, { "epoch": 0.10375350831200074, "grad_norm": 0.62794429063797, "learning_rate": 1e-05, "loss": 0.7251, "step": 841 }, { "epoch": 0.10387687752521359, "grad_norm": 0.6562005877494812, "learning_rate": 1e-05, "loss": 0.6777, "step": 842 }, { "epoch": 0.10400024673842642, "grad_norm": 0.5741284489631653, "learning_rate": 1e-05, "loss": 0.6746, "step": 843 }, { "epoch": 0.10412361595163927, "grad_norm": 0.595655620098114, "learning_rate": 1e-05, "loss": 0.6272, "step": 844 }, { "epoch": 0.1042469851648521, "grad_norm": 0.5842293500900269, "learning_rate": 1e-05, "loss": 0.6281, "step": 845 }, { "epoch": 0.10437035437806495, "grad_norm": 0.5569776296615601, "learning_rate": 1e-05, "loss": 0.5733, "step": 846 }, { "epoch": 0.1044937235912778, "grad_norm": 0.5713351368904114, "learning_rate": 1e-05, "loss": 0.558, "step": 847 }, { "epoch": 0.10461709280449064, "grad_norm": 0.6279792785644531, "learning_rate": 1e-05, "loss": 0.6479, "step": 848 }, { "epoch": 0.10474046201770348, "grad_norm": 0.5862835645675659, "learning_rate": 1e-05, "loss": 0.59, "step": 849 }, { "epoch": 0.10486383123091632, "grad_norm": 0.8784244656562805, "learning_rate": 1e-05, "loss": 0.6602, "step": 850 }, { "epoch": 0.10498720044412917, "grad_norm": 0.6079643368721008, "learning_rate": 1e-05, "loss": 0.5732, "step": 851 }, { "epoch": 0.10511056965734202, "grad_norm": 0.5694060921669006, "learning_rate": 1e-05, "loss": 0.5166, "step": 852 }, { "epoch": 0.10523393887055485, "grad_norm": 0.6135292053222656, "learning_rate": 1e-05, "loss": 0.6939, "step": 853 }, { "epoch": 0.1053573080837677, "grad_norm": 0.5935803055763245, "learning_rate": 1e-05, "loss": 0.5657, "step": 854 }, { "epoch": 0.10548067729698053, "grad_norm": 0.531604528427124, "learning_rate": 1e-05, "loss": 0.4703, "step": 855 }, { "epoch": 0.10560404651019338, "grad_norm": 0.6244126558303833, "learning_rate": 1e-05, "loss": 0.5859, "step": 856 }, { "epoch": 0.10572741572340623, "grad_norm": 0.5967321395874023, "learning_rate": 1e-05, "loss": 0.5811, "step": 857 }, { "epoch": 0.10585078493661906, "grad_norm": 0.5854459404945374, "learning_rate": 1e-05, "loss": 0.6294, "step": 858 }, { "epoch": 0.10597415414983191, "grad_norm": 0.6504725217819214, "learning_rate": 1e-05, "loss": 0.7267, "step": 859 }, { "epoch": 0.10609752336304475, "grad_norm": 0.6631115078926086, "learning_rate": 1e-05, "loss": 0.6586, "step": 860 }, { "epoch": 0.1062208925762576, "grad_norm": 0.7572216391563416, "learning_rate": 1e-05, "loss": 0.7078, "step": 861 }, { "epoch": 0.10634426178947044, "grad_norm": 0.6408737897872925, "learning_rate": 1e-05, "loss": 0.5266, "step": 862 }, { "epoch": 0.10646763100268328, "grad_norm": 0.5446124076843262, "learning_rate": 1e-05, "loss": 0.5415, "step": 863 }, { "epoch": 0.10659100021589613, "grad_norm": 0.5451663732528687, "learning_rate": 1e-05, "loss": 0.5883, "step": 864 }, { "epoch": 0.10671436942910896, "grad_norm": 0.6394774317741394, "learning_rate": 1e-05, "loss": 0.6427, "step": 865 }, { "epoch": 0.10683773864232181, "grad_norm": 0.5482413172721863, "learning_rate": 1e-05, "loss": 0.4988, "step": 866 }, { "epoch": 0.10696110785553466, "grad_norm": 0.5981287360191345, "learning_rate": 1e-05, "loss": 0.6389, "step": 867 }, { "epoch": 0.10708447706874749, "grad_norm": 0.6504390835762024, "learning_rate": 1e-05, "loss": 0.5719, "step": 868 }, { "epoch": 0.10720784628196034, "grad_norm": 0.6917802691459656, "learning_rate": 1e-05, "loss": 0.6356, "step": 869 }, { "epoch": 0.10733121549517317, "grad_norm": 0.602292537689209, "learning_rate": 1e-05, "loss": 0.5806, "step": 870 }, { "epoch": 0.10745458470838602, "grad_norm": 0.6269113421440125, "learning_rate": 1e-05, "loss": 0.5874, "step": 871 }, { "epoch": 0.10757795392159887, "grad_norm": 0.6842443943023682, "learning_rate": 1e-05, "loss": 0.6876, "step": 872 }, { "epoch": 0.1077013231348117, "grad_norm": 0.7217112183570862, "learning_rate": 1e-05, "loss": 0.6976, "step": 873 }, { "epoch": 0.10782469234802455, "grad_norm": 0.6224880814552307, "learning_rate": 1e-05, "loss": 0.6404, "step": 874 }, { "epoch": 0.10794806156123739, "grad_norm": 0.5713373422622681, "learning_rate": 1e-05, "loss": 0.6352, "step": 875 }, { "epoch": 0.10807143077445024, "grad_norm": 0.6100727915763855, "learning_rate": 1e-05, "loss": 0.6068, "step": 876 }, { "epoch": 0.10819479998766308, "grad_norm": 0.5767968893051147, "learning_rate": 1e-05, "loss": 0.6207, "step": 877 }, { "epoch": 0.10831816920087592, "grad_norm": 0.6010056734085083, "learning_rate": 1e-05, "loss": 0.5977, "step": 878 }, { "epoch": 0.10844153841408877, "grad_norm": 0.6069702506065369, "learning_rate": 1e-05, "loss": 0.5945, "step": 879 }, { "epoch": 0.1085649076273016, "grad_norm": 0.5608288049697876, "learning_rate": 1e-05, "loss": 0.5462, "step": 880 }, { "epoch": 0.10868827684051445, "grad_norm": 0.567882776260376, "learning_rate": 1e-05, "loss": 0.5975, "step": 881 }, { "epoch": 0.1088116460537273, "grad_norm": 0.6653764247894287, "learning_rate": 1e-05, "loss": 0.5951, "step": 882 }, { "epoch": 0.10893501526694013, "grad_norm": 0.5936322808265686, "learning_rate": 1e-05, "loss": 0.5787, "step": 883 }, { "epoch": 0.10905838448015298, "grad_norm": 0.6232696175575256, "learning_rate": 1e-05, "loss": 0.6398, "step": 884 }, { "epoch": 0.10918175369336582, "grad_norm": 0.6075279116630554, "learning_rate": 1e-05, "loss": 0.6692, "step": 885 }, { "epoch": 0.10930512290657866, "grad_norm": 0.5381433367729187, "learning_rate": 1e-05, "loss": 0.5628, "step": 886 }, { "epoch": 0.10942849211979151, "grad_norm": 0.6140667200088501, "learning_rate": 1e-05, "loss": 0.6263, "step": 887 }, { "epoch": 0.10955186133300435, "grad_norm": 0.6168531179428101, "learning_rate": 1e-05, "loss": 0.6239, "step": 888 }, { "epoch": 0.1096752305462172, "grad_norm": 0.6166793704032898, "learning_rate": 1e-05, "loss": 0.6521, "step": 889 }, { "epoch": 0.10979859975943003, "grad_norm": 0.5553500652313232, "learning_rate": 1e-05, "loss": 0.5466, "step": 890 }, { "epoch": 0.10992196897264288, "grad_norm": 0.7185643911361694, "learning_rate": 1e-05, "loss": 0.7012, "step": 891 }, { "epoch": 0.11004533818585573, "grad_norm": 0.5954753160476685, "learning_rate": 1e-05, "loss": 0.6099, "step": 892 }, { "epoch": 0.11016870739906856, "grad_norm": 0.6082721948623657, "learning_rate": 1e-05, "loss": 0.6901, "step": 893 }, { "epoch": 0.11029207661228141, "grad_norm": 0.7427380084991455, "learning_rate": 1e-05, "loss": 0.7157, "step": 894 }, { "epoch": 0.11041544582549424, "grad_norm": 0.6015198826789856, "learning_rate": 1e-05, "loss": 0.6068, "step": 895 }, { "epoch": 0.11053881503870709, "grad_norm": 0.6221822500228882, "learning_rate": 1e-05, "loss": 0.5757, "step": 896 }, { "epoch": 0.11066218425191994, "grad_norm": 0.5811266303062439, "learning_rate": 1e-05, "loss": 0.5861, "step": 897 }, { "epoch": 0.11078555346513277, "grad_norm": 0.5792666673660278, "learning_rate": 1e-05, "loss": 0.62, "step": 898 }, { "epoch": 0.11090892267834562, "grad_norm": 0.6115574836730957, "learning_rate": 1e-05, "loss": 0.5495, "step": 899 }, { "epoch": 0.11103229189155846, "grad_norm": 0.6193934082984924, "learning_rate": 1e-05, "loss": 0.6164, "step": 900 }, { "epoch": 0.1111556611047713, "grad_norm": 0.6529099345207214, "learning_rate": 1e-05, "loss": 0.6474, "step": 901 }, { "epoch": 0.11127903031798415, "grad_norm": 0.634085476398468, "learning_rate": 1e-05, "loss": 0.6227, "step": 902 }, { "epoch": 0.11140239953119699, "grad_norm": 0.6103140711784363, "learning_rate": 1e-05, "loss": 0.665, "step": 903 }, { "epoch": 0.11152576874440984, "grad_norm": 0.555284857749939, "learning_rate": 1e-05, "loss": 0.5635, "step": 904 }, { "epoch": 0.11164913795762267, "grad_norm": 0.6468415260314941, "learning_rate": 1e-05, "loss": 0.723, "step": 905 }, { "epoch": 0.11177250717083552, "grad_norm": 0.6383646130561829, "learning_rate": 1e-05, "loss": 0.6557, "step": 906 }, { "epoch": 0.11189587638404835, "grad_norm": 0.6067865490913391, "learning_rate": 1e-05, "loss": 0.682, "step": 907 }, { "epoch": 0.1120192455972612, "grad_norm": 0.6065346598625183, "learning_rate": 1e-05, "loss": 0.5556, "step": 908 }, { "epoch": 0.11214261481047405, "grad_norm": 0.653184175491333, "learning_rate": 1e-05, "loss": 0.6112, "step": 909 }, { "epoch": 0.11226598402368689, "grad_norm": 0.6652020812034607, "learning_rate": 1e-05, "loss": 0.681, "step": 910 }, { "epoch": 0.11238935323689973, "grad_norm": 0.5726462006568909, "learning_rate": 1e-05, "loss": 0.6252, "step": 911 }, { "epoch": 0.11251272245011257, "grad_norm": 0.6567306518554688, "learning_rate": 1e-05, "loss": 0.646, "step": 912 }, { "epoch": 0.11263609166332542, "grad_norm": 0.5901102423667908, "learning_rate": 1e-05, "loss": 0.6019, "step": 913 }, { "epoch": 0.11275946087653826, "grad_norm": 0.6077022552490234, "learning_rate": 1e-05, "loss": 0.5297, "step": 914 }, { "epoch": 0.1128828300897511, "grad_norm": 0.5481856465339661, "learning_rate": 1e-05, "loss": 0.5642, "step": 915 }, { "epoch": 0.11300619930296395, "grad_norm": 0.5979368686676025, "learning_rate": 1e-05, "loss": 0.6172, "step": 916 }, { "epoch": 0.11312956851617678, "grad_norm": 0.6846625804901123, "learning_rate": 1e-05, "loss": 0.7226, "step": 917 }, { "epoch": 0.11325293772938963, "grad_norm": 0.6367151737213135, "learning_rate": 1e-05, "loss": 0.7008, "step": 918 }, { "epoch": 0.11337630694260248, "grad_norm": 0.5844575762748718, "learning_rate": 1e-05, "loss": 0.5365, "step": 919 }, { "epoch": 0.11349967615581531, "grad_norm": 0.5385614037513733, "learning_rate": 1e-05, "loss": 0.5591, "step": 920 }, { "epoch": 0.11362304536902816, "grad_norm": 0.5759674906730652, "learning_rate": 1e-05, "loss": 0.6655, "step": 921 }, { "epoch": 0.113746414582241, "grad_norm": 0.6234505772590637, "learning_rate": 1e-05, "loss": 0.603, "step": 922 }, { "epoch": 0.11386978379545384, "grad_norm": 0.6490877866744995, "learning_rate": 1e-05, "loss": 0.6708, "step": 923 }, { "epoch": 0.11399315300866669, "grad_norm": 0.6303578615188599, "learning_rate": 1e-05, "loss": 0.6891, "step": 924 }, { "epoch": 0.11411652222187953, "grad_norm": 0.6887868642807007, "learning_rate": 1e-05, "loss": 0.6398, "step": 925 }, { "epoch": 0.11423989143509237, "grad_norm": 0.6074470281600952, "learning_rate": 1e-05, "loss": 0.6512, "step": 926 }, { "epoch": 0.11436326064830521, "grad_norm": 0.6835163235664368, "learning_rate": 1e-05, "loss": 0.7206, "step": 927 }, { "epoch": 0.11448662986151806, "grad_norm": 0.5809350609779358, "learning_rate": 1e-05, "loss": 0.6793, "step": 928 }, { "epoch": 0.1146099990747309, "grad_norm": 0.6447453498840332, "learning_rate": 1e-05, "loss": 0.5966, "step": 929 }, { "epoch": 0.11473336828794374, "grad_norm": 0.5879001021385193, "learning_rate": 1e-05, "loss": 0.648, "step": 930 }, { "epoch": 0.11485673750115659, "grad_norm": 0.6148250102996826, "learning_rate": 1e-05, "loss": 0.6486, "step": 931 }, { "epoch": 0.11498010671436942, "grad_norm": 0.5922719836235046, "learning_rate": 1e-05, "loss": 0.6757, "step": 932 }, { "epoch": 0.11510347592758227, "grad_norm": 0.6189209222793579, "learning_rate": 1e-05, "loss": 0.6021, "step": 933 }, { "epoch": 0.11522684514079512, "grad_norm": 0.5874676704406738, "learning_rate": 1e-05, "loss": 0.531, "step": 934 }, { "epoch": 0.11535021435400795, "grad_norm": 0.6713170409202576, "learning_rate": 1e-05, "loss": 0.8284, "step": 935 }, { "epoch": 0.1154735835672208, "grad_norm": 0.6741228103637695, "learning_rate": 1e-05, "loss": 0.667, "step": 936 }, { "epoch": 0.11559695278043364, "grad_norm": 0.608212411403656, "learning_rate": 1e-05, "loss": 0.6601, "step": 937 }, { "epoch": 0.11572032199364649, "grad_norm": 0.5468560457229614, "learning_rate": 1e-05, "loss": 0.599, "step": 938 }, { "epoch": 0.11584369120685933, "grad_norm": 0.604394257068634, "learning_rate": 1e-05, "loss": 0.6661, "step": 939 }, { "epoch": 0.11596706042007217, "grad_norm": 0.5858480334281921, "learning_rate": 1e-05, "loss": 0.6273, "step": 940 }, { "epoch": 0.11609042963328502, "grad_norm": 0.6121150255203247, "learning_rate": 1e-05, "loss": 0.6978, "step": 941 }, { "epoch": 0.11621379884649785, "grad_norm": 0.6250552535057068, "learning_rate": 1e-05, "loss": 0.6237, "step": 942 }, { "epoch": 0.1163371680597107, "grad_norm": 0.497734010219574, "learning_rate": 1e-05, "loss": 0.512, "step": 943 }, { "epoch": 0.11646053727292355, "grad_norm": 0.7596425414085388, "learning_rate": 1e-05, "loss": 0.7687, "step": 944 }, { "epoch": 0.11658390648613638, "grad_norm": 0.5743739008903503, "learning_rate": 1e-05, "loss": 0.5929, "step": 945 }, { "epoch": 0.11670727569934923, "grad_norm": 0.6052054762840271, "learning_rate": 1e-05, "loss": 0.7163, "step": 946 }, { "epoch": 0.11683064491256206, "grad_norm": 0.5070832371711731, "learning_rate": 1e-05, "loss": 0.4289, "step": 947 }, { "epoch": 0.11695401412577491, "grad_norm": 0.6426495909690857, "learning_rate": 1e-05, "loss": 0.6858, "step": 948 }, { "epoch": 0.11707738333898776, "grad_norm": 0.6412304043769836, "learning_rate": 1e-05, "loss": 0.6389, "step": 949 }, { "epoch": 0.1172007525522006, "grad_norm": 0.6043059229850769, "learning_rate": 1e-05, "loss": 0.5771, "step": 950 }, { "epoch": 0.11732412176541344, "grad_norm": 0.660457968711853, "learning_rate": 1e-05, "loss": 0.5752, "step": 951 }, { "epoch": 0.11744749097862628, "grad_norm": 0.5935615301132202, "learning_rate": 1e-05, "loss": 0.6074, "step": 952 }, { "epoch": 0.11757086019183913, "grad_norm": 0.5471706390380859, "learning_rate": 1e-05, "loss": 0.4331, "step": 953 }, { "epoch": 0.11769422940505198, "grad_norm": 0.6406422257423401, "learning_rate": 1e-05, "loss": 0.6462, "step": 954 }, { "epoch": 0.11781759861826481, "grad_norm": 0.5528600215911865, "learning_rate": 1e-05, "loss": 0.5083, "step": 955 }, { "epoch": 0.11794096783147766, "grad_norm": 0.6144471168518066, "learning_rate": 1e-05, "loss": 0.6034, "step": 956 }, { "epoch": 0.11806433704469049, "grad_norm": 0.6108344793319702, "learning_rate": 1e-05, "loss": 0.5819, "step": 957 }, { "epoch": 0.11818770625790334, "grad_norm": 0.5927284359931946, "learning_rate": 1e-05, "loss": 0.5518, "step": 958 }, { "epoch": 0.11831107547111619, "grad_norm": 0.5499866008758545, "learning_rate": 1e-05, "loss": 0.5153, "step": 959 }, { "epoch": 0.11843444468432902, "grad_norm": 0.5479235649108887, "learning_rate": 1e-05, "loss": 0.5575, "step": 960 }, { "epoch": 0.11855781389754187, "grad_norm": 0.5506371259689331, "learning_rate": 1e-05, "loss": 0.5858, "step": 961 }, { "epoch": 0.1186811831107547, "grad_norm": 0.5455615520477295, "learning_rate": 1e-05, "loss": 0.6541, "step": 962 }, { "epoch": 0.11880455232396755, "grad_norm": 0.5587183833122253, "learning_rate": 1e-05, "loss": 0.6034, "step": 963 }, { "epoch": 0.1189279215371804, "grad_norm": 0.5880556702613831, "learning_rate": 1e-05, "loss": 0.6055, "step": 964 }, { "epoch": 0.11905129075039324, "grad_norm": 0.657824695110321, "learning_rate": 1e-05, "loss": 0.6795, "step": 965 }, { "epoch": 0.11917465996360609, "grad_norm": 0.543256402015686, "learning_rate": 1e-05, "loss": 0.5498, "step": 966 }, { "epoch": 0.11929802917681892, "grad_norm": 0.5677402019500732, "learning_rate": 1e-05, "loss": 0.5573, "step": 967 }, { "epoch": 0.11942139839003177, "grad_norm": 0.5883381962776184, "learning_rate": 1e-05, "loss": 0.6368, "step": 968 }, { "epoch": 0.11954476760324462, "grad_norm": 0.5488570928573608, "learning_rate": 1e-05, "loss": 0.5378, "step": 969 }, { "epoch": 0.11966813681645745, "grad_norm": 0.5730189681053162, "learning_rate": 1e-05, "loss": 0.5285, "step": 970 }, { "epoch": 0.1197915060296703, "grad_norm": 0.5519734621047974, "learning_rate": 1e-05, "loss": 0.5454, "step": 971 }, { "epoch": 0.11991487524288313, "grad_norm": 0.585848867893219, "learning_rate": 1e-05, "loss": 0.5564, "step": 972 }, { "epoch": 0.12003824445609598, "grad_norm": 0.5902261734008789, "learning_rate": 1e-05, "loss": 0.6902, "step": 973 }, { "epoch": 0.12016161366930882, "grad_norm": 0.6093071103096008, "learning_rate": 1e-05, "loss": 0.6013, "step": 974 }, { "epoch": 0.12028498288252167, "grad_norm": 0.6673815846443176, "learning_rate": 1e-05, "loss": 0.7063, "step": 975 }, { "epoch": 0.12040835209573451, "grad_norm": 0.6267961263656616, "learning_rate": 1e-05, "loss": 0.6004, "step": 976 }, { "epoch": 0.12053172130894735, "grad_norm": 0.6234963536262512, "learning_rate": 1e-05, "loss": 0.7079, "step": 977 }, { "epoch": 0.1206550905221602, "grad_norm": 0.6578505039215088, "learning_rate": 1e-05, "loss": 0.6914, "step": 978 }, { "epoch": 0.12077845973537303, "grad_norm": 0.6174190640449524, "learning_rate": 1e-05, "loss": 0.6346, "step": 979 }, { "epoch": 0.12090182894858588, "grad_norm": 0.6497020125389099, "learning_rate": 1e-05, "loss": 0.5382, "step": 980 }, { "epoch": 0.12102519816179873, "grad_norm": 0.6125704050064087, "learning_rate": 1e-05, "loss": 0.5563, "step": 981 }, { "epoch": 0.12114856737501156, "grad_norm": 0.5643023252487183, "learning_rate": 1e-05, "loss": 0.5384, "step": 982 }, { "epoch": 0.12127193658822441, "grad_norm": 0.6060653924942017, "learning_rate": 1e-05, "loss": 0.5198, "step": 983 }, { "epoch": 0.12139530580143724, "grad_norm": 0.5557411909103394, "learning_rate": 1e-05, "loss": 0.5168, "step": 984 }, { "epoch": 0.1215186750146501, "grad_norm": 0.6129322052001953, "learning_rate": 1e-05, "loss": 0.5752, "step": 985 }, { "epoch": 0.12164204422786294, "grad_norm": 0.6432769298553467, "learning_rate": 1e-05, "loss": 0.7355, "step": 986 }, { "epoch": 0.12176541344107578, "grad_norm": 0.6102856397628784, "learning_rate": 1e-05, "loss": 0.5823, "step": 987 }, { "epoch": 0.12188878265428862, "grad_norm": 0.54808509349823, "learning_rate": 1e-05, "loss": 0.5159, "step": 988 }, { "epoch": 0.12201215186750146, "grad_norm": 0.678310215473175, "learning_rate": 1e-05, "loss": 0.7279, "step": 989 }, { "epoch": 0.12213552108071431, "grad_norm": 0.6217586994171143, "learning_rate": 1e-05, "loss": 0.6529, "step": 990 }, { "epoch": 0.12225889029392716, "grad_norm": 0.63457852602005, "learning_rate": 1e-05, "loss": 0.6234, "step": 991 }, { "epoch": 0.12238225950713999, "grad_norm": 0.6270992755889893, "learning_rate": 1e-05, "loss": 0.6509, "step": 992 }, { "epoch": 0.12250562872035284, "grad_norm": 0.5705767869949341, "learning_rate": 1e-05, "loss": 0.5125, "step": 993 }, { "epoch": 0.12262899793356567, "grad_norm": 0.5893779397010803, "learning_rate": 1e-05, "loss": 0.6378, "step": 994 }, { "epoch": 0.12275236714677852, "grad_norm": 0.6874808669090271, "learning_rate": 1e-05, "loss": 0.751, "step": 995 }, { "epoch": 0.12287573635999137, "grad_norm": 0.6563734412193298, "learning_rate": 1e-05, "loss": 0.6902, "step": 996 }, { "epoch": 0.1229991055732042, "grad_norm": 0.6299188733100891, "learning_rate": 1e-05, "loss": 0.65, "step": 997 }, { "epoch": 0.12312247478641705, "grad_norm": 0.5771213173866272, "learning_rate": 1e-05, "loss": 0.5778, "step": 998 }, { "epoch": 0.12324584399962989, "grad_norm": 0.5985590815544128, "learning_rate": 1e-05, "loss": 0.5129, "step": 999 }, { "epoch": 0.12336921321284273, "grad_norm": 0.5766268968582153, "learning_rate": 1e-05, "loss": 0.6162, "step": 1000 }, { "epoch": 0.12349258242605558, "grad_norm": 0.6082428097724915, "learning_rate": 1e-05, "loss": 0.4804, "step": 1001 }, { "epoch": 0.12361595163926842, "grad_norm": 0.5914680361747742, "learning_rate": 1e-05, "loss": 0.5383, "step": 1002 }, { "epoch": 0.12373932085248127, "grad_norm": 0.6384739875793457, "learning_rate": 1e-05, "loss": 0.6373, "step": 1003 }, { "epoch": 0.1238626900656941, "grad_norm": 0.5847108364105225, "learning_rate": 1e-05, "loss": 0.5997, "step": 1004 }, { "epoch": 0.12398605927890695, "grad_norm": 0.668291449546814, "learning_rate": 1e-05, "loss": 0.6444, "step": 1005 }, { "epoch": 0.1241094284921198, "grad_norm": 0.6307881474494934, "learning_rate": 1e-05, "loss": 0.5343, "step": 1006 }, { "epoch": 0.12423279770533263, "grad_norm": 0.6385557651519775, "learning_rate": 1e-05, "loss": 0.6072, "step": 1007 }, { "epoch": 0.12435616691854548, "grad_norm": 0.591648280620575, "learning_rate": 1e-05, "loss": 0.5905, "step": 1008 }, { "epoch": 0.12447953613175831, "grad_norm": 0.5582119226455688, "learning_rate": 1e-05, "loss": 0.5145, "step": 1009 }, { "epoch": 0.12460290534497116, "grad_norm": 0.5192312002182007, "learning_rate": 1e-05, "loss": 0.488, "step": 1010 }, { "epoch": 0.12472627455818401, "grad_norm": 0.6030610799789429, "learning_rate": 1e-05, "loss": 0.5966, "step": 1011 }, { "epoch": 0.12484964377139685, "grad_norm": 0.5793209671974182, "learning_rate": 1e-05, "loss": 0.6462, "step": 1012 }, { "epoch": 0.1249730129846097, "grad_norm": 0.6166507005691528, "learning_rate": 1e-05, "loss": 0.5926, "step": 1013 }, { "epoch": 0.12509638219782254, "grad_norm": 0.577549397945404, "learning_rate": 1e-05, "loss": 0.569, "step": 1014 }, { "epoch": 0.12521975141103536, "grad_norm": 0.5621426105499268, "learning_rate": 1e-05, "loss": 0.5289, "step": 1015 }, { "epoch": 0.1253431206242482, "grad_norm": 0.6408243179321289, "learning_rate": 1e-05, "loss": 0.5756, "step": 1016 }, { "epoch": 0.12546648983746106, "grad_norm": 0.5641854405403137, "learning_rate": 1e-05, "loss": 0.4902, "step": 1017 }, { "epoch": 0.1255898590506739, "grad_norm": 0.597705602645874, "learning_rate": 1e-05, "loss": 0.6517, "step": 1018 }, { "epoch": 0.12571322826388676, "grad_norm": 0.5403729677200317, "learning_rate": 1e-05, "loss": 0.4983, "step": 1019 }, { "epoch": 0.12583659747709958, "grad_norm": 0.5563834309577942, "learning_rate": 1e-05, "loss": 0.46, "step": 1020 }, { "epoch": 0.12595996669031242, "grad_norm": 0.5638904571533203, "learning_rate": 1e-05, "loss": 0.5779, "step": 1021 }, { "epoch": 0.12608333590352527, "grad_norm": 0.5873841047286987, "learning_rate": 1e-05, "loss": 0.5874, "step": 1022 }, { "epoch": 0.12620670511673812, "grad_norm": 0.5406944155693054, "learning_rate": 1e-05, "loss": 0.525, "step": 1023 }, { "epoch": 0.12633007432995097, "grad_norm": 0.5815768241882324, "learning_rate": 1e-05, "loss": 0.6478, "step": 1024 }, { "epoch": 0.1264534435431638, "grad_norm": 0.5627265572547913, "learning_rate": 1e-05, "loss": 0.6206, "step": 1025 }, { "epoch": 0.12657681275637664, "grad_norm": 0.631276547908783, "learning_rate": 1e-05, "loss": 0.7196, "step": 1026 }, { "epoch": 0.1267001819695895, "grad_norm": 0.6250817179679871, "learning_rate": 1e-05, "loss": 0.6223, "step": 1027 }, { "epoch": 0.12682355118280234, "grad_norm": 0.6328434348106384, "learning_rate": 1e-05, "loss": 0.6696, "step": 1028 }, { "epoch": 0.12694692039601518, "grad_norm": 0.5803878307342529, "learning_rate": 1e-05, "loss": 0.646, "step": 1029 }, { "epoch": 0.127070289609228, "grad_norm": 0.5212064981460571, "learning_rate": 1e-05, "loss": 0.4767, "step": 1030 }, { "epoch": 0.12719365882244085, "grad_norm": 0.6065342426300049, "learning_rate": 1e-05, "loss": 0.6524, "step": 1031 }, { "epoch": 0.1273170280356537, "grad_norm": 0.6296350359916687, "learning_rate": 1e-05, "loss": 0.5596, "step": 1032 }, { "epoch": 0.12744039724886655, "grad_norm": 0.7104046940803528, "learning_rate": 1e-05, "loss": 0.7796, "step": 1033 }, { "epoch": 0.1275637664620794, "grad_norm": 0.562492311000824, "learning_rate": 1e-05, "loss": 0.5398, "step": 1034 }, { "epoch": 0.12768713567529222, "grad_norm": 0.5671099424362183, "learning_rate": 1e-05, "loss": 0.5204, "step": 1035 }, { "epoch": 0.12781050488850507, "grad_norm": 0.6810777187347412, "learning_rate": 1e-05, "loss": 0.7456, "step": 1036 }, { "epoch": 0.12793387410171791, "grad_norm": 0.5636581182479858, "learning_rate": 1e-05, "loss": 0.5628, "step": 1037 }, { "epoch": 0.12805724331493076, "grad_norm": 0.5486369132995605, "learning_rate": 1e-05, "loss": 0.4774, "step": 1038 }, { "epoch": 0.1281806125281436, "grad_norm": 0.6478271484375, "learning_rate": 1e-05, "loss": 0.6534, "step": 1039 }, { "epoch": 0.12830398174135643, "grad_norm": 0.6681860685348511, "learning_rate": 1e-05, "loss": 0.7675, "step": 1040 }, { "epoch": 0.12842735095456928, "grad_norm": 0.6333445906639099, "learning_rate": 1e-05, "loss": 0.6211, "step": 1041 }, { "epoch": 0.12855072016778213, "grad_norm": 0.6794083118438721, "learning_rate": 1e-05, "loss": 0.7288, "step": 1042 }, { "epoch": 0.12867408938099498, "grad_norm": 0.5460959076881409, "learning_rate": 1e-05, "loss": 0.5283, "step": 1043 }, { "epoch": 0.12879745859420783, "grad_norm": 0.6046386957168579, "learning_rate": 1e-05, "loss": 0.618, "step": 1044 }, { "epoch": 0.12892082780742065, "grad_norm": 0.5901650786399841, "learning_rate": 1e-05, "loss": 0.5783, "step": 1045 }, { "epoch": 0.1290441970206335, "grad_norm": 0.5654875636100769, "learning_rate": 1e-05, "loss": 0.5564, "step": 1046 }, { "epoch": 0.12916756623384634, "grad_norm": 0.5677658319473267, "learning_rate": 1e-05, "loss": 0.5593, "step": 1047 }, { "epoch": 0.1292909354470592, "grad_norm": 0.6445660591125488, "learning_rate": 1e-05, "loss": 0.6933, "step": 1048 }, { "epoch": 0.12941430466027204, "grad_norm": 0.6233581900596619, "learning_rate": 1e-05, "loss": 0.6092, "step": 1049 }, { "epoch": 0.12953767387348486, "grad_norm": 0.608387291431427, "learning_rate": 1e-05, "loss": 0.6645, "step": 1050 }, { "epoch": 0.1296610430866977, "grad_norm": 0.6039597392082214, "learning_rate": 1e-05, "loss": 0.5775, "step": 1051 }, { "epoch": 0.12978441229991056, "grad_norm": 0.5851549506187439, "learning_rate": 1e-05, "loss": 0.5448, "step": 1052 }, { "epoch": 0.1299077815131234, "grad_norm": 0.6748951077461243, "learning_rate": 1e-05, "loss": 0.6122, "step": 1053 }, { "epoch": 0.13003115072633625, "grad_norm": 0.6400757431983948, "learning_rate": 1e-05, "loss": 0.6363, "step": 1054 }, { "epoch": 0.13015451993954907, "grad_norm": 0.6267991065979004, "learning_rate": 1e-05, "loss": 0.6049, "step": 1055 }, { "epoch": 0.13027788915276192, "grad_norm": 0.5667839050292969, "learning_rate": 1e-05, "loss": 0.5331, "step": 1056 }, { "epoch": 0.13040125836597477, "grad_norm": 0.5996796488761902, "learning_rate": 1e-05, "loss": 0.5538, "step": 1057 }, { "epoch": 0.13052462757918762, "grad_norm": 0.5832934379577637, "learning_rate": 1e-05, "loss": 0.6195, "step": 1058 }, { "epoch": 0.13064799679240047, "grad_norm": 0.6402991414070129, "learning_rate": 1e-05, "loss": 0.6035, "step": 1059 }, { "epoch": 0.1307713660056133, "grad_norm": 0.6044108271598816, "learning_rate": 1e-05, "loss": 0.4912, "step": 1060 }, { "epoch": 0.13089473521882614, "grad_norm": 0.533486008644104, "learning_rate": 1e-05, "loss": 0.5766, "step": 1061 }, { "epoch": 0.13101810443203898, "grad_norm": 0.5963729619979858, "learning_rate": 1e-05, "loss": 0.5771, "step": 1062 }, { "epoch": 0.13114147364525183, "grad_norm": 0.5731346011161804, "learning_rate": 1e-05, "loss": 0.5418, "step": 1063 }, { "epoch": 0.13126484285846468, "grad_norm": 0.6633305549621582, "learning_rate": 1e-05, "loss": 0.6485, "step": 1064 }, { "epoch": 0.1313882120716775, "grad_norm": 0.5793431401252747, "learning_rate": 1e-05, "loss": 0.5478, "step": 1065 }, { "epoch": 0.13151158128489035, "grad_norm": 0.5227037668228149, "learning_rate": 1e-05, "loss": 0.4755, "step": 1066 }, { "epoch": 0.1316349504981032, "grad_norm": 0.6225510835647583, "learning_rate": 1e-05, "loss": 0.5333, "step": 1067 }, { "epoch": 0.13175831971131605, "grad_norm": 0.5588158369064331, "learning_rate": 1e-05, "loss": 0.5594, "step": 1068 }, { "epoch": 0.1318816889245289, "grad_norm": 0.6043484807014465, "learning_rate": 1e-05, "loss": 0.6343, "step": 1069 }, { "epoch": 0.13200505813774172, "grad_norm": 0.5396068692207336, "learning_rate": 1e-05, "loss": 0.5197, "step": 1070 }, { "epoch": 0.13212842735095456, "grad_norm": 0.5376585721969604, "learning_rate": 1e-05, "loss": 0.4736, "step": 1071 }, { "epoch": 0.1322517965641674, "grad_norm": 0.5803943276405334, "learning_rate": 1e-05, "loss": 0.5646, "step": 1072 }, { "epoch": 0.13237516577738026, "grad_norm": 0.6040164232254028, "learning_rate": 1e-05, "loss": 0.6729, "step": 1073 }, { "epoch": 0.1324985349905931, "grad_norm": 0.6388505101203918, "learning_rate": 1e-05, "loss": 0.6943, "step": 1074 }, { "epoch": 0.13262190420380593, "grad_norm": 0.5714583992958069, "learning_rate": 1e-05, "loss": 0.5088, "step": 1075 }, { "epoch": 0.13274527341701878, "grad_norm": 0.6113476157188416, "learning_rate": 1e-05, "loss": 0.5497, "step": 1076 }, { "epoch": 0.13286864263023163, "grad_norm": 0.5745534896850586, "learning_rate": 1e-05, "loss": 0.5969, "step": 1077 }, { "epoch": 0.13299201184344447, "grad_norm": 0.5587049722671509, "learning_rate": 1e-05, "loss": 0.5596, "step": 1078 }, { "epoch": 0.13311538105665732, "grad_norm": 0.6024150252342224, "learning_rate": 1e-05, "loss": 0.6364, "step": 1079 }, { "epoch": 0.13323875026987014, "grad_norm": 0.6108371615409851, "learning_rate": 1e-05, "loss": 0.5642, "step": 1080 }, { "epoch": 0.133362119483083, "grad_norm": 0.5946840047836304, "learning_rate": 1e-05, "loss": 0.538, "step": 1081 }, { "epoch": 0.13348548869629584, "grad_norm": 0.545527458190918, "learning_rate": 1e-05, "loss": 0.5551, "step": 1082 }, { "epoch": 0.1336088579095087, "grad_norm": 0.616497814655304, "learning_rate": 1e-05, "loss": 0.5873, "step": 1083 }, { "epoch": 0.13373222712272154, "grad_norm": 0.6248806118965149, "learning_rate": 1e-05, "loss": 0.665, "step": 1084 }, { "epoch": 0.13385559633593436, "grad_norm": 0.5283156633377075, "learning_rate": 1e-05, "loss": 0.516, "step": 1085 }, { "epoch": 0.1339789655491472, "grad_norm": 0.5825273394584656, "learning_rate": 1e-05, "loss": 0.5831, "step": 1086 }, { "epoch": 0.13410233476236005, "grad_norm": 0.5679401755332947, "learning_rate": 1e-05, "loss": 0.5924, "step": 1087 }, { "epoch": 0.1342257039755729, "grad_norm": 0.6337711811065674, "learning_rate": 1e-05, "loss": 0.6454, "step": 1088 }, { "epoch": 0.13434907318878575, "grad_norm": 0.6052919626235962, "learning_rate": 1e-05, "loss": 0.6389, "step": 1089 }, { "epoch": 0.13447244240199857, "grad_norm": 0.5493788123130798, "learning_rate": 1e-05, "loss": 0.5766, "step": 1090 }, { "epoch": 0.13459581161521142, "grad_norm": 0.5799113512039185, "learning_rate": 1e-05, "loss": 0.5648, "step": 1091 }, { "epoch": 0.13471918082842427, "grad_norm": 0.687186598777771, "learning_rate": 1e-05, "loss": 0.7237, "step": 1092 }, { "epoch": 0.13484255004163712, "grad_norm": 0.6161717772483826, "learning_rate": 1e-05, "loss": 0.6528, "step": 1093 }, { "epoch": 0.13496591925484996, "grad_norm": 0.6518669724464417, "learning_rate": 1e-05, "loss": 0.5649, "step": 1094 }, { "epoch": 0.13508928846806278, "grad_norm": 0.5701250433921814, "learning_rate": 1e-05, "loss": 0.5592, "step": 1095 }, { "epoch": 0.13521265768127563, "grad_norm": 0.6635367274284363, "learning_rate": 1e-05, "loss": 0.7652, "step": 1096 }, { "epoch": 0.13533602689448848, "grad_norm": 0.6788615584373474, "learning_rate": 1e-05, "loss": 0.6736, "step": 1097 }, { "epoch": 0.13545939610770133, "grad_norm": 0.5760916471481323, "learning_rate": 1e-05, "loss": 0.5382, "step": 1098 }, { "epoch": 0.13558276532091418, "grad_norm": 0.6175260543823242, "learning_rate": 1e-05, "loss": 0.5087, "step": 1099 }, { "epoch": 0.135706134534127, "grad_norm": 0.5567233562469482, "learning_rate": 1e-05, "loss": 0.5829, "step": 1100 }, { "epoch": 0.13582950374733985, "grad_norm": 0.6041660308837891, "learning_rate": 1e-05, "loss": 0.5795, "step": 1101 }, { "epoch": 0.1359528729605527, "grad_norm": 0.602878987789154, "learning_rate": 1e-05, "loss": 0.5533, "step": 1102 }, { "epoch": 0.13607624217376554, "grad_norm": 0.6506818532943726, "learning_rate": 1e-05, "loss": 0.6515, "step": 1103 }, { "epoch": 0.1361996113869784, "grad_norm": 0.6169960498809814, "learning_rate": 1e-05, "loss": 0.532, "step": 1104 }, { "epoch": 0.1363229806001912, "grad_norm": 0.5512224435806274, "learning_rate": 1e-05, "loss": 0.553, "step": 1105 }, { "epoch": 0.13644634981340406, "grad_norm": 0.5362080931663513, "learning_rate": 1e-05, "loss": 0.5175, "step": 1106 }, { "epoch": 0.1365697190266169, "grad_norm": 0.6270944476127625, "learning_rate": 1e-05, "loss": 0.5991, "step": 1107 }, { "epoch": 0.13669308823982976, "grad_norm": 0.5213748812675476, "learning_rate": 1e-05, "loss": 0.524, "step": 1108 }, { "epoch": 0.1368164574530426, "grad_norm": 0.5863217711448669, "learning_rate": 1e-05, "loss": 0.5682, "step": 1109 }, { "epoch": 0.13693982666625543, "grad_norm": 0.5923739671707153, "learning_rate": 1e-05, "loss": 0.5269, "step": 1110 }, { "epoch": 0.13706319587946827, "grad_norm": 0.6330743432044983, "learning_rate": 1e-05, "loss": 0.6951, "step": 1111 }, { "epoch": 0.13718656509268112, "grad_norm": 0.5844782590866089, "learning_rate": 1e-05, "loss": 0.5197, "step": 1112 }, { "epoch": 0.13730993430589397, "grad_norm": 0.6022050380706787, "learning_rate": 1e-05, "loss": 0.5835, "step": 1113 }, { "epoch": 0.13743330351910682, "grad_norm": 0.6074455380439758, "learning_rate": 1e-05, "loss": 0.6074, "step": 1114 }, { "epoch": 0.13755667273231964, "grad_norm": 0.5944667458534241, "learning_rate": 1e-05, "loss": 0.5917, "step": 1115 }, { "epoch": 0.1376800419455325, "grad_norm": 0.6835671067237854, "learning_rate": 1e-05, "loss": 0.6116, "step": 1116 }, { "epoch": 0.13780341115874534, "grad_norm": 0.6634491086006165, "learning_rate": 1e-05, "loss": 0.6877, "step": 1117 }, { "epoch": 0.13792678037195819, "grad_norm": 0.6107625365257263, "learning_rate": 1e-05, "loss": 0.6001, "step": 1118 }, { "epoch": 0.13805014958517103, "grad_norm": 0.6341646313667297, "learning_rate": 1e-05, "loss": 0.6501, "step": 1119 }, { "epoch": 0.13817351879838385, "grad_norm": 0.5408080816268921, "learning_rate": 1e-05, "loss": 0.5026, "step": 1120 }, { "epoch": 0.1382968880115967, "grad_norm": 0.6740326881408691, "learning_rate": 1e-05, "loss": 0.7062, "step": 1121 }, { "epoch": 0.13842025722480955, "grad_norm": 0.5609259009361267, "learning_rate": 1e-05, "loss": 0.6031, "step": 1122 }, { "epoch": 0.1385436264380224, "grad_norm": 0.6013202667236328, "learning_rate": 1e-05, "loss": 0.5924, "step": 1123 }, { "epoch": 0.13866699565123525, "grad_norm": 0.6390030980110168, "learning_rate": 1e-05, "loss": 0.4824, "step": 1124 }, { "epoch": 0.13879036486444807, "grad_norm": 0.6166038513183594, "learning_rate": 1e-05, "loss": 0.663, "step": 1125 }, { "epoch": 0.13891373407766092, "grad_norm": 0.6164864897727966, "learning_rate": 1e-05, "loss": 0.6986, "step": 1126 }, { "epoch": 0.13903710329087376, "grad_norm": 0.6146764755249023, "learning_rate": 1e-05, "loss": 0.6491, "step": 1127 }, { "epoch": 0.1391604725040866, "grad_norm": 0.6378417611122131, "learning_rate": 1e-05, "loss": 0.6661, "step": 1128 }, { "epoch": 0.13928384171729946, "grad_norm": 0.5938000082969666, "learning_rate": 1e-05, "loss": 0.5847, "step": 1129 }, { "epoch": 0.13940721093051228, "grad_norm": 0.56881183385849, "learning_rate": 1e-05, "loss": 0.5889, "step": 1130 }, { "epoch": 0.13953058014372513, "grad_norm": 0.5986264944076538, "learning_rate": 1e-05, "loss": 0.7004, "step": 1131 }, { "epoch": 0.13965394935693798, "grad_norm": 0.5568170547485352, "learning_rate": 1e-05, "loss": 0.5132, "step": 1132 }, { "epoch": 0.13977731857015083, "grad_norm": 0.6115668416023254, "learning_rate": 1e-05, "loss": 0.5466, "step": 1133 }, { "epoch": 0.13990068778336368, "grad_norm": 0.5956915020942688, "learning_rate": 1e-05, "loss": 0.6222, "step": 1134 }, { "epoch": 0.1400240569965765, "grad_norm": 0.6004845499992371, "learning_rate": 1e-05, "loss": 0.6276, "step": 1135 }, { "epoch": 0.14014742620978934, "grad_norm": 0.6422192454338074, "learning_rate": 1e-05, "loss": 0.6259, "step": 1136 }, { "epoch": 0.1402707954230022, "grad_norm": 0.6581796407699585, "learning_rate": 1e-05, "loss": 0.6513, "step": 1137 }, { "epoch": 0.14039416463621504, "grad_norm": 0.5532979965209961, "learning_rate": 1e-05, "loss": 0.5694, "step": 1138 }, { "epoch": 0.14051753384942786, "grad_norm": 0.6844574809074402, "learning_rate": 1e-05, "loss": 0.7378, "step": 1139 }, { "epoch": 0.1406409030626407, "grad_norm": 0.5996751189231873, "learning_rate": 1e-05, "loss": 0.5939, "step": 1140 }, { "epoch": 0.14076427227585356, "grad_norm": 0.6776540875434875, "learning_rate": 1e-05, "loss": 0.7302, "step": 1141 }, { "epoch": 0.1408876414890664, "grad_norm": 0.524994969367981, "learning_rate": 1e-05, "loss": 0.4574, "step": 1142 }, { "epoch": 0.14101101070227925, "grad_norm": 0.5879911184310913, "learning_rate": 1e-05, "loss": 0.6289, "step": 1143 }, { "epoch": 0.14113437991549208, "grad_norm": 0.6755305528640747, "learning_rate": 1e-05, "loss": 0.5234, "step": 1144 }, { "epoch": 0.14125774912870492, "grad_norm": 0.6002398133277893, "learning_rate": 1e-05, "loss": 0.6255, "step": 1145 }, { "epoch": 0.14138111834191777, "grad_norm": 0.6095854043960571, "learning_rate": 1e-05, "loss": 0.5428, "step": 1146 }, { "epoch": 0.14150448755513062, "grad_norm": 0.6654171943664551, "learning_rate": 1e-05, "loss": 0.6869, "step": 1147 }, { "epoch": 0.14162785676834347, "grad_norm": 0.6232200264930725, "learning_rate": 1e-05, "loss": 0.5469, "step": 1148 }, { "epoch": 0.1417512259815563, "grad_norm": 0.5565340518951416, "learning_rate": 1e-05, "loss": 0.6198, "step": 1149 }, { "epoch": 0.14187459519476914, "grad_norm": 0.5582769513130188, "learning_rate": 1e-05, "loss": 0.5224, "step": 1150 }, { "epoch": 0.14199796440798199, "grad_norm": 0.5462267994880676, "learning_rate": 1e-05, "loss": 0.5532, "step": 1151 }, { "epoch": 0.14212133362119483, "grad_norm": 0.5649141073226929, "learning_rate": 1e-05, "loss": 0.5538, "step": 1152 }, { "epoch": 0.14224470283440768, "grad_norm": 0.521077573299408, "learning_rate": 1e-05, "loss": 0.5305, "step": 1153 }, { "epoch": 0.1423680720476205, "grad_norm": 0.6594535708427429, "learning_rate": 1e-05, "loss": 0.6647, "step": 1154 }, { "epoch": 0.14249144126083335, "grad_norm": 0.6015805602073669, "learning_rate": 1e-05, "loss": 0.7764, "step": 1155 }, { "epoch": 0.1426148104740462, "grad_norm": 0.6316118836402893, "learning_rate": 1e-05, "loss": 0.5913, "step": 1156 }, { "epoch": 0.14273817968725905, "grad_norm": 0.6170286536216736, "learning_rate": 1e-05, "loss": 0.6224, "step": 1157 }, { "epoch": 0.1428615489004719, "grad_norm": 0.7150058746337891, "learning_rate": 1e-05, "loss": 0.6673, "step": 1158 }, { "epoch": 0.14298491811368472, "grad_norm": 0.6173244118690491, "learning_rate": 1e-05, "loss": 0.5908, "step": 1159 }, { "epoch": 0.14310828732689757, "grad_norm": 0.6178215146064758, "learning_rate": 1e-05, "loss": 0.6197, "step": 1160 }, { "epoch": 0.1432316565401104, "grad_norm": 0.5619454979896545, "learning_rate": 1e-05, "loss": 0.5203, "step": 1161 }, { "epoch": 0.14335502575332326, "grad_norm": 0.5653589963912964, "learning_rate": 1e-05, "loss": 0.5585, "step": 1162 }, { "epoch": 0.1434783949665361, "grad_norm": 0.6326634883880615, "learning_rate": 1e-05, "loss": 0.6575, "step": 1163 }, { "epoch": 0.14360176417974893, "grad_norm": 0.5176656246185303, "learning_rate": 1e-05, "loss": 0.4766, "step": 1164 }, { "epoch": 0.14372513339296178, "grad_norm": 0.5424138307571411, "learning_rate": 1e-05, "loss": 0.5821, "step": 1165 }, { "epoch": 0.14384850260617463, "grad_norm": 0.6219730377197266, "learning_rate": 1e-05, "loss": 0.7433, "step": 1166 }, { "epoch": 0.14397187181938748, "grad_norm": 0.5670789480209351, "learning_rate": 1e-05, "loss": 0.5685, "step": 1167 }, { "epoch": 0.14409524103260032, "grad_norm": 0.6384449601173401, "learning_rate": 1e-05, "loss": 0.6904, "step": 1168 }, { "epoch": 0.14421861024581314, "grad_norm": 0.5588389039039612, "learning_rate": 1e-05, "loss": 0.5709, "step": 1169 }, { "epoch": 0.144341979459026, "grad_norm": 0.5818073749542236, "learning_rate": 1e-05, "loss": 0.5948, "step": 1170 }, { "epoch": 0.14446534867223884, "grad_norm": 0.6237288117408752, "learning_rate": 1e-05, "loss": 0.6353, "step": 1171 }, { "epoch": 0.1445887178854517, "grad_norm": 0.5287932753562927, "learning_rate": 1e-05, "loss": 0.5025, "step": 1172 }, { "epoch": 0.14471208709866454, "grad_norm": 0.5755062699317932, "learning_rate": 1e-05, "loss": 0.599, "step": 1173 }, { "epoch": 0.14483545631187736, "grad_norm": 0.52392578125, "learning_rate": 1e-05, "loss": 0.4971, "step": 1174 }, { "epoch": 0.1449588255250902, "grad_norm": 0.6573879718780518, "learning_rate": 1e-05, "loss": 0.6379, "step": 1175 }, { "epoch": 0.14508219473830306, "grad_norm": 0.575718879699707, "learning_rate": 1e-05, "loss": 0.6504, "step": 1176 }, { "epoch": 0.1452055639515159, "grad_norm": 0.5835546255111694, "learning_rate": 1e-05, "loss": 0.5689, "step": 1177 }, { "epoch": 0.14532893316472875, "grad_norm": 0.7002575397491455, "learning_rate": 1e-05, "loss": 0.5873, "step": 1178 }, { "epoch": 0.14545230237794157, "grad_norm": 0.5809087157249451, "learning_rate": 1e-05, "loss": 0.7092, "step": 1179 }, { "epoch": 0.14557567159115442, "grad_norm": 0.6549384593963623, "learning_rate": 1e-05, "loss": 0.6561, "step": 1180 }, { "epoch": 0.14569904080436727, "grad_norm": 0.5314410328865051, "learning_rate": 1e-05, "loss": 0.4807, "step": 1181 }, { "epoch": 0.14582241001758012, "grad_norm": 0.5249319076538086, "learning_rate": 1e-05, "loss": 0.4957, "step": 1182 }, { "epoch": 0.14594577923079297, "grad_norm": 0.6117422580718994, "learning_rate": 1e-05, "loss": 0.6933, "step": 1183 }, { "epoch": 0.1460691484440058, "grad_norm": 0.735058069229126, "learning_rate": 1e-05, "loss": 0.6779, "step": 1184 }, { "epoch": 0.14619251765721863, "grad_norm": 0.6517717838287354, "learning_rate": 1e-05, "loss": 0.6708, "step": 1185 }, { "epoch": 0.14631588687043148, "grad_norm": 0.591791033744812, "learning_rate": 1e-05, "loss": 0.5893, "step": 1186 }, { "epoch": 0.14643925608364433, "grad_norm": 0.5312754511833191, "learning_rate": 1e-05, "loss": 0.4955, "step": 1187 }, { "epoch": 0.14656262529685718, "grad_norm": 0.5905844569206238, "learning_rate": 1e-05, "loss": 0.5506, "step": 1188 }, { "epoch": 0.14668599451007, "grad_norm": 0.6466634273529053, "learning_rate": 1e-05, "loss": 0.6132, "step": 1189 }, { "epoch": 0.14680936372328285, "grad_norm": 0.6378663778305054, "learning_rate": 1e-05, "loss": 0.6388, "step": 1190 }, { "epoch": 0.1469327329364957, "grad_norm": 0.5991595983505249, "learning_rate": 1e-05, "loss": 0.6128, "step": 1191 }, { "epoch": 0.14705610214970855, "grad_norm": 0.6456660032272339, "learning_rate": 1e-05, "loss": 0.6817, "step": 1192 }, { "epoch": 0.1471794713629214, "grad_norm": 0.682644784450531, "learning_rate": 1e-05, "loss": 0.6823, "step": 1193 }, { "epoch": 0.14730284057613421, "grad_norm": 0.6001988649368286, "learning_rate": 1e-05, "loss": 0.5601, "step": 1194 }, { "epoch": 0.14742620978934706, "grad_norm": 0.5768288373947144, "learning_rate": 1e-05, "loss": 0.5407, "step": 1195 }, { "epoch": 0.1475495790025599, "grad_norm": 0.5864921808242798, "learning_rate": 1e-05, "loss": 0.5446, "step": 1196 }, { "epoch": 0.14767294821577276, "grad_norm": 0.5789661407470703, "learning_rate": 1e-05, "loss": 0.5875, "step": 1197 }, { "epoch": 0.1477963174289856, "grad_norm": 0.5641525983810425, "learning_rate": 1e-05, "loss": 0.5795, "step": 1198 }, { "epoch": 0.14791968664219843, "grad_norm": 0.6201781034469604, "learning_rate": 1e-05, "loss": 0.6646, "step": 1199 }, { "epoch": 0.14804305585541128, "grad_norm": 0.5588611960411072, "learning_rate": 1e-05, "loss": 0.5958, "step": 1200 }, { "epoch": 0.14816642506862412, "grad_norm": 0.5811861753463745, "learning_rate": 1e-05, "loss": 0.6213, "step": 1201 }, { "epoch": 0.14828979428183697, "grad_norm": 0.55463045835495, "learning_rate": 1e-05, "loss": 0.5123, "step": 1202 }, { "epoch": 0.14841316349504982, "grad_norm": 0.5732275247573853, "learning_rate": 1e-05, "loss": 0.5735, "step": 1203 }, { "epoch": 0.14853653270826264, "grad_norm": 0.5132952332496643, "learning_rate": 1e-05, "loss": 0.5392, "step": 1204 }, { "epoch": 0.1486599019214755, "grad_norm": 0.705169141292572, "learning_rate": 1e-05, "loss": 0.6654, "step": 1205 }, { "epoch": 0.14878327113468834, "grad_norm": 0.6848952174186707, "learning_rate": 1e-05, "loss": 0.7957, "step": 1206 }, { "epoch": 0.1489066403479012, "grad_norm": 0.629321277141571, "learning_rate": 1e-05, "loss": 0.6351, "step": 1207 }, { "epoch": 0.14903000956111404, "grad_norm": 0.5134278535842896, "learning_rate": 1e-05, "loss": 0.4811, "step": 1208 }, { "epoch": 0.14915337877432686, "grad_norm": 0.6208613514900208, "learning_rate": 1e-05, "loss": 0.5386, "step": 1209 }, { "epoch": 0.1492767479875397, "grad_norm": 0.6241838335990906, "learning_rate": 1e-05, "loss": 0.6166, "step": 1210 }, { "epoch": 0.14940011720075255, "grad_norm": 0.5716149806976318, "learning_rate": 1e-05, "loss": 0.5103, "step": 1211 }, { "epoch": 0.1495234864139654, "grad_norm": 0.5851812958717346, "learning_rate": 1e-05, "loss": 0.6441, "step": 1212 }, { "epoch": 0.14964685562717825, "grad_norm": 0.7221922874450684, "learning_rate": 1e-05, "loss": 0.6603, "step": 1213 }, { "epoch": 0.14977022484039107, "grad_norm": 0.5931083559989929, "learning_rate": 1e-05, "loss": 0.5946, "step": 1214 }, { "epoch": 0.14989359405360392, "grad_norm": 0.6145055890083313, "learning_rate": 1e-05, "loss": 0.6565, "step": 1215 }, { "epoch": 0.15001696326681677, "grad_norm": 0.5630729794502258, "learning_rate": 1e-05, "loss": 0.5722, "step": 1216 }, { "epoch": 0.15014033248002961, "grad_norm": 0.5994247794151306, "learning_rate": 1e-05, "loss": 0.6443, "step": 1217 }, { "epoch": 0.15026370169324246, "grad_norm": 0.5962396264076233, "learning_rate": 1e-05, "loss": 0.5793, "step": 1218 }, { "epoch": 0.15038707090645528, "grad_norm": 0.5768201351165771, "learning_rate": 1e-05, "loss": 0.5472, "step": 1219 }, { "epoch": 0.15051044011966813, "grad_norm": 0.5920811891555786, "learning_rate": 1e-05, "loss": 0.6528, "step": 1220 }, { "epoch": 0.15063380933288098, "grad_norm": 0.6752760410308838, "learning_rate": 1e-05, "loss": 0.7555, "step": 1221 }, { "epoch": 0.15075717854609383, "grad_norm": 0.6002185940742493, "learning_rate": 1e-05, "loss": 0.6091, "step": 1222 }, { "epoch": 0.15088054775930668, "grad_norm": 0.5701721906661987, "learning_rate": 1e-05, "loss": 0.6097, "step": 1223 }, { "epoch": 0.1510039169725195, "grad_norm": 0.6843379735946655, "learning_rate": 1e-05, "loss": 0.7797, "step": 1224 }, { "epoch": 0.15112728618573235, "grad_norm": 0.5488750338554382, "learning_rate": 1e-05, "loss": 0.5647, "step": 1225 }, { "epoch": 0.1512506553989452, "grad_norm": 0.5188007354736328, "learning_rate": 1e-05, "loss": 0.5043, "step": 1226 }, { "epoch": 0.15137402461215804, "grad_norm": 0.5888690948486328, "learning_rate": 1e-05, "loss": 0.6756, "step": 1227 }, { "epoch": 0.1514973938253709, "grad_norm": 0.6291328072547913, "learning_rate": 1e-05, "loss": 0.6863, "step": 1228 }, { "epoch": 0.1516207630385837, "grad_norm": 0.6365408897399902, "learning_rate": 1e-05, "loss": 0.6957, "step": 1229 }, { "epoch": 0.15174413225179656, "grad_norm": 0.5746515989303589, "learning_rate": 1e-05, "loss": 0.5654, "step": 1230 }, { "epoch": 0.1518675014650094, "grad_norm": 0.6401516795158386, "learning_rate": 1e-05, "loss": 0.6141, "step": 1231 }, { "epoch": 0.15199087067822226, "grad_norm": 0.6253232359886169, "learning_rate": 1e-05, "loss": 0.6086, "step": 1232 }, { "epoch": 0.1521142398914351, "grad_norm": 0.5706847310066223, "learning_rate": 1e-05, "loss": 0.6309, "step": 1233 }, { "epoch": 0.15223760910464793, "grad_norm": 0.5890093445777893, "learning_rate": 1e-05, "loss": 0.5913, "step": 1234 }, { "epoch": 0.15236097831786077, "grad_norm": 0.6056832075119019, "learning_rate": 1e-05, "loss": 0.5976, "step": 1235 }, { "epoch": 0.15248434753107362, "grad_norm": 0.5730763673782349, "learning_rate": 1e-05, "loss": 0.5463, "step": 1236 }, { "epoch": 0.15260771674428647, "grad_norm": 0.616614580154419, "learning_rate": 1e-05, "loss": 0.6656, "step": 1237 }, { "epoch": 0.15273108595749932, "grad_norm": 0.6313245296478271, "learning_rate": 1e-05, "loss": 0.6979, "step": 1238 }, { "epoch": 0.15285445517071214, "grad_norm": 0.6074888706207275, "learning_rate": 1e-05, "loss": 0.6324, "step": 1239 }, { "epoch": 0.152977824383925, "grad_norm": 0.6007388234138489, "learning_rate": 1e-05, "loss": 0.5348, "step": 1240 }, { "epoch": 0.15310119359713784, "grad_norm": 0.5759404897689819, "learning_rate": 1e-05, "loss": 0.6174, "step": 1241 }, { "epoch": 0.15322456281035068, "grad_norm": 0.6369009017944336, "learning_rate": 1e-05, "loss": 0.6324, "step": 1242 }, { "epoch": 0.15334793202356353, "grad_norm": 0.6486678123474121, "learning_rate": 1e-05, "loss": 0.6549, "step": 1243 }, { "epoch": 0.15347130123677635, "grad_norm": 0.5596218705177307, "learning_rate": 1e-05, "loss": 0.539, "step": 1244 }, { "epoch": 0.1535946704499892, "grad_norm": 0.634625256061554, "learning_rate": 1e-05, "loss": 0.6704, "step": 1245 }, { "epoch": 0.15371803966320205, "grad_norm": 0.5463277101516724, "learning_rate": 1e-05, "loss": 0.5423, "step": 1246 }, { "epoch": 0.1538414088764149, "grad_norm": 0.5567522048950195, "learning_rate": 1e-05, "loss": 0.4693, "step": 1247 }, { "epoch": 0.15396477808962775, "grad_norm": 0.5341894030570984, "learning_rate": 1e-05, "loss": 0.5103, "step": 1248 }, { "epoch": 0.15408814730284057, "grad_norm": 0.6147469878196716, "learning_rate": 1e-05, "loss": 0.5792, "step": 1249 }, { "epoch": 0.15421151651605342, "grad_norm": 0.6681219935417175, "learning_rate": 1e-05, "loss": 0.6511, "step": 1250 }, { "epoch": 0.15433488572926626, "grad_norm": 0.5218627452850342, "learning_rate": 1e-05, "loss": 0.466, "step": 1251 }, { "epoch": 0.1544582549424791, "grad_norm": 0.7048749923706055, "learning_rate": 1e-05, "loss": 0.6961, "step": 1252 }, { "epoch": 0.15458162415569196, "grad_norm": 0.5708070993423462, "learning_rate": 1e-05, "loss": 0.5144, "step": 1253 }, { "epoch": 0.15470499336890478, "grad_norm": 0.6395590901374817, "learning_rate": 1e-05, "loss": 0.677, "step": 1254 }, { "epoch": 0.15482836258211763, "grad_norm": 0.5809499621391296, "learning_rate": 1e-05, "loss": 0.5768, "step": 1255 }, { "epoch": 0.15495173179533048, "grad_norm": 0.6321803331375122, "learning_rate": 1e-05, "loss": 0.6346, "step": 1256 }, { "epoch": 0.15507510100854333, "grad_norm": 0.6455603241920471, "learning_rate": 1e-05, "loss": 0.6393, "step": 1257 }, { "epoch": 0.15519847022175617, "grad_norm": 0.6028273701667786, "learning_rate": 1e-05, "loss": 0.6042, "step": 1258 }, { "epoch": 0.155321839434969, "grad_norm": 0.6526874899864197, "learning_rate": 1e-05, "loss": 0.7892, "step": 1259 }, { "epoch": 0.15544520864818184, "grad_norm": 0.6060288548469543, "learning_rate": 1e-05, "loss": 0.6539, "step": 1260 }, { "epoch": 0.1555685778613947, "grad_norm": 0.7160992622375488, "learning_rate": 1e-05, "loss": 0.6271, "step": 1261 }, { "epoch": 0.15569194707460754, "grad_norm": 0.6191842555999756, "learning_rate": 1e-05, "loss": 0.7004, "step": 1262 }, { "epoch": 0.1558153162878204, "grad_norm": 0.679031252861023, "learning_rate": 1e-05, "loss": 0.6447, "step": 1263 }, { "epoch": 0.1559386855010332, "grad_norm": 0.5657467246055603, "learning_rate": 1e-05, "loss": 0.6168, "step": 1264 }, { "epoch": 0.15606205471424606, "grad_norm": 0.6450391411781311, "learning_rate": 1e-05, "loss": 0.6501, "step": 1265 }, { "epoch": 0.1561854239274589, "grad_norm": 0.5962544679641724, "learning_rate": 1e-05, "loss": 0.5617, "step": 1266 }, { "epoch": 0.15630879314067175, "grad_norm": 0.5581600069999695, "learning_rate": 1e-05, "loss": 0.5102, "step": 1267 }, { "epoch": 0.1564321623538846, "grad_norm": 0.5311964750289917, "learning_rate": 1e-05, "loss": 0.5042, "step": 1268 }, { "epoch": 0.15655553156709742, "grad_norm": 0.5985154509544373, "learning_rate": 1e-05, "loss": 0.682, "step": 1269 }, { "epoch": 0.15667890078031027, "grad_norm": 0.5747293829917908, "learning_rate": 1e-05, "loss": 0.5041, "step": 1270 }, { "epoch": 0.15680226999352312, "grad_norm": 0.5840613842010498, "learning_rate": 1e-05, "loss": 0.5758, "step": 1271 }, { "epoch": 0.15692563920673597, "grad_norm": 0.5546771287918091, "learning_rate": 1e-05, "loss": 0.5927, "step": 1272 }, { "epoch": 0.1570490084199488, "grad_norm": 0.544806182384491, "learning_rate": 1e-05, "loss": 0.5851, "step": 1273 }, { "epoch": 0.15717237763316164, "grad_norm": 0.5644140243530273, "learning_rate": 1e-05, "loss": 0.5288, "step": 1274 }, { "epoch": 0.15729574684637448, "grad_norm": 0.7390309572219849, "learning_rate": 1e-05, "loss": 0.7112, "step": 1275 }, { "epoch": 0.15741911605958733, "grad_norm": 0.5733228325843811, "learning_rate": 1e-05, "loss": 0.6037, "step": 1276 }, { "epoch": 0.15754248527280018, "grad_norm": 0.6483752727508545, "learning_rate": 1e-05, "loss": 0.6701, "step": 1277 }, { "epoch": 0.157665854486013, "grad_norm": 0.7147992253303528, "learning_rate": 1e-05, "loss": 0.685, "step": 1278 }, { "epoch": 0.15778922369922585, "grad_norm": 0.5635371208190918, "learning_rate": 1e-05, "loss": 0.6481, "step": 1279 }, { "epoch": 0.1579125929124387, "grad_norm": 0.5527617335319519, "learning_rate": 1e-05, "loss": 0.5514, "step": 1280 }, { "epoch": 0.15803596212565155, "grad_norm": 0.5614373683929443, "learning_rate": 1e-05, "loss": 0.5421, "step": 1281 }, { "epoch": 0.1581593313388644, "grad_norm": 0.663289487361908, "learning_rate": 1e-05, "loss": 0.6305, "step": 1282 }, { "epoch": 0.15828270055207722, "grad_norm": 0.6873959302902222, "learning_rate": 1e-05, "loss": 0.6629, "step": 1283 }, { "epoch": 0.15840606976529006, "grad_norm": 0.6851422190666199, "learning_rate": 1e-05, "loss": 0.6497, "step": 1284 }, { "epoch": 0.1585294389785029, "grad_norm": 0.5903031826019287, "learning_rate": 1e-05, "loss": 0.6445, "step": 1285 }, { "epoch": 0.15865280819171576, "grad_norm": 0.6263743042945862, "learning_rate": 1e-05, "loss": 0.6886, "step": 1286 }, { "epoch": 0.1587761774049286, "grad_norm": 0.681265652179718, "learning_rate": 1e-05, "loss": 0.6024, "step": 1287 }, { "epoch": 0.15889954661814143, "grad_norm": 0.6143970489501953, "learning_rate": 1e-05, "loss": 0.6629, "step": 1288 }, { "epoch": 0.15902291583135428, "grad_norm": 0.6919503211975098, "learning_rate": 1e-05, "loss": 0.5929, "step": 1289 }, { "epoch": 0.15914628504456713, "grad_norm": 0.6783342361450195, "learning_rate": 1e-05, "loss": 0.673, "step": 1290 }, { "epoch": 0.15926965425777997, "grad_norm": 0.6625657677650452, "learning_rate": 1e-05, "loss": 0.6316, "step": 1291 }, { "epoch": 0.15939302347099282, "grad_norm": 0.5910352468490601, "learning_rate": 1e-05, "loss": 0.6389, "step": 1292 }, { "epoch": 0.15951639268420564, "grad_norm": 0.6209065318107605, "learning_rate": 1e-05, "loss": 0.5925, "step": 1293 }, { "epoch": 0.1596397618974185, "grad_norm": 0.5814810395240784, "learning_rate": 1e-05, "loss": 0.5445, "step": 1294 }, { "epoch": 0.15976313111063134, "grad_norm": 0.6043274998664856, "learning_rate": 1e-05, "loss": 0.6072, "step": 1295 }, { "epoch": 0.1598865003238442, "grad_norm": 0.5743779540061951, "learning_rate": 1e-05, "loss": 0.5864, "step": 1296 }, { "epoch": 0.16000986953705704, "grad_norm": 0.5632287859916687, "learning_rate": 1e-05, "loss": 0.575, "step": 1297 }, { "epoch": 0.16013323875026986, "grad_norm": 0.6849400401115417, "learning_rate": 1e-05, "loss": 0.7186, "step": 1298 }, { "epoch": 0.1602566079634827, "grad_norm": 0.5192420482635498, "learning_rate": 1e-05, "loss": 0.4858, "step": 1299 }, { "epoch": 0.16037997717669555, "grad_norm": 0.6459532380104065, "learning_rate": 1e-05, "loss": 0.7666, "step": 1300 }, { "epoch": 0.1605033463899084, "grad_norm": 0.6216596961021423, "learning_rate": 1e-05, "loss": 0.593, "step": 1301 }, { "epoch": 0.16062671560312125, "grad_norm": 0.7354278564453125, "learning_rate": 1e-05, "loss": 0.6975, "step": 1302 }, { "epoch": 0.16075008481633407, "grad_norm": 0.5606255531311035, "learning_rate": 1e-05, "loss": 0.49, "step": 1303 }, { "epoch": 0.16087345402954692, "grad_norm": 0.6486828327178955, "learning_rate": 1e-05, "loss": 0.6781, "step": 1304 }, { "epoch": 0.16099682324275977, "grad_norm": 0.541122555732727, "learning_rate": 1e-05, "loss": 0.6139, "step": 1305 }, { "epoch": 0.16112019245597262, "grad_norm": 0.6152592301368713, "learning_rate": 1e-05, "loss": 0.6693, "step": 1306 }, { "epoch": 0.16124356166918546, "grad_norm": 0.5739635229110718, "learning_rate": 1e-05, "loss": 0.5568, "step": 1307 }, { "epoch": 0.16136693088239829, "grad_norm": 0.629636824131012, "learning_rate": 1e-05, "loss": 0.6936, "step": 1308 }, { "epoch": 0.16149030009561113, "grad_norm": 0.5915833711624146, "learning_rate": 1e-05, "loss": 0.6199, "step": 1309 }, { "epoch": 0.16161366930882398, "grad_norm": 0.6476815342903137, "learning_rate": 1e-05, "loss": 0.5151, "step": 1310 }, { "epoch": 0.16173703852203683, "grad_norm": 0.590911328792572, "learning_rate": 1e-05, "loss": 0.5581, "step": 1311 }, { "epoch": 0.16186040773524968, "grad_norm": 0.509678840637207, "learning_rate": 1e-05, "loss": 0.4918, "step": 1312 }, { "epoch": 0.1619837769484625, "grad_norm": 0.6263941526412964, "learning_rate": 1e-05, "loss": 0.6398, "step": 1313 }, { "epoch": 0.16210714616167535, "grad_norm": 0.5686319470405579, "learning_rate": 1e-05, "loss": 0.6004, "step": 1314 }, { "epoch": 0.1622305153748882, "grad_norm": 0.5986799597740173, "learning_rate": 1e-05, "loss": 0.6745, "step": 1315 }, { "epoch": 0.16235388458810104, "grad_norm": 0.6681434512138367, "learning_rate": 1e-05, "loss": 0.6625, "step": 1316 }, { "epoch": 0.1624772538013139, "grad_norm": 0.6577621698379517, "learning_rate": 1e-05, "loss": 0.6993, "step": 1317 }, { "epoch": 0.1626006230145267, "grad_norm": 0.5914725065231323, "learning_rate": 1e-05, "loss": 0.5595, "step": 1318 }, { "epoch": 0.16272399222773956, "grad_norm": 0.6592916250228882, "learning_rate": 1e-05, "loss": 0.7296, "step": 1319 }, { "epoch": 0.1628473614409524, "grad_norm": 0.5311395525932312, "learning_rate": 1e-05, "loss": 0.5334, "step": 1320 }, { "epoch": 0.16297073065416526, "grad_norm": 0.5023735165596008, "learning_rate": 1e-05, "loss": 0.46, "step": 1321 }, { "epoch": 0.1630940998673781, "grad_norm": 0.5933510065078735, "learning_rate": 1e-05, "loss": 0.5203, "step": 1322 }, { "epoch": 0.16321746908059093, "grad_norm": 0.5540674328804016, "learning_rate": 1e-05, "loss": 0.6019, "step": 1323 }, { "epoch": 0.16334083829380378, "grad_norm": 0.602191150188446, "learning_rate": 1e-05, "loss": 0.6871, "step": 1324 }, { "epoch": 0.16346420750701662, "grad_norm": 0.5976289510726929, "learning_rate": 1e-05, "loss": 0.6215, "step": 1325 }, { "epoch": 0.16358757672022947, "grad_norm": 0.5976904034614563, "learning_rate": 1e-05, "loss": 0.526, "step": 1326 }, { "epoch": 0.16371094593344232, "grad_norm": 0.6073179244995117, "learning_rate": 1e-05, "loss": 0.6015, "step": 1327 }, { "epoch": 0.16383431514665514, "grad_norm": 0.5847944617271423, "learning_rate": 1e-05, "loss": 0.592, "step": 1328 }, { "epoch": 0.163957684359868, "grad_norm": 0.5243585109710693, "learning_rate": 1e-05, "loss": 0.4561, "step": 1329 }, { "epoch": 0.16408105357308084, "grad_norm": 0.5911519527435303, "learning_rate": 1e-05, "loss": 0.484, "step": 1330 }, { "epoch": 0.16420442278629369, "grad_norm": 0.6088693737983704, "learning_rate": 1e-05, "loss": 0.6247, "step": 1331 }, { "epoch": 0.16432779199950653, "grad_norm": 0.5669758915901184, "learning_rate": 1e-05, "loss": 0.51, "step": 1332 }, { "epoch": 0.16445116121271935, "grad_norm": 0.5942766070365906, "learning_rate": 1e-05, "loss": 0.5767, "step": 1333 }, { "epoch": 0.1645745304259322, "grad_norm": 0.6759349703788757, "learning_rate": 1e-05, "loss": 0.6414, "step": 1334 }, { "epoch": 0.16469789963914505, "grad_norm": 0.6538611054420471, "learning_rate": 1e-05, "loss": 0.6609, "step": 1335 }, { "epoch": 0.1648212688523579, "grad_norm": 0.5967938899993896, "learning_rate": 1e-05, "loss": 0.6317, "step": 1336 }, { "epoch": 0.16494463806557075, "grad_norm": 0.5959211587905884, "learning_rate": 1e-05, "loss": 0.625, "step": 1337 }, { "epoch": 0.16506800727878357, "grad_norm": 0.6074855923652649, "learning_rate": 1e-05, "loss": 0.5704, "step": 1338 }, { "epoch": 0.16519137649199642, "grad_norm": 0.6430473923683167, "learning_rate": 1e-05, "loss": 0.5731, "step": 1339 }, { "epoch": 0.16531474570520927, "grad_norm": 0.5586942434310913, "learning_rate": 1e-05, "loss": 0.5687, "step": 1340 }, { "epoch": 0.1654381149184221, "grad_norm": 0.5503284931182861, "learning_rate": 1e-05, "loss": 0.5578, "step": 1341 }, { "epoch": 0.16556148413163496, "grad_norm": 0.6098592281341553, "learning_rate": 1e-05, "loss": 0.6323, "step": 1342 }, { "epoch": 0.16568485334484778, "grad_norm": 0.569240391254425, "learning_rate": 1e-05, "loss": 0.5375, "step": 1343 }, { "epoch": 0.16580822255806063, "grad_norm": 0.5913271903991699, "learning_rate": 1e-05, "loss": 0.7103, "step": 1344 }, { "epoch": 0.16593159177127348, "grad_norm": 0.6355246305465698, "learning_rate": 1e-05, "loss": 0.6261, "step": 1345 }, { "epoch": 0.16605496098448633, "grad_norm": 0.586956799030304, "learning_rate": 1e-05, "loss": 0.5597, "step": 1346 }, { "epoch": 0.16617833019769918, "grad_norm": 0.5783499479293823, "learning_rate": 1e-05, "loss": 0.6329, "step": 1347 }, { "epoch": 0.166301699410912, "grad_norm": 0.6544833779335022, "learning_rate": 1e-05, "loss": 0.6522, "step": 1348 }, { "epoch": 0.16642506862412484, "grad_norm": 0.6000480055809021, "learning_rate": 1e-05, "loss": 0.5712, "step": 1349 }, { "epoch": 0.1665484378373377, "grad_norm": 0.594396710395813, "learning_rate": 1e-05, "loss": 0.5381, "step": 1350 }, { "epoch": 0.16667180705055054, "grad_norm": 0.6335678100585938, "learning_rate": 1e-05, "loss": 0.515, "step": 1351 }, { "epoch": 0.1667951762637634, "grad_norm": 0.555543065071106, "learning_rate": 1e-05, "loss": 0.5868, "step": 1352 }, { "epoch": 0.1669185454769762, "grad_norm": 0.5812152028083801, "learning_rate": 1e-05, "loss": 0.5902, "step": 1353 }, { "epoch": 0.16704191469018906, "grad_norm": 0.5690121650695801, "learning_rate": 1e-05, "loss": 0.5622, "step": 1354 }, { "epoch": 0.1671652839034019, "grad_norm": 0.6021883487701416, "learning_rate": 1e-05, "loss": 0.592, "step": 1355 }, { "epoch": 0.16728865311661476, "grad_norm": 0.5338316559791565, "learning_rate": 1e-05, "loss": 0.4648, "step": 1356 }, { "epoch": 0.1674120223298276, "grad_norm": 0.5128103494644165, "learning_rate": 1e-05, "loss": 0.4683, "step": 1357 }, { "epoch": 0.16753539154304042, "grad_norm": 0.5093370676040649, "learning_rate": 1e-05, "loss": 0.4551, "step": 1358 }, { "epoch": 0.16765876075625327, "grad_norm": 0.6128836870193481, "learning_rate": 1e-05, "loss": 0.5885, "step": 1359 }, { "epoch": 0.16778212996946612, "grad_norm": 0.5074846148490906, "learning_rate": 1e-05, "loss": 0.5139, "step": 1360 }, { "epoch": 0.16790549918267897, "grad_norm": 0.647907555103302, "learning_rate": 1e-05, "loss": 0.6813, "step": 1361 }, { "epoch": 0.16802886839589182, "grad_norm": 0.6299780607223511, "learning_rate": 1e-05, "loss": 0.6473, "step": 1362 }, { "epoch": 0.16815223760910464, "grad_norm": 0.6567357778549194, "learning_rate": 1e-05, "loss": 0.6325, "step": 1363 }, { "epoch": 0.16827560682231749, "grad_norm": 0.6330839991569519, "learning_rate": 1e-05, "loss": 0.6685, "step": 1364 }, { "epoch": 0.16839897603553033, "grad_norm": 0.6043751835823059, "learning_rate": 1e-05, "loss": 0.652, "step": 1365 }, { "epoch": 0.16852234524874318, "grad_norm": 0.5553057193756104, "learning_rate": 1e-05, "loss": 0.5542, "step": 1366 }, { "epoch": 0.16864571446195603, "grad_norm": 0.6436865925788879, "learning_rate": 1e-05, "loss": 0.5013, "step": 1367 }, { "epoch": 0.16876908367516885, "grad_norm": 0.7386382818222046, "learning_rate": 1e-05, "loss": 0.6994, "step": 1368 }, { "epoch": 0.1688924528883817, "grad_norm": 0.5559189915657043, "learning_rate": 1e-05, "loss": 0.4895, "step": 1369 }, { "epoch": 0.16901582210159455, "grad_norm": 0.5614380240440369, "learning_rate": 1e-05, "loss": 0.5497, "step": 1370 }, { "epoch": 0.1691391913148074, "grad_norm": 0.5921884775161743, "learning_rate": 1e-05, "loss": 0.6324, "step": 1371 }, { "epoch": 0.16926256052802024, "grad_norm": 0.5830715894699097, "learning_rate": 1e-05, "loss": 0.5426, "step": 1372 }, { "epoch": 0.16938592974123307, "grad_norm": 0.5852150321006775, "learning_rate": 1e-05, "loss": 0.6086, "step": 1373 }, { "epoch": 0.16950929895444591, "grad_norm": 0.5378096103668213, "learning_rate": 1e-05, "loss": 0.5009, "step": 1374 }, { "epoch": 0.16963266816765876, "grad_norm": 0.5302000045776367, "learning_rate": 1e-05, "loss": 0.4483, "step": 1375 }, { "epoch": 0.1697560373808716, "grad_norm": 0.6090794205665588, "learning_rate": 1e-05, "loss": 0.6235, "step": 1376 }, { "epoch": 0.16987940659408446, "grad_norm": 0.5620583891868591, "learning_rate": 1e-05, "loss": 0.547, "step": 1377 }, { "epoch": 0.17000277580729728, "grad_norm": 0.5836871862411499, "learning_rate": 1e-05, "loss": 0.5834, "step": 1378 }, { "epoch": 0.17012614502051013, "grad_norm": 0.5769467353820801, "learning_rate": 1e-05, "loss": 0.5426, "step": 1379 }, { "epoch": 0.17024951423372298, "grad_norm": 0.5864458084106445, "learning_rate": 1e-05, "loss": 0.5739, "step": 1380 }, { "epoch": 0.17037288344693582, "grad_norm": 0.6256905794143677, "learning_rate": 1e-05, "loss": 0.6204, "step": 1381 }, { "epoch": 0.17049625266014867, "grad_norm": 0.6458309292793274, "learning_rate": 1e-05, "loss": 0.586, "step": 1382 }, { "epoch": 0.1706196218733615, "grad_norm": 0.6578165888786316, "learning_rate": 1e-05, "loss": 0.6808, "step": 1383 }, { "epoch": 0.17074299108657434, "grad_norm": 0.5168820023536682, "learning_rate": 1e-05, "loss": 0.4653, "step": 1384 }, { "epoch": 0.1708663602997872, "grad_norm": 0.5833832025527954, "learning_rate": 1e-05, "loss": 0.5774, "step": 1385 }, { "epoch": 0.17098972951300004, "grad_norm": 0.61765056848526, "learning_rate": 1e-05, "loss": 0.6041, "step": 1386 }, { "epoch": 0.1711130987262129, "grad_norm": 0.6491623520851135, "learning_rate": 1e-05, "loss": 0.6629, "step": 1387 }, { "epoch": 0.1712364679394257, "grad_norm": 0.6325035095214844, "learning_rate": 1e-05, "loss": 0.6133, "step": 1388 }, { "epoch": 0.17135983715263856, "grad_norm": 0.7338211536407471, "learning_rate": 1e-05, "loss": 0.6343, "step": 1389 }, { "epoch": 0.1714832063658514, "grad_norm": 0.5915918946266174, "learning_rate": 1e-05, "loss": 0.6377, "step": 1390 }, { "epoch": 0.17160657557906425, "grad_norm": 0.6097543239593506, "learning_rate": 1e-05, "loss": 0.6482, "step": 1391 }, { "epoch": 0.1717299447922771, "grad_norm": 0.6087876558303833, "learning_rate": 1e-05, "loss": 0.6881, "step": 1392 }, { "epoch": 0.17185331400548992, "grad_norm": 0.5841989517211914, "learning_rate": 1e-05, "loss": 0.6182, "step": 1393 }, { "epoch": 0.17197668321870277, "grad_norm": 0.6268092393875122, "learning_rate": 1e-05, "loss": 0.7078, "step": 1394 }, { "epoch": 0.17210005243191562, "grad_norm": 0.6044963002204895, "learning_rate": 1e-05, "loss": 0.5918, "step": 1395 }, { "epoch": 0.17222342164512847, "grad_norm": 0.6009758114814758, "learning_rate": 1e-05, "loss": 0.6411, "step": 1396 }, { "epoch": 0.17234679085834131, "grad_norm": 0.5776037573814392, "learning_rate": 1e-05, "loss": 0.6082, "step": 1397 }, { "epoch": 0.17247016007155413, "grad_norm": 0.5917057394981384, "learning_rate": 1e-05, "loss": 0.5718, "step": 1398 }, { "epoch": 0.17259352928476698, "grad_norm": 0.5985581278800964, "learning_rate": 1e-05, "loss": 0.5835, "step": 1399 }, { "epoch": 0.17271689849797983, "grad_norm": 0.5935748219490051, "learning_rate": 1e-05, "loss": 0.5711, "step": 1400 }, { "epoch": 0.17284026771119268, "grad_norm": 0.6110780239105225, "learning_rate": 1e-05, "loss": 0.6682, "step": 1401 }, { "epoch": 0.17296363692440553, "grad_norm": 0.6975710988044739, "learning_rate": 1e-05, "loss": 0.7099, "step": 1402 }, { "epoch": 0.17308700613761835, "grad_norm": 0.5978606343269348, "learning_rate": 1e-05, "loss": 0.524, "step": 1403 }, { "epoch": 0.1732103753508312, "grad_norm": 0.5167611837387085, "learning_rate": 1e-05, "loss": 0.5387, "step": 1404 }, { "epoch": 0.17333374456404405, "grad_norm": 0.5569655299186707, "learning_rate": 1e-05, "loss": 0.6163, "step": 1405 }, { "epoch": 0.1734571137772569, "grad_norm": 0.6026011109352112, "learning_rate": 1e-05, "loss": 0.5896, "step": 1406 }, { "epoch": 0.17358048299046971, "grad_norm": 0.6555896997451782, "learning_rate": 1e-05, "loss": 0.7488, "step": 1407 }, { "epoch": 0.17370385220368256, "grad_norm": 0.6141147613525391, "learning_rate": 1e-05, "loss": 0.6798, "step": 1408 }, { "epoch": 0.1738272214168954, "grad_norm": 0.5758657455444336, "learning_rate": 1e-05, "loss": 0.5912, "step": 1409 }, { "epoch": 0.17395059063010826, "grad_norm": 0.604698896408081, "learning_rate": 1e-05, "loss": 0.5879, "step": 1410 }, { "epoch": 0.1740739598433211, "grad_norm": 0.6622260808944702, "learning_rate": 1e-05, "loss": 0.6405, "step": 1411 }, { "epoch": 0.17419732905653393, "grad_norm": 0.541377067565918, "learning_rate": 1e-05, "loss": 0.6193, "step": 1412 }, { "epoch": 0.17432069826974678, "grad_norm": 0.5724209547042847, "learning_rate": 1e-05, "loss": 0.6297, "step": 1413 }, { "epoch": 0.17444406748295962, "grad_norm": 0.5879709720611572, "learning_rate": 1e-05, "loss": 0.599, "step": 1414 }, { "epoch": 0.17456743669617247, "grad_norm": 0.5970587730407715, "learning_rate": 1e-05, "loss": 0.5685, "step": 1415 }, { "epoch": 0.17469080590938532, "grad_norm": 0.6659982800483704, "learning_rate": 1e-05, "loss": 0.6414, "step": 1416 }, { "epoch": 0.17481417512259814, "grad_norm": 0.5296661853790283, "learning_rate": 1e-05, "loss": 0.5869, "step": 1417 }, { "epoch": 0.174937544335811, "grad_norm": 0.5568834543228149, "learning_rate": 1e-05, "loss": 0.4926, "step": 1418 }, { "epoch": 0.17506091354902384, "grad_norm": 0.6285920143127441, "learning_rate": 1e-05, "loss": 0.7082, "step": 1419 }, { "epoch": 0.1751842827622367, "grad_norm": 0.6357538104057312, "learning_rate": 1e-05, "loss": 0.6775, "step": 1420 }, { "epoch": 0.17530765197544954, "grad_norm": 0.5593284368515015, "learning_rate": 1e-05, "loss": 0.544, "step": 1421 }, { "epoch": 0.17543102118866236, "grad_norm": 0.6290630102157593, "learning_rate": 1e-05, "loss": 0.6143, "step": 1422 }, { "epoch": 0.1755543904018752, "grad_norm": 0.6148583889007568, "learning_rate": 1e-05, "loss": 0.6627, "step": 1423 }, { "epoch": 0.17567775961508805, "grad_norm": 0.6073105931282043, "learning_rate": 1e-05, "loss": 0.5965, "step": 1424 }, { "epoch": 0.1758011288283009, "grad_norm": 0.5772759914398193, "learning_rate": 1e-05, "loss": 0.5912, "step": 1425 }, { "epoch": 0.17592449804151375, "grad_norm": 0.5804688334465027, "learning_rate": 1e-05, "loss": 0.5924, "step": 1426 }, { "epoch": 0.17604786725472657, "grad_norm": 0.5630443096160889, "learning_rate": 1e-05, "loss": 0.5233, "step": 1427 }, { "epoch": 0.17617123646793942, "grad_norm": 0.5764265060424805, "learning_rate": 1e-05, "loss": 0.5345, "step": 1428 }, { "epoch": 0.17629460568115227, "grad_norm": 0.6152485609054565, "learning_rate": 1e-05, "loss": 0.5919, "step": 1429 }, { "epoch": 0.17641797489436511, "grad_norm": 0.5950880646705627, "learning_rate": 1e-05, "loss": 0.6514, "step": 1430 }, { "epoch": 0.17654134410757796, "grad_norm": 0.6025393605232239, "learning_rate": 1e-05, "loss": 0.7194, "step": 1431 }, { "epoch": 0.17666471332079078, "grad_norm": 0.5491715669631958, "learning_rate": 1e-05, "loss": 0.5058, "step": 1432 }, { "epoch": 0.17678808253400363, "grad_norm": 0.6280636787414551, "learning_rate": 1e-05, "loss": 0.5789, "step": 1433 }, { "epoch": 0.17691145174721648, "grad_norm": 0.5515477657318115, "learning_rate": 1e-05, "loss": 0.5348, "step": 1434 }, { "epoch": 0.17703482096042933, "grad_norm": 0.5892338156700134, "learning_rate": 1e-05, "loss": 0.6233, "step": 1435 }, { "epoch": 0.17715819017364218, "grad_norm": 0.5776546001434326, "learning_rate": 1e-05, "loss": 0.5482, "step": 1436 }, { "epoch": 0.177281559386855, "grad_norm": 0.5223098397254944, "learning_rate": 1e-05, "loss": 0.5024, "step": 1437 }, { "epoch": 0.17740492860006785, "grad_norm": 0.6256270408630371, "learning_rate": 1e-05, "loss": 0.6201, "step": 1438 }, { "epoch": 0.1775282978132807, "grad_norm": 0.6648322939872742, "learning_rate": 1e-05, "loss": 0.6278, "step": 1439 }, { "epoch": 0.17765166702649354, "grad_norm": 0.6399833559989929, "learning_rate": 1e-05, "loss": 0.6926, "step": 1440 }, { "epoch": 0.1777750362397064, "grad_norm": 0.6330356001853943, "learning_rate": 1e-05, "loss": 0.625, "step": 1441 }, { "epoch": 0.1778984054529192, "grad_norm": 0.6051684617996216, "learning_rate": 1e-05, "loss": 0.6552, "step": 1442 }, { "epoch": 0.17802177466613206, "grad_norm": 0.6318832039833069, "learning_rate": 1e-05, "loss": 0.5823, "step": 1443 }, { "epoch": 0.1781451438793449, "grad_norm": 0.6064520478248596, "learning_rate": 1e-05, "loss": 0.5613, "step": 1444 }, { "epoch": 0.17826851309255776, "grad_norm": 0.5499850511550903, "learning_rate": 1e-05, "loss": 0.5586, "step": 1445 }, { "epoch": 0.1783918823057706, "grad_norm": 0.5785058736801147, "learning_rate": 1e-05, "loss": 0.5627, "step": 1446 }, { "epoch": 0.17851525151898343, "grad_norm": 0.561181902885437, "learning_rate": 1e-05, "loss": 0.5379, "step": 1447 }, { "epoch": 0.17863862073219627, "grad_norm": 0.6299298405647278, "learning_rate": 1e-05, "loss": 0.63, "step": 1448 }, { "epoch": 0.17876198994540912, "grad_norm": 0.6563483476638794, "learning_rate": 1e-05, "loss": 0.6435, "step": 1449 }, { "epoch": 0.17888535915862197, "grad_norm": 0.5572364926338196, "learning_rate": 1e-05, "loss": 0.5211, "step": 1450 }, { "epoch": 0.17900872837183482, "grad_norm": 0.6478597521781921, "learning_rate": 1e-05, "loss": 0.5886, "step": 1451 }, { "epoch": 0.17913209758504764, "grad_norm": 0.5429078340530396, "learning_rate": 1e-05, "loss": 0.5041, "step": 1452 }, { "epoch": 0.1792554667982605, "grad_norm": 0.585900604724884, "learning_rate": 1e-05, "loss": 0.6369, "step": 1453 }, { "epoch": 0.17937883601147334, "grad_norm": 0.6304806470870972, "learning_rate": 1e-05, "loss": 0.6229, "step": 1454 }, { "epoch": 0.17950220522468618, "grad_norm": 0.6310139894485474, "learning_rate": 1e-05, "loss": 0.6237, "step": 1455 }, { "epoch": 0.17962557443789903, "grad_norm": 0.5698610544204712, "learning_rate": 1e-05, "loss": 0.4986, "step": 1456 }, { "epoch": 0.17974894365111185, "grad_norm": 0.627486526966095, "learning_rate": 1e-05, "loss": 0.6241, "step": 1457 }, { "epoch": 0.1798723128643247, "grad_norm": 0.6258780360221863, "learning_rate": 1e-05, "loss": 0.6076, "step": 1458 }, { "epoch": 0.17999568207753755, "grad_norm": 0.6134940385818481, "learning_rate": 1e-05, "loss": 0.5275, "step": 1459 }, { "epoch": 0.1801190512907504, "grad_norm": 0.6201620697975159, "learning_rate": 1e-05, "loss": 0.5893, "step": 1460 }, { "epoch": 0.18024242050396325, "grad_norm": 0.5738028287887573, "learning_rate": 1e-05, "loss": 0.5707, "step": 1461 }, { "epoch": 0.18036578971717607, "grad_norm": 0.5581071972846985, "learning_rate": 1e-05, "loss": 0.5809, "step": 1462 }, { "epoch": 0.18048915893038892, "grad_norm": 0.5761184692382812, "learning_rate": 1e-05, "loss": 0.6088, "step": 1463 }, { "epoch": 0.18061252814360176, "grad_norm": 0.7510678172111511, "learning_rate": 1e-05, "loss": 0.6925, "step": 1464 }, { "epoch": 0.1807358973568146, "grad_norm": 0.6510051488876343, "learning_rate": 1e-05, "loss": 0.644, "step": 1465 }, { "epoch": 0.18085926657002746, "grad_norm": 0.6663166284561157, "learning_rate": 1e-05, "loss": 0.6859, "step": 1466 }, { "epoch": 0.18098263578324028, "grad_norm": 0.49531224370002747, "learning_rate": 1e-05, "loss": 0.5002, "step": 1467 }, { "epoch": 0.18110600499645313, "grad_norm": 0.6285102963447571, "learning_rate": 1e-05, "loss": 0.6281, "step": 1468 }, { "epoch": 0.18122937420966598, "grad_norm": 0.6583281755447388, "learning_rate": 1e-05, "loss": 0.6914, "step": 1469 }, { "epoch": 0.18135274342287883, "grad_norm": 0.6010138988494873, "learning_rate": 1e-05, "loss": 0.5857, "step": 1470 }, { "epoch": 0.18147611263609167, "grad_norm": 0.6035161018371582, "learning_rate": 1e-05, "loss": 0.61, "step": 1471 }, { "epoch": 0.1815994818493045, "grad_norm": 0.5825844407081604, "learning_rate": 1e-05, "loss": 0.5817, "step": 1472 }, { "epoch": 0.18172285106251734, "grad_norm": 0.6423900723457336, "learning_rate": 1e-05, "loss": 0.6537, "step": 1473 }, { "epoch": 0.1818462202757302, "grad_norm": 0.5706498622894287, "learning_rate": 1e-05, "loss": 0.5857, "step": 1474 }, { "epoch": 0.18196958948894304, "grad_norm": 0.5542414784431458, "learning_rate": 1e-05, "loss": 0.5646, "step": 1475 }, { "epoch": 0.1820929587021559, "grad_norm": 0.562289297580719, "learning_rate": 1e-05, "loss": 0.585, "step": 1476 }, { "epoch": 0.1822163279153687, "grad_norm": 0.5702707171440125, "learning_rate": 1e-05, "loss": 0.5707, "step": 1477 }, { "epoch": 0.18233969712858156, "grad_norm": 0.5284931659698486, "learning_rate": 1e-05, "loss": 0.5096, "step": 1478 }, { "epoch": 0.1824630663417944, "grad_norm": 0.6386755704879761, "learning_rate": 1e-05, "loss": 0.6718, "step": 1479 }, { "epoch": 0.18258643555500725, "grad_norm": 0.7288224697113037, "learning_rate": 1e-05, "loss": 0.7039, "step": 1480 }, { "epoch": 0.1827098047682201, "grad_norm": 0.6231094002723694, "learning_rate": 1e-05, "loss": 0.592, "step": 1481 }, { "epoch": 0.18283317398143292, "grad_norm": 0.563120424747467, "learning_rate": 1e-05, "loss": 0.6013, "step": 1482 }, { "epoch": 0.18295654319464577, "grad_norm": 0.5715072751045227, "learning_rate": 1e-05, "loss": 0.5906, "step": 1483 }, { "epoch": 0.18307991240785862, "grad_norm": 0.5498408079147339, "learning_rate": 1e-05, "loss": 0.5702, "step": 1484 }, { "epoch": 0.18320328162107147, "grad_norm": 0.7068774700164795, "learning_rate": 1e-05, "loss": 0.6292, "step": 1485 }, { "epoch": 0.18332665083428432, "grad_norm": 0.5630149245262146, "learning_rate": 1e-05, "loss": 0.5004, "step": 1486 }, { "epoch": 0.18345002004749714, "grad_norm": 0.5463991165161133, "learning_rate": 1e-05, "loss": 0.4604, "step": 1487 }, { "epoch": 0.18357338926070998, "grad_norm": 0.5421369075775146, "learning_rate": 1e-05, "loss": 0.5417, "step": 1488 }, { "epoch": 0.18369675847392283, "grad_norm": 0.5781387090682983, "learning_rate": 1e-05, "loss": 0.5379, "step": 1489 }, { "epoch": 0.18382012768713568, "grad_norm": 0.5993568301200867, "learning_rate": 1e-05, "loss": 0.5386, "step": 1490 }, { "epoch": 0.18394349690034853, "grad_norm": 0.5155007243156433, "learning_rate": 1e-05, "loss": 0.4931, "step": 1491 }, { "epoch": 0.18406686611356135, "grad_norm": 0.5355122685432434, "learning_rate": 1e-05, "loss": 0.5043, "step": 1492 }, { "epoch": 0.1841902353267742, "grad_norm": 0.6011319756507874, "learning_rate": 1e-05, "loss": 0.6595, "step": 1493 }, { "epoch": 0.18431360453998705, "grad_norm": 0.6980515718460083, "learning_rate": 1e-05, "loss": 0.7192, "step": 1494 }, { "epoch": 0.1844369737531999, "grad_norm": 0.5503998398780823, "learning_rate": 1e-05, "loss": 0.5452, "step": 1495 }, { "epoch": 0.18456034296641274, "grad_norm": 0.6555482745170593, "learning_rate": 1e-05, "loss": 0.6121, "step": 1496 }, { "epoch": 0.18468371217962556, "grad_norm": 0.712791919708252, "learning_rate": 1e-05, "loss": 0.6231, "step": 1497 }, { "epoch": 0.1848070813928384, "grad_norm": 0.6144706606864929, "learning_rate": 1e-05, "loss": 0.5042, "step": 1498 }, { "epoch": 0.18493045060605126, "grad_norm": 0.6046552062034607, "learning_rate": 1e-05, "loss": 0.6898, "step": 1499 }, { "epoch": 0.1850538198192641, "grad_norm": 0.6722352504730225, "learning_rate": 1e-05, "loss": 0.6953, "step": 1500 }, { "epoch": 0.18517718903247696, "grad_norm": 0.6160069108009338, "learning_rate": 1e-05, "loss": 0.573, "step": 1501 }, { "epoch": 0.18530055824568978, "grad_norm": 0.6124862432479858, "learning_rate": 1e-05, "loss": 0.5774, "step": 1502 }, { "epoch": 0.18542392745890263, "grad_norm": 0.6140773296356201, "learning_rate": 1e-05, "loss": 0.6536, "step": 1503 }, { "epoch": 0.18554729667211547, "grad_norm": 0.5490710139274597, "learning_rate": 1e-05, "loss": 0.5421, "step": 1504 }, { "epoch": 0.18567066588532832, "grad_norm": 0.5625138282775879, "learning_rate": 1e-05, "loss": 0.5105, "step": 1505 }, { "epoch": 0.18579403509854117, "grad_norm": 0.5470868945121765, "learning_rate": 1e-05, "loss": 0.5871, "step": 1506 }, { "epoch": 0.185917404311754, "grad_norm": 0.6492623686790466, "learning_rate": 1e-05, "loss": 0.6365, "step": 1507 }, { "epoch": 0.18604077352496684, "grad_norm": 0.6457206606864929, "learning_rate": 1e-05, "loss": 0.706, "step": 1508 }, { "epoch": 0.1861641427381797, "grad_norm": 0.5955114364624023, "learning_rate": 1e-05, "loss": 0.5565, "step": 1509 }, { "epoch": 0.18628751195139254, "grad_norm": 0.4766409993171692, "learning_rate": 1e-05, "loss": 0.44, "step": 1510 }, { "epoch": 0.18641088116460539, "grad_norm": 0.6656385064125061, "learning_rate": 1e-05, "loss": 0.703, "step": 1511 }, { "epoch": 0.1865342503778182, "grad_norm": 0.6941162943840027, "learning_rate": 1e-05, "loss": 0.6065, "step": 1512 }, { "epoch": 0.18665761959103105, "grad_norm": 0.6399810314178467, "learning_rate": 1e-05, "loss": 0.6489, "step": 1513 }, { "epoch": 0.1867809888042439, "grad_norm": 0.5990459322929382, "learning_rate": 1e-05, "loss": 0.6285, "step": 1514 }, { "epoch": 0.18690435801745675, "grad_norm": 0.5420210957527161, "learning_rate": 1e-05, "loss": 0.5751, "step": 1515 }, { "epoch": 0.1870277272306696, "grad_norm": 0.6132910251617432, "learning_rate": 1e-05, "loss": 0.4807, "step": 1516 }, { "epoch": 0.18715109644388242, "grad_norm": 0.5285121202468872, "learning_rate": 1e-05, "loss": 0.5335, "step": 1517 }, { "epoch": 0.18727446565709527, "grad_norm": 0.5822078585624695, "learning_rate": 1e-05, "loss": 0.5568, "step": 1518 }, { "epoch": 0.18739783487030812, "grad_norm": 0.5513888597488403, "learning_rate": 1e-05, "loss": 0.526, "step": 1519 }, { "epoch": 0.18752120408352096, "grad_norm": 0.7042272686958313, "learning_rate": 1e-05, "loss": 0.7255, "step": 1520 }, { "epoch": 0.1876445732967338, "grad_norm": 0.553839385509491, "learning_rate": 1e-05, "loss": 0.5835, "step": 1521 }, { "epoch": 0.18776794250994663, "grad_norm": 0.643916666507721, "learning_rate": 1e-05, "loss": 0.6251, "step": 1522 }, { "epoch": 0.18789131172315948, "grad_norm": 0.5861712694168091, "learning_rate": 1e-05, "loss": 0.5534, "step": 1523 }, { "epoch": 0.18801468093637233, "grad_norm": 0.641473650932312, "learning_rate": 1e-05, "loss": 0.6768, "step": 1524 }, { "epoch": 0.18813805014958518, "grad_norm": 0.5653306245803833, "learning_rate": 1e-05, "loss": 0.5336, "step": 1525 }, { "epoch": 0.18826141936279803, "grad_norm": 0.5235188007354736, "learning_rate": 1e-05, "loss": 0.4827, "step": 1526 }, { "epoch": 0.18838478857601085, "grad_norm": 0.6030436158180237, "learning_rate": 1e-05, "loss": 0.5779, "step": 1527 }, { "epoch": 0.1885081577892237, "grad_norm": 0.5358695983886719, "learning_rate": 1e-05, "loss": 0.5355, "step": 1528 }, { "epoch": 0.18863152700243654, "grad_norm": 0.5771702527999878, "learning_rate": 1e-05, "loss": 0.5085, "step": 1529 }, { "epoch": 0.1887548962156494, "grad_norm": 0.5979045033454895, "learning_rate": 1e-05, "loss": 0.6363, "step": 1530 }, { "epoch": 0.18887826542886224, "grad_norm": 0.5736837387084961, "learning_rate": 1e-05, "loss": 0.6257, "step": 1531 }, { "epoch": 0.18900163464207506, "grad_norm": 0.5426259636878967, "learning_rate": 1e-05, "loss": 0.5914, "step": 1532 }, { "epoch": 0.1891250038552879, "grad_norm": 0.5823668837547302, "learning_rate": 1e-05, "loss": 0.6142, "step": 1533 }, { "epoch": 0.18924837306850076, "grad_norm": 0.5909394025802612, "learning_rate": 1e-05, "loss": 0.6681, "step": 1534 }, { "epoch": 0.1893717422817136, "grad_norm": 0.5663127899169922, "learning_rate": 1e-05, "loss": 0.5384, "step": 1535 }, { "epoch": 0.18949511149492645, "grad_norm": 0.5591380596160889, "learning_rate": 1e-05, "loss": 0.5696, "step": 1536 }, { "epoch": 0.18961848070813928, "grad_norm": 0.5303158760070801, "learning_rate": 1e-05, "loss": 0.4406, "step": 1537 }, { "epoch": 0.18974184992135212, "grad_norm": 0.6327853798866272, "learning_rate": 1e-05, "loss": 0.6437, "step": 1538 }, { "epoch": 0.18986521913456497, "grad_norm": 0.5639046430587769, "learning_rate": 1e-05, "loss": 0.5249, "step": 1539 }, { "epoch": 0.18998858834777782, "grad_norm": 0.5796468257904053, "learning_rate": 1e-05, "loss": 0.5841, "step": 1540 }, { "epoch": 0.19011195756099064, "grad_norm": 0.5485278367996216, "learning_rate": 1e-05, "loss": 0.5454, "step": 1541 }, { "epoch": 0.1902353267742035, "grad_norm": 0.5273466110229492, "learning_rate": 1e-05, "loss": 0.5521, "step": 1542 }, { "epoch": 0.19035869598741634, "grad_norm": 0.5772281289100647, "learning_rate": 1e-05, "loss": 0.646, "step": 1543 }, { "epoch": 0.19048206520062919, "grad_norm": 0.6173272728919983, "learning_rate": 1e-05, "loss": 0.6488, "step": 1544 }, { "epoch": 0.19060543441384203, "grad_norm": 0.5157741904258728, "learning_rate": 1e-05, "loss": 0.5107, "step": 1545 }, { "epoch": 0.19072880362705485, "grad_norm": 0.6745272874832153, "learning_rate": 1e-05, "loss": 0.6205, "step": 1546 }, { "epoch": 0.1908521728402677, "grad_norm": 0.6059731841087341, "learning_rate": 1e-05, "loss": 0.5912, "step": 1547 }, { "epoch": 0.19097554205348055, "grad_norm": 0.6514725685119629, "learning_rate": 1e-05, "loss": 0.6088, "step": 1548 }, { "epoch": 0.1910989112666934, "grad_norm": 0.575731635093689, "learning_rate": 1e-05, "loss": 0.5541, "step": 1549 }, { "epoch": 0.19122228047990625, "grad_norm": 0.5724289417266846, "learning_rate": 1e-05, "loss": 0.6066, "step": 1550 }, { "epoch": 0.19134564969311907, "grad_norm": 0.5260459184646606, "learning_rate": 1e-05, "loss": 0.5031, "step": 1551 }, { "epoch": 0.19146901890633192, "grad_norm": 0.570237398147583, "learning_rate": 1e-05, "loss": 0.555, "step": 1552 }, { "epoch": 0.19159238811954477, "grad_norm": 0.5222923159599304, "learning_rate": 1e-05, "loss": 0.5275, "step": 1553 }, { "epoch": 0.1917157573327576, "grad_norm": 0.6447715759277344, "learning_rate": 1e-05, "loss": 0.6592, "step": 1554 }, { "epoch": 0.19183912654597046, "grad_norm": 0.5300583243370056, "learning_rate": 1e-05, "loss": 0.5335, "step": 1555 }, { "epoch": 0.19196249575918328, "grad_norm": 0.5837185382843018, "learning_rate": 1e-05, "loss": 0.6046, "step": 1556 }, { "epoch": 0.19208586497239613, "grad_norm": 0.5754802227020264, "learning_rate": 1e-05, "loss": 0.5597, "step": 1557 }, { "epoch": 0.19220923418560898, "grad_norm": 0.6380780935287476, "learning_rate": 1e-05, "loss": 0.627, "step": 1558 }, { "epoch": 0.19233260339882183, "grad_norm": 0.5764129757881165, "learning_rate": 1e-05, "loss": 0.5509, "step": 1559 }, { "epoch": 0.19245597261203468, "grad_norm": 0.6110897064208984, "learning_rate": 1e-05, "loss": 0.6843, "step": 1560 }, { "epoch": 0.1925793418252475, "grad_norm": 0.6361885666847229, "learning_rate": 1e-05, "loss": 0.5887, "step": 1561 }, { "epoch": 0.19270271103846034, "grad_norm": 0.6072838306427002, "learning_rate": 1e-05, "loss": 0.5796, "step": 1562 }, { "epoch": 0.1928260802516732, "grad_norm": 0.538295567035675, "learning_rate": 1e-05, "loss": 0.6045, "step": 1563 }, { "epoch": 0.19294944946488604, "grad_norm": 0.5595994591712952, "learning_rate": 1e-05, "loss": 0.5446, "step": 1564 }, { "epoch": 0.1930728186780989, "grad_norm": 0.48170608282089233, "learning_rate": 1e-05, "loss": 0.4312, "step": 1565 }, { "epoch": 0.1931961878913117, "grad_norm": 0.6482919454574585, "learning_rate": 1e-05, "loss": 0.6826, "step": 1566 }, { "epoch": 0.19331955710452456, "grad_norm": 0.6795153021812439, "learning_rate": 1e-05, "loss": 0.6518, "step": 1567 }, { "epoch": 0.1934429263177374, "grad_norm": 0.5681809782981873, "learning_rate": 1e-05, "loss": 0.5016, "step": 1568 }, { "epoch": 0.19356629553095026, "grad_norm": 0.6220571398735046, "learning_rate": 1e-05, "loss": 0.6288, "step": 1569 }, { "epoch": 0.1936896647441631, "grad_norm": 0.5925187468528748, "learning_rate": 1e-05, "loss": 0.5295, "step": 1570 }, { "epoch": 0.19381303395737592, "grad_norm": 0.5392367839813232, "learning_rate": 1e-05, "loss": 0.5033, "step": 1571 }, { "epoch": 0.19393640317058877, "grad_norm": 0.703919529914856, "learning_rate": 1e-05, "loss": 0.6326, "step": 1572 }, { "epoch": 0.19405977238380162, "grad_norm": 0.5347233414649963, "learning_rate": 1e-05, "loss": 0.5408, "step": 1573 }, { "epoch": 0.19418314159701447, "grad_norm": 0.7123591899871826, "learning_rate": 1e-05, "loss": 0.6444, "step": 1574 }, { "epoch": 0.19430651081022732, "grad_norm": 0.6033807396888733, "learning_rate": 1e-05, "loss": 0.6384, "step": 1575 }, { "epoch": 0.19442988002344014, "grad_norm": 0.5779603123664856, "learning_rate": 1e-05, "loss": 0.5313, "step": 1576 }, { "epoch": 0.194553249236653, "grad_norm": 0.5107507109642029, "learning_rate": 1e-05, "loss": 0.5232, "step": 1577 }, { "epoch": 0.19467661844986583, "grad_norm": 0.5911905765533447, "learning_rate": 1e-05, "loss": 0.6069, "step": 1578 }, { "epoch": 0.19479998766307868, "grad_norm": 0.6221149563789368, "learning_rate": 1e-05, "loss": 0.6712, "step": 1579 }, { "epoch": 0.19492335687629153, "grad_norm": 0.6632144451141357, "learning_rate": 1e-05, "loss": 0.6533, "step": 1580 }, { "epoch": 0.19504672608950435, "grad_norm": 0.5681166648864746, "learning_rate": 1e-05, "loss": 0.5429, "step": 1581 }, { "epoch": 0.1951700953027172, "grad_norm": 0.6039696335792542, "learning_rate": 1e-05, "loss": 0.6583, "step": 1582 }, { "epoch": 0.19529346451593005, "grad_norm": 0.6050593256950378, "learning_rate": 1e-05, "loss": 0.6962, "step": 1583 }, { "epoch": 0.1954168337291429, "grad_norm": 0.6254820227622986, "learning_rate": 1e-05, "loss": 0.6448, "step": 1584 }, { "epoch": 0.19554020294235575, "grad_norm": 0.567329466342926, "learning_rate": 1e-05, "loss": 0.5454, "step": 1585 }, { "epoch": 0.19566357215556857, "grad_norm": 0.5595303773880005, "learning_rate": 1e-05, "loss": 0.5814, "step": 1586 }, { "epoch": 0.19578694136878141, "grad_norm": 0.6104230880737305, "learning_rate": 1e-05, "loss": 0.5379, "step": 1587 }, { "epoch": 0.19591031058199426, "grad_norm": 0.6413572430610657, "learning_rate": 1e-05, "loss": 0.6114, "step": 1588 }, { "epoch": 0.1960336797952071, "grad_norm": 0.5942163467407227, "learning_rate": 1e-05, "loss": 0.533, "step": 1589 }, { "epoch": 0.19615704900841996, "grad_norm": 0.6729963421821594, "learning_rate": 1e-05, "loss": 0.6116, "step": 1590 }, { "epoch": 0.19628041822163278, "grad_norm": 0.5170100927352905, "learning_rate": 1e-05, "loss": 0.5482, "step": 1591 }, { "epoch": 0.19640378743484563, "grad_norm": 0.5389378666877747, "learning_rate": 1e-05, "loss": 0.4993, "step": 1592 }, { "epoch": 0.19652715664805848, "grad_norm": 0.5944957733154297, "learning_rate": 1e-05, "loss": 0.535, "step": 1593 }, { "epoch": 0.19665052586127132, "grad_norm": 0.7379570603370667, "learning_rate": 1e-05, "loss": 0.6865, "step": 1594 }, { "epoch": 0.19677389507448417, "grad_norm": 0.6445943117141724, "learning_rate": 1e-05, "loss": 0.707, "step": 1595 }, { "epoch": 0.196897264287697, "grad_norm": 0.700204074382782, "learning_rate": 1e-05, "loss": 0.7624, "step": 1596 }, { "epoch": 0.19702063350090984, "grad_norm": 0.5842903852462769, "learning_rate": 1e-05, "loss": 0.5367, "step": 1597 }, { "epoch": 0.1971440027141227, "grad_norm": 0.5499956011772156, "learning_rate": 1e-05, "loss": 0.5845, "step": 1598 }, { "epoch": 0.19726737192733554, "grad_norm": 0.5812950730323792, "learning_rate": 1e-05, "loss": 0.5285, "step": 1599 }, { "epoch": 0.1973907411405484, "grad_norm": 0.6025910973548889, "learning_rate": 1e-05, "loss": 0.6195, "step": 1600 }, { "epoch": 0.1975141103537612, "grad_norm": 0.5550588965415955, "learning_rate": 1e-05, "loss": 0.5666, "step": 1601 }, { "epoch": 0.19763747956697406, "grad_norm": 0.5680839419364929, "learning_rate": 1e-05, "loss": 0.5632, "step": 1602 }, { "epoch": 0.1977608487801869, "grad_norm": 0.6280676126480103, "learning_rate": 1e-05, "loss": 0.6164, "step": 1603 }, { "epoch": 0.19788421799339975, "grad_norm": 0.6746618747711182, "learning_rate": 1e-05, "loss": 0.6885, "step": 1604 }, { "epoch": 0.1980075872066126, "grad_norm": 0.5457843542098999, "learning_rate": 1e-05, "loss": 0.5271, "step": 1605 }, { "epoch": 0.19813095641982542, "grad_norm": 0.6216585636138916, "learning_rate": 1e-05, "loss": 0.7049, "step": 1606 }, { "epoch": 0.19825432563303827, "grad_norm": 0.5875800251960754, "learning_rate": 1e-05, "loss": 0.5922, "step": 1607 }, { "epoch": 0.19837769484625112, "grad_norm": 0.5707862377166748, "learning_rate": 1e-05, "loss": 0.6028, "step": 1608 }, { "epoch": 0.19850106405946397, "grad_norm": 0.5272007584571838, "learning_rate": 1e-05, "loss": 0.5499, "step": 1609 }, { "epoch": 0.19862443327267681, "grad_norm": 0.6565138101577759, "learning_rate": 1e-05, "loss": 0.6819, "step": 1610 }, { "epoch": 0.19874780248588964, "grad_norm": 0.6510030627250671, "learning_rate": 1e-05, "loss": 0.7122, "step": 1611 }, { "epoch": 0.19887117169910248, "grad_norm": 0.6345839500427246, "learning_rate": 1e-05, "loss": 0.6748, "step": 1612 }, { "epoch": 0.19899454091231533, "grad_norm": 0.5779432058334351, "learning_rate": 1e-05, "loss": 0.5685, "step": 1613 }, { "epoch": 0.19911791012552818, "grad_norm": 0.5408890843391418, "learning_rate": 1e-05, "loss": 0.6182, "step": 1614 }, { "epoch": 0.19924127933874103, "grad_norm": 0.6218544244766235, "learning_rate": 1e-05, "loss": 0.6144, "step": 1615 }, { "epoch": 0.19936464855195385, "grad_norm": 0.5932675004005432, "learning_rate": 1e-05, "loss": 0.6153, "step": 1616 }, { "epoch": 0.1994880177651667, "grad_norm": 0.5167198777198792, "learning_rate": 1e-05, "loss": 0.4873, "step": 1617 }, { "epoch": 0.19961138697837955, "grad_norm": 0.5708170533180237, "learning_rate": 1e-05, "loss": 0.5313, "step": 1618 }, { "epoch": 0.1997347561915924, "grad_norm": 0.5472599267959595, "learning_rate": 1e-05, "loss": 0.5141, "step": 1619 }, { "epoch": 0.19985812540480524, "grad_norm": 0.6770691871643066, "learning_rate": 1e-05, "loss": 0.6626, "step": 1620 }, { "epoch": 0.19998149461801806, "grad_norm": 0.6172374486923218, "learning_rate": 1e-05, "loss": 0.5215, "step": 1621 }, { "epoch": 0.2001048638312309, "grad_norm": 0.5935702919960022, "learning_rate": 1e-05, "loss": 0.5859, "step": 1622 }, { "epoch": 0.20022823304444376, "grad_norm": 0.5725869536399841, "learning_rate": 1e-05, "loss": 0.5655, "step": 1623 }, { "epoch": 0.2003516022576566, "grad_norm": 0.6377503871917725, "learning_rate": 1e-05, "loss": 0.7188, "step": 1624 }, { "epoch": 0.20047497147086946, "grad_norm": 0.5736850500106812, "learning_rate": 1e-05, "loss": 0.6895, "step": 1625 }, { "epoch": 0.20059834068408228, "grad_norm": 0.6337167024612427, "learning_rate": 1e-05, "loss": 0.5782, "step": 1626 }, { "epoch": 0.20072170989729513, "grad_norm": 0.7355852127075195, "learning_rate": 1e-05, "loss": 0.8179, "step": 1627 }, { "epoch": 0.20084507911050797, "grad_norm": 0.6063698530197144, "learning_rate": 1e-05, "loss": 0.6404, "step": 1628 }, { "epoch": 0.20096844832372082, "grad_norm": 0.5573896169662476, "learning_rate": 1e-05, "loss": 0.5972, "step": 1629 }, { "epoch": 0.20109181753693367, "grad_norm": 0.5689536333084106, "learning_rate": 1e-05, "loss": 0.5367, "step": 1630 }, { "epoch": 0.2012151867501465, "grad_norm": 0.6727303862571716, "learning_rate": 1e-05, "loss": 0.5798, "step": 1631 }, { "epoch": 0.20133855596335934, "grad_norm": 0.5881199240684509, "learning_rate": 1e-05, "loss": 0.5276, "step": 1632 }, { "epoch": 0.2014619251765722, "grad_norm": 0.5947594046592712, "learning_rate": 1e-05, "loss": 0.6709, "step": 1633 }, { "epoch": 0.20158529438978504, "grad_norm": 0.6291419863700867, "learning_rate": 1e-05, "loss": 0.6131, "step": 1634 }, { "epoch": 0.20170866360299788, "grad_norm": 0.512417197227478, "learning_rate": 1e-05, "loss": 0.4932, "step": 1635 }, { "epoch": 0.2018320328162107, "grad_norm": 0.6602473855018616, "learning_rate": 1e-05, "loss": 0.6786, "step": 1636 }, { "epoch": 0.20195540202942355, "grad_norm": 0.5495691299438477, "learning_rate": 1e-05, "loss": 0.4951, "step": 1637 }, { "epoch": 0.2020787712426364, "grad_norm": 0.5559059381484985, "learning_rate": 1e-05, "loss": 0.5621, "step": 1638 }, { "epoch": 0.20220214045584925, "grad_norm": 0.6657743453979492, "learning_rate": 1e-05, "loss": 0.5779, "step": 1639 }, { "epoch": 0.2023255096690621, "grad_norm": 0.505855143070221, "learning_rate": 1e-05, "loss": 0.4619, "step": 1640 }, { "epoch": 0.20244887888227492, "grad_norm": 0.6898963451385498, "learning_rate": 1e-05, "loss": 0.6052, "step": 1641 }, { "epoch": 0.20257224809548777, "grad_norm": 0.6166219711303711, "learning_rate": 1e-05, "loss": 0.5905, "step": 1642 }, { "epoch": 0.20269561730870062, "grad_norm": 0.5965019464492798, "learning_rate": 1e-05, "loss": 0.546, "step": 1643 }, { "epoch": 0.20281898652191346, "grad_norm": 0.5930303335189819, "learning_rate": 1e-05, "loss": 0.5503, "step": 1644 }, { "epoch": 0.2029423557351263, "grad_norm": 0.5842453241348267, "learning_rate": 1e-05, "loss": 0.6277, "step": 1645 }, { "epoch": 0.20306572494833913, "grad_norm": 0.6046603322029114, "learning_rate": 1e-05, "loss": 0.5461, "step": 1646 }, { "epoch": 0.20318909416155198, "grad_norm": 0.5910404324531555, "learning_rate": 1e-05, "loss": 0.5835, "step": 1647 }, { "epoch": 0.20331246337476483, "grad_norm": 0.5699877142906189, "learning_rate": 1e-05, "loss": 0.6353, "step": 1648 }, { "epoch": 0.20343583258797768, "grad_norm": 0.5754281878471375, "learning_rate": 1e-05, "loss": 0.5975, "step": 1649 }, { "epoch": 0.20355920180119053, "grad_norm": 0.602631151676178, "learning_rate": 1e-05, "loss": 0.5677, "step": 1650 }, { "epoch": 0.20368257101440335, "grad_norm": 0.6614733934402466, "learning_rate": 1e-05, "loss": 0.748, "step": 1651 }, { "epoch": 0.2038059402276162, "grad_norm": 0.5375634431838989, "learning_rate": 1e-05, "loss": 0.5685, "step": 1652 }, { "epoch": 0.20392930944082904, "grad_norm": 0.6591721773147583, "learning_rate": 1e-05, "loss": 0.5866, "step": 1653 }, { "epoch": 0.2040526786540419, "grad_norm": 0.5911968946456909, "learning_rate": 1e-05, "loss": 0.574, "step": 1654 }, { "epoch": 0.20417604786725474, "grad_norm": 0.7042771577835083, "learning_rate": 1e-05, "loss": 0.6681, "step": 1655 }, { "epoch": 0.20429941708046756, "grad_norm": 0.531428873538971, "learning_rate": 1e-05, "loss": 0.5915, "step": 1656 }, { "epoch": 0.2044227862936804, "grad_norm": 0.597179651260376, "learning_rate": 1e-05, "loss": 0.6177, "step": 1657 }, { "epoch": 0.20454615550689326, "grad_norm": 0.6180896162986755, "learning_rate": 1e-05, "loss": 0.6732, "step": 1658 }, { "epoch": 0.2046695247201061, "grad_norm": 0.5492801070213318, "learning_rate": 1e-05, "loss": 0.5791, "step": 1659 }, { "epoch": 0.20479289393331895, "grad_norm": 0.5956289768218994, "learning_rate": 1e-05, "loss": 0.5491, "step": 1660 }, { "epoch": 0.20491626314653177, "grad_norm": 0.6299879550933838, "learning_rate": 1e-05, "loss": 0.5934, "step": 1661 }, { "epoch": 0.20503963235974462, "grad_norm": 0.6654905080795288, "learning_rate": 1e-05, "loss": 0.7265, "step": 1662 }, { "epoch": 0.20516300157295747, "grad_norm": 0.5912556052207947, "learning_rate": 1e-05, "loss": 0.5692, "step": 1663 }, { "epoch": 0.20528637078617032, "grad_norm": 0.6312081813812256, "learning_rate": 1e-05, "loss": 0.6732, "step": 1664 }, { "epoch": 0.20540973999938317, "grad_norm": 0.6003246307373047, "learning_rate": 1e-05, "loss": 0.6642, "step": 1665 }, { "epoch": 0.205533109212596, "grad_norm": 0.6399087905883789, "learning_rate": 1e-05, "loss": 0.6027, "step": 1666 }, { "epoch": 0.20565647842580884, "grad_norm": 0.5954822301864624, "learning_rate": 1e-05, "loss": 0.602, "step": 1667 }, { "epoch": 0.20577984763902168, "grad_norm": 0.5352650284767151, "learning_rate": 1e-05, "loss": 0.5384, "step": 1668 }, { "epoch": 0.20590321685223453, "grad_norm": 0.6956432461738586, "learning_rate": 1e-05, "loss": 0.6727, "step": 1669 }, { "epoch": 0.20602658606544738, "grad_norm": 0.6571081280708313, "learning_rate": 1e-05, "loss": 0.621, "step": 1670 }, { "epoch": 0.2061499552786602, "grad_norm": 0.6039599180221558, "learning_rate": 1e-05, "loss": 0.6944, "step": 1671 }, { "epoch": 0.20627332449187305, "grad_norm": 0.5922442078590393, "learning_rate": 1e-05, "loss": 0.6112, "step": 1672 }, { "epoch": 0.2063966937050859, "grad_norm": 0.6135786771774292, "learning_rate": 1e-05, "loss": 0.5685, "step": 1673 }, { "epoch": 0.20652006291829875, "grad_norm": 0.583037257194519, "learning_rate": 1e-05, "loss": 0.568, "step": 1674 }, { "epoch": 0.20664343213151157, "grad_norm": 0.737629771232605, "learning_rate": 1e-05, "loss": 0.6714, "step": 1675 }, { "epoch": 0.20676680134472442, "grad_norm": 0.5540207624435425, "learning_rate": 1e-05, "loss": 0.5559, "step": 1676 }, { "epoch": 0.20689017055793726, "grad_norm": 0.5732203125953674, "learning_rate": 1e-05, "loss": 0.5696, "step": 1677 }, { "epoch": 0.2070135397711501, "grad_norm": 0.5346493721008301, "learning_rate": 1e-05, "loss": 0.5321, "step": 1678 }, { "epoch": 0.20713690898436296, "grad_norm": 0.5572640895843506, "learning_rate": 1e-05, "loss": 0.6442, "step": 1679 }, { "epoch": 0.20726027819757578, "grad_norm": 0.5810232758522034, "learning_rate": 1e-05, "loss": 0.5833, "step": 1680 }, { "epoch": 0.20738364741078863, "grad_norm": 0.6594004034996033, "learning_rate": 1e-05, "loss": 0.6765, "step": 1681 }, { "epoch": 0.20750701662400148, "grad_norm": 0.6089377403259277, "learning_rate": 1e-05, "loss": 0.5877, "step": 1682 }, { "epoch": 0.20763038583721433, "grad_norm": 0.5567061901092529, "learning_rate": 1e-05, "loss": 0.6, "step": 1683 }, { "epoch": 0.20775375505042717, "grad_norm": 0.6173037886619568, "learning_rate": 1e-05, "loss": 0.554, "step": 1684 }, { "epoch": 0.20787712426364, "grad_norm": 0.5403650403022766, "learning_rate": 1e-05, "loss": 0.5873, "step": 1685 }, { "epoch": 0.20800049347685284, "grad_norm": 0.6527242660522461, "learning_rate": 1e-05, "loss": 0.6935, "step": 1686 }, { "epoch": 0.2081238626900657, "grad_norm": 0.6235274076461792, "learning_rate": 1e-05, "loss": 0.5342, "step": 1687 }, { "epoch": 0.20824723190327854, "grad_norm": 0.5519470572471619, "learning_rate": 1e-05, "loss": 0.5825, "step": 1688 }, { "epoch": 0.2083706011164914, "grad_norm": 0.626278281211853, "learning_rate": 1e-05, "loss": 0.629, "step": 1689 }, { "epoch": 0.2084939703297042, "grad_norm": 0.6618713140487671, "learning_rate": 1e-05, "loss": 0.6678, "step": 1690 }, { "epoch": 0.20861733954291706, "grad_norm": 0.7313177585601807, "learning_rate": 1e-05, "loss": 0.7097, "step": 1691 }, { "epoch": 0.2087407087561299, "grad_norm": 0.5412400960922241, "learning_rate": 1e-05, "loss": 0.5126, "step": 1692 }, { "epoch": 0.20886407796934275, "grad_norm": 0.5871806740760803, "learning_rate": 1e-05, "loss": 0.6004, "step": 1693 }, { "epoch": 0.2089874471825556, "grad_norm": 0.5653643608093262, "learning_rate": 1e-05, "loss": 0.5982, "step": 1694 }, { "epoch": 0.20911081639576842, "grad_norm": 0.5786752104759216, "learning_rate": 1e-05, "loss": 0.5338, "step": 1695 }, { "epoch": 0.20923418560898127, "grad_norm": 0.5235323905944824, "learning_rate": 1e-05, "loss": 0.544, "step": 1696 }, { "epoch": 0.20935755482219412, "grad_norm": 0.6362161040306091, "learning_rate": 1e-05, "loss": 0.6686, "step": 1697 }, { "epoch": 0.20948092403540697, "grad_norm": 0.5951741337776184, "learning_rate": 1e-05, "loss": 0.653, "step": 1698 }, { "epoch": 0.20960429324861982, "grad_norm": 0.7274183630943298, "learning_rate": 1e-05, "loss": 0.669, "step": 1699 }, { "epoch": 0.20972766246183264, "grad_norm": 0.5684137940406799, "learning_rate": 1e-05, "loss": 0.5249, "step": 1700 }, { "epoch": 0.20985103167504549, "grad_norm": 0.619889497756958, "learning_rate": 1e-05, "loss": 0.5725, "step": 1701 }, { "epoch": 0.20997440088825833, "grad_norm": 0.7203104496002197, "learning_rate": 1e-05, "loss": 0.6745, "step": 1702 }, { "epoch": 0.21009777010147118, "grad_norm": 0.6220672726631165, "learning_rate": 1e-05, "loss": 0.5862, "step": 1703 }, { "epoch": 0.21022113931468403, "grad_norm": 0.6278883814811707, "learning_rate": 1e-05, "loss": 0.6404, "step": 1704 }, { "epoch": 0.21034450852789685, "grad_norm": 0.6238960027694702, "learning_rate": 1e-05, "loss": 0.5893, "step": 1705 }, { "epoch": 0.2104678777411097, "grad_norm": 0.6556424498558044, "learning_rate": 1e-05, "loss": 0.6821, "step": 1706 }, { "epoch": 0.21059124695432255, "grad_norm": 0.5839439630508423, "learning_rate": 1e-05, "loss": 0.5855, "step": 1707 }, { "epoch": 0.2107146161675354, "grad_norm": 0.547321081161499, "learning_rate": 1e-05, "loss": 0.5791, "step": 1708 }, { "epoch": 0.21083798538074824, "grad_norm": 0.5750097632408142, "learning_rate": 1e-05, "loss": 0.5243, "step": 1709 }, { "epoch": 0.21096135459396106, "grad_norm": 0.5375807285308838, "learning_rate": 1e-05, "loss": 0.5332, "step": 1710 }, { "epoch": 0.2110847238071739, "grad_norm": 0.5460032224655151, "learning_rate": 1e-05, "loss": 0.5884, "step": 1711 }, { "epoch": 0.21120809302038676, "grad_norm": 0.6775709986686707, "learning_rate": 1e-05, "loss": 0.6757, "step": 1712 }, { "epoch": 0.2113314622335996, "grad_norm": 0.5894275307655334, "learning_rate": 1e-05, "loss": 0.6044, "step": 1713 }, { "epoch": 0.21145483144681246, "grad_norm": 0.5552064776420593, "learning_rate": 1e-05, "loss": 0.5954, "step": 1714 }, { "epoch": 0.21157820066002528, "grad_norm": 0.5775042176246643, "learning_rate": 1e-05, "loss": 0.6144, "step": 1715 }, { "epoch": 0.21170156987323813, "grad_norm": 0.5591100454330444, "learning_rate": 1e-05, "loss": 0.5099, "step": 1716 }, { "epoch": 0.21182493908645098, "grad_norm": 0.5912057757377625, "learning_rate": 1e-05, "loss": 0.5764, "step": 1717 }, { "epoch": 0.21194830829966382, "grad_norm": 0.6532388925552368, "learning_rate": 1e-05, "loss": 0.6968, "step": 1718 }, { "epoch": 0.21207167751287667, "grad_norm": 0.5435070395469666, "learning_rate": 1e-05, "loss": 0.4963, "step": 1719 }, { "epoch": 0.2121950467260895, "grad_norm": 0.5902355909347534, "learning_rate": 1e-05, "loss": 0.5418, "step": 1720 }, { "epoch": 0.21231841593930234, "grad_norm": 0.5596263408660889, "learning_rate": 1e-05, "loss": 0.5256, "step": 1721 }, { "epoch": 0.2124417851525152, "grad_norm": 0.6445684432983398, "learning_rate": 1e-05, "loss": 0.575, "step": 1722 }, { "epoch": 0.21256515436572804, "grad_norm": 0.6554718017578125, "learning_rate": 1e-05, "loss": 0.719, "step": 1723 }, { "epoch": 0.21268852357894089, "grad_norm": 0.5887124538421631, "learning_rate": 1e-05, "loss": 0.5903, "step": 1724 }, { "epoch": 0.2128118927921537, "grad_norm": 0.5870595574378967, "learning_rate": 1e-05, "loss": 0.5842, "step": 1725 }, { "epoch": 0.21293526200536655, "grad_norm": 0.6168197989463806, "learning_rate": 1e-05, "loss": 0.6182, "step": 1726 }, { "epoch": 0.2130586312185794, "grad_norm": 0.5936869978904724, "learning_rate": 1e-05, "loss": 0.6314, "step": 1727 }, { "epoch": 0.21318200043179225, "grad_norm": 0.5683929324150085, "learning_rate": 1e-05, "loss": 0.619, "step": 1728 }, { "epoch": 0.2133053696450051, "grad_norm": 0.5599418878555298, "learning_rate": 1e-05, "loss": 0.5255, "step": 1729 }, { "epoch": 0.21342873885821792, "grad_norm": 0.5917370319366455, "learning_rate": 1e-05, "loss": 0.5479, "step": 1730 }, { "epoch": 0.21355210807143077, "grad_norm": 0.5162064433097839, "learning_rate": 1e-05, "loss": 0.4836, "step": 1731 }, { "epoch": 0.21367547728464362, "grad_norm": 0.46602296829223633, "learning_rate": 1e-05, "loss": 0.4181, "step": 1732 }, { "epoch": 0.21379884649785647, "grad_norm": 0.5954990983009338, "learning_rate": 1e-05, "loss": 0.6253, "step": 1733 }, { "epoch": 0.2139222157110693, "grad_norm": 0.625479519367218, "learning_rate": 1e-05, "loss": 0.5877, "step": 1734 }, { "epoch": 0.21404558492428213, "grad_norm": 0.5753469467163086, "learning_rate": 1e-05, "loss": 0.5742, "step": 1735 }, { "epoch": 0.21416895413749498, "grad_norm": 0.5899917483329773, "learning_rate": 1e-05, "loss": 0.6412, "step": 1736 }, { "epoch": 0.21429232335070783, "grad_norm": 0.5062068700790405, "learning_rate": 1e-05, "loss": 0.5109, "step": 1737 }, { "epoch": 0.21441569256392068, "grad_norm": 0.5290830135345459, "learning_rate": 1e-05, "loss": 0.5052, "step": 1738 }, { "epoch": 0.21453906177713353, "grad_norm": 0.5507689118385315, "learning_rate": 1e-05, "loss": 0.5957, "step": 1739 }, { "epoch": 0.21466243099034635, "grad_norm": 0.6274013519287109, "learning_rate": 1e-05, "loss": 0.6085, "step": 1740 }, { "epoch": 0.2147858002035592, "grad_norm": 0.692728579044342, "learning_rate": 1e-05, "loss": 0.6485, "step": 1741 }, { "epoch": 0.21490916941677204, "grad_norm": 0.6093851923942566, "learning_rate": 1e-05, "loss": 0.5981, "step": 1742 }, { "epoch": 0.2150325386299849, "grad_norm": 0.5445655584335327, "learning_rate": 1e-05, "loss": 0.5383, "step": 1743 }, { "epoch": 0.21515590784319774, "grad_norm": 0.5951668620109558, "learning_rate": 1e-05, "loss": 0.608, "step": 1744 }, { "epoch": 0.21527927705641056, "grad_norm": 0.5801432132720947, "learning_rate": 1e-05, "loss": 0.6134, "step": 1745 }, { "epoch": 0.2154026462696234, "grad_norm": 0.577435314655304, "learning_rate": 1e-05, "loss": 0.5054, "step": 1746 }, { "epoch": 0.21552601548283626, "grad_norm": 0.5828835368156433, "learning_rate": 1e-05, "loss": 0.6047, "step": 1747 }, { "epoch": 0.2156493846960491, "grad_norm": 0.5861814618110657, "learning_rate": 1e-05, "loss": 0.5813, "step": 1748 }, { "epoch": 0.21577275390926196, "grad_norm": 0.5776733756065369, "learning_rate": 1e-05, "loss": 0.5341, "step": 1749 }, { "epoch": 0.21589612312247478, "grad_norm": 0.5509042143821716, "learning_rate": 1e-05, "loss": 0.4984, "step": 1750 }, { "epoch": 0.21601949233568762, "grad_norm": 0.5627179145812988, "learning_rate": 1e-05, "loss": 0.6001, "step": 1751 }, { "epoch": 0.21614286154890047, "grad_norm": 0.5858948230743408, "learning_rate": 1e-05, "loss": 0.628, "step": 1752 }, { "epoch": 0.21626623076211332, "grad_norm": 0.6497735381126404, "learning_rate": 1e-05, "loss": 0.5491, "step": 1753 }, { "epoch": 0.21638959997532617, "grad_norm": 0.6560208797454834, "learning_rate": 1e-05, "loss": 0.6472, "step": 1754 }, { "epoch": 0.216512969188539, "grad_norm": 0.6587162017822266, "learning_rate": 1e-05, "loss": 0.6281, "step": 1755 }, { "epoch": 0.21663633840175184, "grad_norm": 0.6221523284912109, "learning_rate": 1e-05, "loss": 0.6637, "step": 1756 }, { "epoch": 0.2167597076149647, "grad_norm": 0.5662312507629395, "learning_rate": 1e-05, "loss": 0.5999, "step": 1757 }, { "epoch": 0.21688307682817753, "grad_norm": 0.599020779132843, "learning_rate": 1e-05, "loss": 0.663, "step": 1758 }, { "epoch": 0.21700644604139038, "grad_norm": 0.658964216709137, "learning_rate": 1e-05, "loss": 0.6778, "step": 1759 }, { "epoch": 0.2171298152546032, "grad_norm": 0.6082273721694946, "learning_rate": 1e-05, "loss": 0.5899, "step": 1760 }, { "epoch": 0.21725318446781605, "grad_norm": 0.628711462020874, "learning_rate": 1e-05, "loss": 0.5915, "step": 1761 }, { "epoch": 0.2173765536810289, "grad_norm": 0.5824037194252014, "learning_rate": 1e-05, "loss": 0.5211, "step": 1762 }, { "epoch": 0.21749992289424175, "grad_norm": 0.5947689414024353, "learning_rate": 1e-05, "loss": 0.5978, "step": 1763 }, { "epoch": 0.2176232921074546, "grad_norm": 0.5650920271873474, "learning_rate": 1e-05, "loss": 0.6323, "step": 1764 }, { "epoch": 0.21774666132066742, "grad_norm": 0.5707841515541077, "learning_rate": 1e-05, "loss": 0.5526, "step": 1765 }, { "epoch": 0.21787003053388027, "grad_norm": 0.6134500503540039, "learning_rate": 1e-05, "loss": 0.5757, "step": 1766 }, { "epoch": 0.21799339974709311, "grad_norm": 0.5888264775276184, "learning_rate": 1e-05, "loss": 0.5558, "step": 1767 }, { "epoch": 0.21811676896030596, "grad_norm": 0.5791694521903992, "learning_rate": 1e-05, "loss": 0.6356, "step": 1768 }, { "epoch": 0.2182401381735188, "grad_norm": 0.6234893202781677, "learning_rate": 1e-05, "loss": 0.7285, "step": 1769 }, { "epoch": 0.21836350738673163, "grad_norm": 0.5433657169342041, "learning_rate": 1e-05, "loss": 0.5056, "step": 1770 }, { "epoch": 0.21848687659994448, "grad_norm": 0.561937153339386, "learning_rate": 1e-05, "loss": 0.5484, "step": 1771 }, { "epoch": 0.21861024581315733, "grad_norm": 0.6020365953445435, "learning_rate": 1e-05, "loss": 0.634, "step": 1772 }, { "epoch": 0.21873361502637018, "grad_norm": 0.6750563383102417, "learning_rate": 1e-05, "loss": 0.7095, "step": 1773 }, { "epoch": 0.21885698423958302, "grad_norm": 0.6404975056648254, "learning_rate": 1e-05, "loss": 0.7286, "step": 1774 }, { "epoch": 0.21898035345279585, "grad_norm": 0.6012925505638123, "learning_rate": 1e-05, "loss": 0.5806, "step": 1775 }, { "epoch": 0.2191037226660087, "grad_norm": 0.5710405707359314, "learning_rate": 1e-05, "loss": 0.5965, "step": 1776 }, { "epoch": 0.21922709187922154, "grad_norm": 0.6863515377044678, "learning_rate": 1e-05, "loss": 0.6795, "step": 1777 }, { "epoch": 0.2193504610924344, "grad_norm": 0.6505815386772156, "learning_rate": 1e-05, "loss": 0.6215, "step": 1778 }, { "epoch": 0.21947383030564724, "grad_norm": 0.586643397808075, "learning_rate": 1e-05, "loss": 0.6446, "step": 1779 }, { "epoch": 0.21959719951886006, "grad_norm": 0.5862578749656677, "learning_rate": 1e-05, "loss": 0.5423, "step": 1780 }, { "epoch": 0.2197205687320729, "grad_norm": 0.6029909253120422, "learning_rate": 1e-05, "loss": 0.6444, "step": 1781 }, { "epoch": 0.21984393794528576, "grad_norm": 0.561144232749939, "learning_rate": 1e-05, "loss": 0.5206, "step": 1782 }, { "epoch": 0.2199673071584986, "grad_norm": 0.6151432394981384, "learning_rate": 1e-05, "loss": 0.5769, "step": 1783 }, { "epoch": 0.22009067637171145, "grad_norm": 0.5829938650131226, "learning_rate": 1e-05, "loss": 0.6881, "step": 1784 }, { "epoch": 0.22021404558492427, "grad_norm": 0.5867950916290283, "learning_rate": 1e-05, "loss": 0.5816, "step": 1785 }, { "epoch": 0.22033741479813712, "grad_norm": 0.5140068531036377, "learning_rate": 1e-05, "loss": 0.5206, "step": 1786 }, { "epoch": 0.22046078401134997, "grad_norm": 0.5911858081817627, "learning_rate": 1e-05, "loss": 0.6367, "step": 1787 }, { "epoch": 0.22058415322456282, "grad_norm": 0.6242959499359131, "learning_rate": 1e-05, "loss": 0.6108, "step": 1788 }, { "epoch": 0.22070752243777567, "grad_norm": 0.5551954507827759, "learning_rate": 1e-05, "loss": 0.5354, "step": 1789 }, { "epoch": 0.2208308916509885, "grad_norm": 0.6574713587760925, "learning_rate": 1e-05, "loss": 0.6624, "step": 1790 }, { "epoch": 0.22095426086420134, "grad_norm": 0.5662931203842163, "learning_rate": 1e-05, "loss": 0.5388, "step": 1791 }, { "epoch": 0.22107763007741418, "grad_norm": 0.5566637516021729, "learning_rate": 1e-05, "loss": 0.5233, "step": 1792 }, { "epoch": 0.22120099929062703, "grad_norm": 0.5785941481590271, "learning_rate": 1e-05, "loss": 0.5916, "step": 1793 }, { "epoch": 0.22132436850383988, "grad_norm": 0.6300886869430542, "learning_rate": 1e-05, "loss": 0.6327, "step": 1794 }, { "epoch": 0.2214477377170527, "grad_norm": 0.46270835399627686, "learning_rate": 1e-05, "loss": 0.448, "step": 1795 }, { "epoch": 0.22157110693026555, "grad_norm": 0.5854263305664062, "learning_rate": 1e-05, "loss": 0.5928, "step": 1796 }, { "epoch": 0.2216944761434784, "grad_norm": 0.7294324040412903, "learning_rate": 1e-05, "loss": 0.6566, "step": 1797 }, { "epoch": 0.22181784535669125, "grad_norm": 0.5447314381599426, "learning_rate": 1e-05, "loss": 0.5601, "step": 1798 }, { "epoch": 0.2219412145699041, "grad_norm": 0.5675225257873535, "learning_rate": 1e-05, "loss": 0.5909, "step": 1799 }, { "epoch": 0.22206458378311691, "grad_norm": 0.5246520638465881, "learning_rate": 1e-05, "loss": 0.4657, "step": 1800 }, { "epoch": 0.22218795299632976, "grad_norm": 0.5889299511909485, "learning_rate": 1e-05, "loss": 0.5995, "step": 1801 }, { "epoch": 0.2223113222095426, "grad_norm": 0.580586850643158, "learning_rate": 1e-05, "loss": 0.674, "step": 1802 }, { "epoch": 0.22243469142275546, "grad_norm": 0.5943822264671326, "learning_rate": 1e-05, "loss": 0.5863, "step": 1803 }, { "epoch": 0.2225580606359683, "grad_norm": 0.5868968367576599, "learning_rate": 1e-05, "loss": 0.6546, "step": 1804 }, { "epoch": 0.22268142984918113, "grad_norm": 0.6156930327415466, "learning_rate": 1e-05, "loss": 0.6885, "step": 1805 }, { "epoch": 0.22280479906239398, "grad_norm": 0.6086090803146362, "learning_rate": 1e-05, "loss": 0.6297, "step": 1806 }, { "epoch": 0.22292816827560683, "grad_norm": 0.6245513558387756, "learning_rate": 1e-05, "loss": 0.5846, "step": 1807 }, { "epoch": 0.22305153748881967, "grad_norm": 0.6598588824272156, "learning_rate": 1e-05, "loss": 0.6138, "step": 1808 }, { "epoch": 0.2231749067020325, "grad_norm": 0.572546660900116, "learning_rate": 1e-05, "loss": 0.56, "step": 1809 }, { "epoch": 0.22329827591524534, "grad_norm": 0.6560487151145935, "learning_rate": 1e-05, "loss": 0.6942, "step": 1810 }, { "epoch": 0.2234216451284582, "grad_norm": 0.5752702951431274, "learning_rate": 1e-05, "loss": 0.5697, "step": 1811 }, { "epoch": 0.22354501434167104, "grad_norm": 0.5346210598945618, "learning_rate": 1e-05, "loss": 0.4921, "step": 1812 }, { "epoch": 0.2236683835548839, "grad_norm": 0.5643051862716675, "learning_rate": 1e-05, "loss": 0.531, "step": 1813 }, { "epoch": 0.2237917527680967, "grad_norm": 0.5164887309074402, "learning_rate": 1e-05, "loss": 0.5078, "step": 1814 }, { "epoch": 0.22391512198130956, "grad_norm": 0.5997843146324158, "learning_rate": 1e-05, "loss": 0.6276, "step": 1815 }, { "epoch": 0.2240384911945224, "grad_norm": 0.5873100757598877, "learning_rate": 1e-05, "loss": 0.5863, "step": 1816 }, { "epoch": 0.22416186040773525, "grad_norm": 0.5692573189735413, "learning_rate": 1e-05, "loss": 0.5213, "step": 1817 }, { "epoch": 0.2242852296209481, "grad_norm": 0.6453425884246826, "learning_rate": 1e-05, "loss": 0.594, "step": 1818 }, { "epoch": 0.22440859883416092, "grad_norm": 0.5519073605537415, "learning_rate": 1e-05, "loss": 0.5559, "step": 1819 }, { "epoch": 0.22453196804737377, "grad_norm": 0.6111018657684326, "learning_rate": 1e-05, "loss": 0.5877, "step": 1820 }, { "epoch": 0.22465533726058662, "grad_norm": 0.4982612431049347, "learning_rate": 1e-05, "loss": 0.5027, "step": 1821 }, { "epoch": 0.22477870647379947, "grad_norm": 0.6057767868041992, "learning_rate": 1e-05, "loss": 0.7479, "step": 1822 }, { "epoch": 0.22490207568701231, "grad_norm": 0.5346227884292603, "learning_rate": 1e-05, "loss": 0.5017, "step": 1823 }, { "epoch": 0.22502544490022514, "grad_norm": 0.6359688639640808, "learning_rate": 1e-05, "loss": 0.5498, "step": 1824 }, { "epoch": 0.22514881411343798, "grad_norm": 0.6040794849395752, "learning_rate": 1e-05, "loss": 0.5805, "step": 1825 }, { "epoch": 0.22527218332665083, "grad_norm": 0.5713776350021362, "learning_rate": 1e-05, "loss": 0.5288, "step": 1826 }, { "epoch": 0.22539555253986368, "grad_norm": 0.6375184059143066, "learning_rate": 1e-05, "loss": 0.6709, "step": 1827 }, { "epoch": 0.22551892175307653, "grad_norm": 0.6869243383407593, "learning_rate": 1e-05, "loss": 0.6936, "step": 1828 }, { "epoch": 0.22564229096628935, "grad_norm": 0.5298478007316589, "learning_rate": 1e-05, "loss": 0.6041, "step": 1829 }, { "epoch": 0.2257656601795022, "grad_norm": 0.5819364786148071, "learning_rate": 1e-05, "loss": 0.5353, "step": 1830 }, { "epoch": 0.22588902939271505, "grad_norm": 0.6221590638160706, "learning_rate": 1e-05, "loss": 0.5734, "step": 1831 }, { "epoch": 0.2260123986059279, "grad_norm": 0.5116049647331238, "learning_rate": 1e-05, "loss": 0.4622, "step": 1832 }, { "epoch": 0.22613576781914074, "grad_norm": 0.5946975946426392, "learning_rate": 1e-05, "loss": 0.6678, "step": 1833 }, { "epoch": 0.22625913703235356, "grad_norm": 0.5224777460098267, "learning_rate": 1e-05, "loss": 0.5647, "step": 1834 }, { "epoch": 0.2263825062455664, "grad_norm": 0.6189003586769104, "learning_rate": 1e-05, "loss": 0.6555, "step": 1835 }, { "epoch": 0.22650587545877926, "grad_norm": 0.8063211441040039, "learning_rate": 1e-05, "loss": 0.6846, "step": 1836 }, { "epoch": 0.2266292446719921, "grad_norm": 0.6582860946655273, "learning_rate": 1e-05, "loss": 0.5999, "step": 1837 }, { "epoch": 0.22675261388520496, "grad_norm": 0.606476366519928, "learning_rate": 1e-05, "loss": 0.609, "step": 1838 }, { "epoch": 0.22687598309841778, "grad_norm": 0.5912072062492371, "learning_rate": 1e-05, "loss": 0.5884, "step": 1839 }, { "epoch": 0.22699935231163063, "grad_norm": 0.6500743627548218, "learning_rate": 1e-05, "loss": 0.6, "step": 1840 }, { "epoch": 0.22712272152484347, "grad_norm": 0.6621035933494568, "learning_rate": 1e-05, "loss": 0.6622, "step": 1841 }, { "epoch": 0.22724609073805632, "grad_norm": 0.5570363998413086, "learning_rate": 1e-05, "loss": 0.6046, "step": 1842 }, { "epoch": 0.22736945995126917, "grad_norm": 0.5880016088485718, "learning_rate": 1e-05, "loss": 0.5817, "step": 1843 }, { "epoch": 0.227492829164482, "grad_norm": 0.6288730502128601, "learning_rate": 1e-05, "loss": 0.5425, "step": 1844 }, { "epoch": 0.22761619837769484, "grad_norm": 0.5603077411651611, "learning_rate": 1e-05, "loss": 0.502, "step": 1845 }, { "epoch": 0.2277395675909077, "grad_norm": 0.7450944781303406, "learning_rate": 1e-05, "loss": 0.7392, "step": 1846 }, { "epoch": 0.22786293680412054, "grad_norm": 0.6661251783370972, "learning_rate": 1e-05, "loss": 0.6847, "step": 1847 }, { "epoch": 0.22798630601733338, "grad_norm": 0.6500726342201233, "learning_rate": 1e-05, "loss": 0.6082, "step": 1848 }, { "epoch": 0.2281096752305462, "grad_norm": 0.592842161655426, "learning_rate": 1e-05, "loss": 0.5785, "step": 1849 }, { "epoch": 0.22823304444375905, "grad_norm": 0.5564199686050415, "learning_rate": 1e-05, "loss": 0.5536, "step": 1850 }, { "epoch": 0.2283564136569719, "grad_norm": 0.6136468648910522, "learning_rate": 1e-05, "loss": 0.6315, "step": 1851 }, { "epoch": 0.22847978287018475, "grad_norm": 0.4918842017650604, "learning_rate": 1e-05, "loss": 0.4613, "step": 1852 }, { "epoch": 0.2286031520833976, "grad_norm": 0.5487180948257446, "learning_rate": 1e-05, "loss": 0.5783, "step": 1853 }, { "epoch": 0.22872652129661042, "grad_norm": 0.5528824329376221, "learning_rate": 1e-05, "loss": 0.6109, "step": 1854 }, { "epoch": 0.22884989050982327, "grad_norm": 0.4994637966156006, "learning_rate": 1e-05, "loss": 0.464, "step": 1855 }, { "epoch": 0.22897325972303612, "grad_norm": 0.5255768299102783, "learning_rate": 1e-05, "loss": 0.4432, "step": 1856 }, { "epoch": 0.22909662893624896, "grad_norm": 0.6003437638282776, "learning_rate": 1e-05, "loss": 0.6573, "step": 1857 }, { "epoch": 0.2292199981494618, "grad_norm": 0.6995925903320312, "learning_rate": 1e-05, "loss": 0.6189, "step": 1858 }, { "epoch": 0.22934336736267463, "grad_norm": 0.6110936999320984, "learning_rate": 1e-05, "loss": 0.5557, "step": 1859 }, { "epoch": 0.22946673657588748, "grad_norm": 0.5717414617538452, "learning_rate": 1e-05, "loss": 0.571, "step": 1860 }, { "epoch": 0.22959010578910033, "grad_norm": 0.6698420643806458, "learning_rate": 1e-05, "loss": 0.6274, "step": 1861 }, { "epoch": 0.22971347500231318, "grad_norm": 0.5682502388954163, "learning_rate": 1e-05, "loss": 0.5704, "step": 1862 }, { "epoch": 0.22983684421552603, "grad_norm": 0.62111496925354, "learning_rate": 1e-05, "loss": 0.6104, "step": 1863 }, { "epoch": 0.22996021342873885, "grad_norm": 0.5942018032073975, "learning_rate": 1e-05, "loss": 0.5521, "step": 1864 }, { "epoch": 0.2300835826419517, "grad_norm": 0.5660898089408875, "learning_rate": 1e-05, "loss": 0.5759, "step": 1865 }, { "epoch": 0.23020695185516454, "grad_norm": 0.6822975277900696, "learning_rate": 1e-05, "loss": 0.7143, "step": 1866 }, { "epoch": 0.2303303210683774, "grad_norm": 0.5773157477378845, "learning_rate": 1e-05, "loss": 0.573, "step": 1867 }, { "epoch": 0.23045369028159024, "grad_norm": 0.6431583166122437, "learning_rate": 1e-05, "loss": 0.6589, "step": 1868 }, { "epoch": 0.23057705949480306, "grad_norm": 0.6153532862663269, "learning_rate": 1e-05, "loss": 0.6059, "step": 1869 }, { "epoch": 0.2307004287080159, "grad_norm": 0.6309792399406433, "learning_rate": 1e-05, "loss": 0.6783, "step": 1870 }, { "epoch": 0.23082379792122876, "grad_norm": 0.6020069718360901, "learning_rate": 1e-05, "loss": 0.5832, "step": 1871 }, { "epoch": 0.2309471671344416, "grad_norm": 0.626592755317688, "learning_rate": 1e-05, "loss": 0.6764, "step": 1872 }, { "epoch": 0.23107053634765445, "grad_norm": 0.6067579984664917, "learning_rate": 1e-05, "loss": 0.6033, "step": 1873 }, { "epoch": 0.23119390556086727, "grad_norm": 0.5781404376029968, "learning_rate": 1e-05, "loss": 0.6253, "step": 1874 }, { "epoch": 0.23131727477408012, "grad_norm": 0.6299999952316284, "learning_rate": 1e-05, "loss": 0.608, "step": 1875 }, { "epoch": 0.23144064398729297, "grad_norm": 0.6142086386680603, "learning_rate": 1e-05, "loss": 0.5553, "step": 1876 }, { "epoch": 0.23156401320050582, "grad_norm": 0.5581423044204712, "learning_rate": 1e-05, "loss": 0.6148, "step": 1877 }, { "epoch": 0.23168738241371867, "grad_norm": 0.6828950643539429, "learning_rate": 1e-05, "loss": 0.6768, "step": 1878 }, { "epoch": 0.2318107516269315, "grad_norm": 0.6103339791297913, "learning_rate": 1e-05, "loss": 0.5787, "step": 1879 }, { "epoch": 0.23193412084014434, "grad_norm": 0.5770353674888611, "learning_rate": 1e-05, "loss": 0.5552, "step": 1880 }, { "epoch": 0.23205749005335718, "grad_norm": 0.647749125957489, "learning_rate": 1e-05, "loss": 0.6216, "step": 1881 }, { "epoch": 0.23218085926657003, "grad_norm": 0.6457089185714722, "learning_rate": 1e-05, "loss": 0.6436, "step": 1882 }, { "epoch": 0.23230422847978288, "grad_norm": 0.6589125394821167, "learning_rate": 1e-05, "loss": 0.77, "step": 1883 }, { "epoch": 0.2324275976929957, "grad_norm": 0.5966016054153442, "learning_rate": 1e-05, "loss": 0.6113, "step": 1884 }, { "epoch": 0.23255096690620855, "grad_norm": 0.5856176614761353, "learning_rate": 1e-05, "loss": 0.5486, "step": 1885 }, { "epoch": 0.2326743361194214, "grad_norm": 0.5277027487754822, "learning_rate": 1e-05, "loss": 0.4854, "step": 1886 }, { "epoch": 0.23279770533263425, "grad_norm": 0.5920373201370239, "learning_rate": 1e-05, "loss": 0.5193, "step": 1887 }, { "epoch": 0.2329210745458471, "grad_norm": 0.592307984828949, "learning_rate": 1e-05, "loss": 0.5948, "step": 1888 }, { "epoch": 0.23304444375905992, "grad_norm": 0.5697289109230042, "learning_rate": 1e-05, "loss": 0.476, "step": 1889 }, { "epoch": 0.23316781297227276, "grad_norm": 0.5640900731086731, "learning_rate": 1e-05, "loss": 0.5739, "step": 1890 }, { "epoch": 0.2332911821854856, "grad_norm": 0.6253859400749207, "learning_rate": 1e-05, "loss": 0.6885, "step": 1891 }, { "epoch": 0.23341455139869846, "grad_norm": 0.6224663257598877, "learning_rate": 1e-05, "loss": 0.7036, "step": 1892 }, { "epoch": 0.2335379206119113, "grad_norm": 0.6166849136352539, "learning_rate": 1e-05, "loss": 0.5624, "step": 1893 }, { "epoch": 0.23366128982512413, "grad_norm": 0.569633424282074, "learning_rate": 1e-05, "loss": 0.5386, "step": 1894 }, { "epoch": 0.23378465903833698, "grad_norm": 0.6035651564598083, "learning_rate": 1e-05, "loss": 0.6395, "step": 1895 }, { "epoch": 0.23390802825154983, "grad_norm": 0.6012935042381287, "learning_rate": 1e-05, "loss": 0.6474, "step": 1896 }, { "epoch": 0.23403139746476267, "grad_norm": 0.5378919243812561, "learning_rate": 1e-05, "loss": 0.5768, "step": 1897 }, { "epoch": 0.23415476667797552, "grad_norm": 0.5945752859115601, "learning_rate": 1e-05, "loss": 0.5958, "step": 1898 }, { "epoch": 0.23427813589118834, "grad_norm": 0.6340053081512451, "learning_rate": 1e-05, "loss": 0.7213, "step": 1899 }, { "epoch": 0.2344015051044012, "grad_norm": 0.5825461745262146, "learning_rate": 1e-05, "loss": 0.6314, "step": 1900 }, { "epoch": 0.23452487431761404, "grad_norm": 0.5616059303283691, "learning_rate": 1e-05, "loss": 0.5229, "step": 1901 }, { "epoch": 0.2346482435308269, "grad_norm": 0.618401050567627, "learning_rate": 1e-05, "loss": 0.6173, "step": 1902 }, { "epoch": 0.23477161274403974, "grad_norm": 0.5460101962089539, "learning_rate": 1e-05, "loss": 0.5152, "step": 1903 }, { "epoch": 0.23489498195725256, "grad_norm": 0.5193110108375549, "learning_rate": 1e-05, "loss": 0.5291, "step": 1904 }, { "epoch": 0.2350183511704654, "grad_norm": 0.5520403385162354, "learning_rate": 1e-05, "loss": 0.5229, "step": 1905 }, { "epoch": 0.23514172038367825, "grad_norm": 0.5887488126754761, "learning_rate": 1e-05, "loss": 0.5866, "step": 1906 }, { "epoch": 0.2352650895968911, "grad_norm": 0.720939040184021, "learning_rate": 1e-05, "loss": 0.618, "step": 1907 }, { "epoch": 0.23538845881010395, "grad_norm": 0.7164895534515381, "learning_rate": 1e-05, "loss": 0.585, "step": 1908 }, { "epoch": 0.23551182802331677, "grad_norm": 0.6072611808776855, "learning_rate": 1e-05, "loss": 0.6951, "step": 1909 }, { "epoch": 0.23563519723652962, "grad_norm": 0.5528948903083801, "learning_rate": 1e-05, "loss": 0.5888, "step": 1910 }, { "epoch": 0.23575856644974247, "grad_norm": 0.5894188284873962, "learning_rate": 1e-05, "loss": 0.6181, "step": 1911 }, { "epoch": 0.23588193566295532, "grad_norm": 0.6290562152862549, "learning_rate": 1e-05, "loss": 0.6177, "step": 1912 }, { "epoch": 0.23600530487616816, "grad_norm": 0.6277700066566467, "learning_rate": 1e-05, "loss": 0.5819, "step": 1913 }, { "epoch": 0.23612867408938099, "grad_norm": 0.5780625343322754, "learning_rate": 1e-05, "loss": 0.5941, "step": 1914 }, { "epoch": 0.23625204330259383, "grad_norm": 0.5178156495094299, "learning_rate": 1e-05, "loss": 0.4762, "step": 1915 }, { "epoch": 0.23637541251580668, "grad_norm": 0.5158912539482117, "learning_rate": 1e-05, "loss": 0.4587, "step": 1916 }, { "epoch": 0.23649878172901953, "grad_norm": 0.5742241740226746, "learning_rate": 1e-05, "loss": 0.5442, "step": 1917 }, { "epoch": 0.23662215094223238, "grad_norm": 0.573411226272583, "learning_rate": 1e-05, "loss": 0.5341, "step": 1918 }, { "epoch": 0.2367455201554452, "grad_norm": 0.7077033519744873, "learning_rate": 1e-05, "loss": 0.7759, "step": 1919 }, { "epoch": 0.23686888936865805, "grad_norm": 0.5601609349250793, "learning_rate": 1e-05, "loss": 0.4983, "step": 1920 }, { "epoch": 0.2369922585818709, "grad_norm": 0.6786377429962158, "learning_rate": 1e-05, "loss": 0.69, "step": 1921 }, { "epoch": 0.23711562779508374, "grad_norm": 0.6149836182594299, "learning_rate": 1e-05, "loss": 0.5917, "step": 1922 }, { "epoch": 0.2372389970082966, "grad_norm": 0.6086189150810242, "learning_rate": 1e-05, "loss": 0.5866, "step": 1923 }, { "epoch": 0.2373623662215094, "grad_norm": 0.5619145631790161, "learning_rate": 1e-05, "loss": 0.5606, "step": 1924 }, { "epoch": 0.23748573543472226, "grad_norm": 0.5998448133468628, "learning_rate": 1e-05, "loss": 0.5936, "step": 1925 }, { "epoch": 0.2376091046479351, "grad_norm": 0.546172022819519, "learning_rate": 1e-05, "loss": 0.5909, "step": 1926 }, { "epoch": 0.23773247386114796, "grad_norm": 0.5582752227783203, "learning_rate": 1e-05, "loss": 0.5491, "step": 1927 }, { "epoch": 0.2378558430743608, "grad_norm": 0.5312005281448364, "learning_rate": 1e-05, "loss": 0.5228, "step": 1928 }, { "epoch": 0.23797921228757363, "grad_norm": 0.6900638937950134, "learning_rate": 1e-05, "loss": 0.6763, "step": 1929 }, { "epoch": 0.23810258150078648, "grad_norm": 0.5687353014945984, "learning_rate": 1e-05, "loss": 0.5696, "step": 1930 }, { "epoch": 0.23822595071399932, "grad_norm": 0.5467212200164795, "learning_rate": 1e-05, "loss": 0.5153, "step": 1931 }, { "epoch": 0.23834931992721217, "grad_norm": 0.6204982995986938, "learning_rate": 1e-05, "loss": 0.5654, "step": 1932 }, { "epoch": 0.23847268914042502, "grad_norm": 0.6690681576728821, "learning_rate": 1e-05, "loss": 0.6293, "step": 1933 }, { "epoch": 0.23859605835363784, "grad_norm": 0.5970614552497864, "learning_rate": 1e-05, "loss": 0.628, "step": 1934 }, { "epoch": 0.2387194275668507, "grad_norm": 0.5797936320304871, "learning_rate": 1e-05, "loss": 0.5034, "step": 1935 }, { "epoch": 0.23884279678006354, "grad_norm": 0.5850003957748413, "learning_rate": 1e-05, "loss": 0.5183, "step": 1936 }, { "epoch": 0.23896616599327639, "grad_norm": 0.6345173120498657, "learning_rate": 1e-05, "loss": 0.5817, "step": 1937 }, { "epoch": 0.23908953520648923, "grad_norm": 0.5237281322479248, "learning_rate": 1e-05, "loss": 0.4892, "step": 1938 }, { "epoch": 0.23921290441970205, "grad_norm": 0.5806989073753357, "learning_rate": 1e-05, "loss": 0.6264, "step": 1939 }, { "epoch": 0.2393362736329149, "grad_norm": 0.6147322058677673, "learning_rate": 1e-05, "loss": 0.626, "step": 1940 }, { "epoch": 0.23945964284612775, "grad_norm": 0.6029227375984192, "learning_rate": 1e-05, "loss": 0.6165, "step": 1941 }, { "epoch": 0.2395830120593406, "grad_norm": 0.5787535905838013, "learning_rate": 1e-05, "loss": 0.5223, "step": 1942 }, { "epoch": 0.23970638127255342, "grad_norm": 0.6031496524810791, "learning_rate": 1e-05, "loss": 0.6155, "step": 1943 }, { "epoch": 0.23982975048576627, "grad_norm": 0.5610787272453308, "learning_rate": 1e-05, "loss": 0.588, "step": 1944 }, { "epoch": 0.23995311969897912, "grad_norm": 0.5277835726737976, "learning_rate": 1e-05, "loss": 0.4854, "step": 1945 }, { "epoch": 0.24007648891219197, "grad_norm": 0.6356926560401917, "learning_rate": 1e-05, "loss": 0.5835, "step": 1946 }, { "epoch": 0.2401998581254048, "grad_norm": 0.6159375309944153, "learning_rate": 1e-05, "loss": 0.6744, "step": 1947 }, { "epoch": 0.24032322733861763, "grad_norm": 0.6027088165283203, "learning_rate": 1e-05, "loss": 0.6015, "step": 1948 }, { "epoch": 0.24044659655183048, "grad_norm": 0.6388583183288574, "learning_rate": 1e-05, "loss": 0.5846, "step": 1949 }, { "epoch": 0.24056996576504333, "grad_norm": 0.5177108645439148, "learning_rate": 1e-05, "loss": 0.5077, "step": 1950 }, { "epoch": 0.24069333497825618, "grad_norm": 0.562444806098938, "learning_rate": 1e-05, "loss": 0.5192, "step": 1951 }, { "epoch": 0.24081670419146903, "grad_norm": 0.5530810356140137, "learning_rate": 1e-05, "loss": 0.5348, "step": 1952 }, { "epoch": 0.24094007340468185, "grad_norm": 0.5632868409156799, "learning_rate": 1e-05, "loss": 0.5326, "step": 1953 }, { "epoch": 0.2410634426178947, "grad_norm": 0.6275971531867981, "learning_rate": 1e-05, "loss": 0.6469, "step": 1954 }, { "epoch": 0.24118681183110754, "grad_norm": 0.6272329688072205, "learning_rate": 1e-05, "loss": 0.5754, "step": 1955 }, { "epoch": 0.2413101810443204, "grad_norm": 0.5752784609794617, "learning_rate": 1e-05, "loss": 0.5775, "step": 1956 }, { "epoch": 0.24143355025753324, "grad_norm": 0.5845722556114197, "learning_rate": 1e-05, "loss": 0.5705, "step": 1957 }, { "epoch": 0.24155691947074606, "grad_norm": 0.5370882749557495, "learning_rate": 1e-05, "loss": 0.6454, "step": 1958 }, { "epoch": 0.2416802886839589, "grad_norm": 0.5928177833557129, "learning_rate": 1e-05, "loss": 0.6565, "step": 1959 }, { "epoch": 0.24180365789717176, "grad_norm": 0.508283793926239, "learning_rate": 1e-05, "loss": 0.5286, "step": 1960 }, { "epoch": 0.2419270271103846, "grad_norm": 0.5673846006393433, "learning_rate": 1e-05, "loss": 0.6287, "step": 1961 }, { "epoch": 0.24205039632359746, "grad_norm": 0.5439802408218384, "learning_rate": 1e-05, "loss": 0.5769, "step": 1962 }, { "epoch": 0.24217376553681028, "grad_norm": 0.5441744327545166, "learning_rate": 1e-05, "loss": 0.5341, "step": 1963 }, { "epoch": 0.24229713475002312, "grad_norm": 0.5298532247543335, "learning_rate": 1e-05, "loss": 0.5327, "step": 1964 }, { "epoch": 0.24242050396323597, "grad_norm": 0.5528157949447632, "learning_rate": 1e-05, "loss": 0.4498, "step": 1965 }, { "epoch": 0.24254387317644882, "grad_norm": 0.5343120694160461, "learning_rate": 1e-05, "loss": 0.5502, "step": 1966 }, { "epoch": 0.24266724238966167, "grad_norm": 0.5940369963645935, "learning_rate": 1e-05, "loss": 0.6359, "step": 1967 }, { "epoch": 0.2427906116028745, "grad_norm": 0.6196238398551941, "learning_rate": 1e-05, "loss": 0.5585, "step": 1968 }, { "epoch": 0.24291398081608734, "grad_norm": 0.6200729012489319, "learning_rate": 1e-05, "loss": 0.6789, "step": 1969 }, { "epoch": 0.2430373500293002, "grad_norm": 0.581210732460022, "learning_rate": 1e-05, "loss": 0.586, "step": 1970 }, { "epoch": 0.24316071924251303, "grad_norm": 0.6038886904716492, "learning_rate": 1e-05, "loss": 0.5979, "step": 1971 }, { "epoch": 0.24328408845572588, "grad_norm": 0.5776984691619873, "learning_rate": 1e-05, "loss": 0.6072, "step": 1972 }, { "epoch": 0.2434074576689387, "grad_norm": 0.6662485599517822, "learning_rate": 1e-05, "loss": 0.6708, "step": 1973 }, { "epoch": 0.24353082688215155, "grad_norm": 0.625464677810669, "learning_rate": 1e-05, "loss": 0.6839, "step": 1974 }, { "epoch": 0.2436541960953644, "grad_norm": 0.6177462935447693, "learning_rate": 1e-05, "loss": 0.596, "step": 1975 }, { "epoch": 0.24377756530857725, "grad_norm": 0.6230705976486206, "learning_rate": 1e-05, "loss": 0.5914, "step": 1976 }, { "epoch": 0.2439009345217901, "grad_norm": 0.5348135232925415, "learning_rate": 1e-05, "loss": 0.5729, "step": 1977 }, { "epoch": 0.24402430373500292, "grad_norm": 0.6516309380531311, "learning_rate": 1e-05, "loss": 0.7052, "step": 1978 }, { "epoch": 0.24414767294821577, "grad_norm": 0.6023136973381042, "learning_rate": 1e-05, "loss": 0.6047, "step": 1979 }, { "epoch": 0.24427104216142861, "grad_norm": 0.5739859342575073, "learning_rate": 1e-05, "loss": 0.7151, "step": 1980 }, { "epoch": 0.24439441137464146, "grad_norm": 0.6925045847892761, "learning_rate": 1e-05, "loss": 0.6507, "step": 1981 }, { "epoch": 0.2445177805878543, "grad_norm": 0.5683016180992126, "learning_rate": 1e-05, "loss": 0.6211, "step": 1982 }, { "epoch": 0.24464114980106713, "grad_norm": 0.48654839396476746, "learning_rate": 1e-05, "loss": 0.4542, "step": 1983 }, { "epoch": 0.24476451901427998, "grad_norm": 0.5831782221794128, "learning_rate": 1e-05, "loss": 0.6051, "step": 1984 }, { "epoch": 0.24488788822749283, "grad_norm": 0.6318076252937317, "learning_rate": 1e-05, "loss": 0.5967, "step": 1985 }, { "epoch": 0.24501125744070568, "grad_norm": 0.6618272066116333, "learning_rate": 1e-05, "loss": 0.6163, "step": 1986 }, { "epoch": 0.24513462665391852, "grad_norm": 0.4990249574184418, "learning_rate": 1e-05, "loss": 0.4453, "step": 1987 }, { "epoch": 0.24525799586713135, "grad_norm": 0.553692638874054, "learning_rate": 1e-05, "loss": 0.5687, "step": 1988 }, { "epoch": 0.2453813650803442, "grad_norm": 0.5679989457130432, "learning_rate": 1e-05, "loss": 0.5073, "step": 1989 }, { "epoch": 0.24550473429355704, "grad_norm": 0.6786329746246338, "learning_rate": 1e-05, "loss": 0.7035, "step": 1990 }, { "epoch": 0.2456281035067699, "grad_norm": 0.6019949913024902, "learning_rate": 1e-05, "loss": 0.6527, "step": 1991 }, { "epoch": 0.24575147271998274, "grad_norm": 0.5788977146148682, "learning_rate": 1e-05, "loss": 0.5802, "step": 1992 }, { "epoch": 0.24587484193319556, "grad_norm": 0.6062557697296143, "learning_rate": 1e-05, "loss": 0.5842, "step": 1993 }, { "epoch": 0.2459982111464084, "grad_norm": 0.6000663638114929, "learning_rate": 1e-05, "loss": 0.6841, "step": 1994 }, { "epoch": 0.24612158035962126, "grad_norm": 0.5827223658561707, "learning_rate": 1e-05, "loss": 0.6598, "step": 1995 }, { "epoch": 0.2462449495728341, "grad_norm": 0.5975130796432495, "learning_rate": 1e-05, "loss": 0.5759, "step": 1996 }, { "epoch": 0.24636831878604695, "grad_norm": 0.5987032651901245, "learning_rate": 1e-05, "loss": 0.5255, "step": 1997 }, { "epoch": 0.24649168799925977, "grad_norm": 0.5704081654548645, "learning_rate": 1e-05, "loss": 0.5773, "step": 1998 }, { "epoch": 0.24661505721247262, "grad_norm": 0.5680478811264038, "learning_rate": 1e-05, "loss": 0.6336, "step": 1999 }, { "epoch": 0.24673842642568547, "grad_norm": 0.6620289087295532, "learning_rate": 1e-05, "loss": 0.6611, "step": 2000 }, { "epoch": 0.24686179563889832, "grad_norm": 0.5246573090553284, "learning_rate": 1e-05, "loss": 0.4802, "step": 2001 }, { "epoch": 0.24698516485211117, "grad_norm": 0.5662602782249451, "learning_rate": 1e-05, "loss": 0.485, "step": 2002 }, { "epoch": 0.247108534065324, "grad_norm": 0.5645307898521423, "learning_rate": 1e-05, "loss": 0.5506, "step": 2003 }, { "epoch": 0.24723190327853684, "grad_norm": 0.5575620532035828, "learning_rate": 1e-05, "loss": 0.5584, "step": 2004 }, { "epoch": 0.24735527249174968, "grad_norm": 0.5908694267272949, "learning_rate": 1e-05, "loss": 0.7162, "step": 2005 }, { "epoch": 0.24747864170496253, "grad_norm": 0.574220597743988, "learning_rate": 1e-05, "loss": 0.6074, "step": 2006 }, { "epoch": 0.24760201091817538, "grad_norm": 0.5977771282196045, "learning_rate": 1e-05, "loss": 0.5877, "step": 2007 }, { "epoch": 0.2477253801313882, "grad_norm": 0.6138885021209717, "learning_rate": 1e-05, "loss": 0.7042, "step": 2008 }, { "epoch": 0.24784874934460105, "grad_norm": 0.6049203872680664, "learning_rate": 1e-05, "loss": 0.5944, "step": 2009 }, { "epoch": 0.2479721185578139, "grad_norm": 0.6776479482650757, "learning_rate": 1e-05, "loss": 0.7738, "step": 2010 }, { "epoch": 0.24809548777102675, "grad_norm": 0.6269879937171936, "learning_rate": 1e-05, "loss": 0.6474, "step": 2011 }, { "epoch": 0.2482188569842396, "grad_norm": 0.6828837990760803, "learning_rate": 1e-05, "loss": 0.6726, "step": 2012 }, { "epoch": 0.24834222619745241, "grad_norm": 0.5686069130897522, "learning_rate": 1e-05, "loss": 0.5515, "step": 2013 }, { "epoch": 0.24846559541066526, "grad_norm": 0.6131106615066528, "learning_rate": 1e-05, "loss": 0.6847, "step": 2014 }, { "epoch": 0.2485889646238781, "grad_norm": 0.692478597164154, "learning_rate": 1e-05, "loss": 0.7769, "step": 2015 }, { "epoch": 0.24871233383709096, "grad_norm": 0.5828033685684204, "learning_rate": 1e-05, "loss": 0.5517, "step": 2016 }, { "epoch": 0.2488357030503038, "grad_norm": 0.6724193692207336, "learning_rate": 1e-05, "loss": 0.6244, "step": 2017 }, { "epoch": 0.24895907226351663, "grad_norm": 0.6819830536842346, "learning_rate": 1e-05, "loss": 0.7862, "step": 2018 }, { "epoch": 0.24908244147672948, "grad_norm": 0.5542069673538208, "learning_rate": 1e-05, "loss": 0.4776, "step": 2019 }, { "epoch": 0.24920581068994233, "grad_norm": 0.6117525100708008, "learning_rate": 1e-05, "loss": 0.6457, "step": 2020 }, { "epoch": 0.24932917990315517, "grad_norm": 0.6023091673851013, "learning_rate": 1e-05, "loss": 0.5367, "step": 2021 }, { "epoch": 0.24945254911636802, "grad_norm": 0.5483713746070862, "learning_rate": 1e-05, "loss": 0.5585, "step": 2022 }, { "epoch": 0.24957591832958084, "grad_norm": 0.6647390127182007, "learning_rate": 1e-05, "loss": 0.5309, "step": 2023 }, { "epoch": 0.2496992875427937, "grad_norm": 0.6434768438339233, "learning_rate": 1e-05, "loss": 0.5932, "step": 2024 }, { "epoch": 0.24982265675600654, "grad_norm": 0.6239241361618042, "learning_rate": 1e-05, "loss": 0.5897, "step": 2025 }, { "epoch": 0.2499460259692194, "grad_norm": 0.553680419921875, "learning_rate": 1e-05, "loss": 0.6068, "step": 2026 }, { "epoch": 0.2500693951824322, "grad_norm": 0.6005160212516785, "learning_rate": 1e-05, "loss": 0.5949, "step": 2027 }, { "epoch": 0.2501927643956451, "grad_norm": 0.5970323085784912, "learning_rate": 1e-05, "loss": 0.5775, "step": 2028 }, { "epoch": 0.2503161336088579, "grad_norm": 0.6195443272590637, "learning_rate": 1e-05, "loss": 0.6466, "step": 2029 }, { "epoch": 0.2504395028220707, "grad_norm": 0.5641137957572937, "learning_rate": 1e-05, "loss": 0.5848, "step": 2030 }, { "epoch": 0.2505628720352836, "grad_norm": 0.6044954061508179, "learning_rate": 1e-05, "loss": 0.5949, "step": 2031 }, { "epoch": 0.2506862412484964, "grad_norm": 0.6756864190101624, "learning_rate": 1e-05, "loss": 0.6193, "step": 2032 }, { "epoch": 0.2508096104617093, "grad_norm": 0.6410040259361267, "learning_rate": 1e-05, "loss": 0.6011, "step": 2033 }, { "epoch": 0.2509329796749221, "grad_norm": 0.5323288440704346, "learning_rate": 1e-05, "loss": 0.4929, "step": 2034 }, { "epoch": 0.25105634888813494, "grad_norm": 0.5612605214118958, "learning_rate": 1e-05, "loss": 0.5446, "step": 2035 }, { "epoch": 0.2511797181013478, "grad_norm": 0.6954962015151978, "learning_rate": 1e-05, "loss": 0.6085, "step": 2036 }, { "epoch": 0.25130308731456064, "grad_norm": 0.6375545263290405, "learning_rate": 1e-05, "loss": 0.6476, "step": 2037 }, { "epoch": 0.2514264565277735, "grad_norm": 0.5560175776481628, "learning_rate": 1e-05, "loss": 0.5688, "step": 2038 }, { "epoch": 0.25154982574098633, "grad_norm": 0.6105802059173584, "learning_rate": 1e-05, "loss": 0.6085, "step": 2039 }, { "epoch": 0.25167319495419915, "grad_norm": 0.5630727410316467, "learning_rate": 1e-05, "loss": 0.5552, "step": 2040 }, { "epoch": 0.25179656416741203, "grad_norm": 0.6015505194664001, "learning_rate": 1e-05, "loss": 0.5465, "step": 2041 }, { "epoch": 0.25191993338062485, "grad_norm": 0.6908645033836365, "learning_rate": 1e-05, "loss": 0.6412, "step": 2042 }, { "epoch": 0.2520433025938377, "grad_norm": 0.6670074462890625, "learning_rate": 1e-05, "loss": 0.662, "step": 2043 }, { "epoch": 0.25216667180705055, "grad_norm": 0.6221444606781006, "learning_rate": 1e-05, "loss": 0.6891, "step": 2044 }, { "epoch": 0.25229004102026337, "grad_norm": 0.5297544598579407, "learning_rate": 1e-05, "loss": 0.5397, "step": 2045 }, { "epoch": 0.25241341023347624, "grad_norm": 0.5456277132034302, "learning_rate": 1e-05, "loss": 0.4619, "step": 2046 }, { "epoch": 0.25253677944668906, "grad_norm": 0.5859251618385315, "learning_rate": 1e-05, "loss": 0.6062, "step": 2047 }, { "epoch": 0.25266014865990194, "grad_norm": 0.5639132857322693, "learning_rate": 1e-05, "loss": 0.5998, "step": 2048 }, { "epoch": 0.25278351787311476, "grad_norm": 0.6235526204109192, "learning_rate": 1e-05, "loss": 0.6291, "step": 2049 }, { "epoch": 0.2529068870863276, "grad_norm": 0.6492210626602173, "learning_rate": 1e-05, "loss": 0.6824, "step": 2050 }, { "epoch": 0.25303025629954046, "grad_norm": 0.5269551873207092, "learning_rate": 1e-05, "loss": 0.578, "step": 2051 }, { "epoch": 0.2531536255127533, "grad_norm": 0.5945770144462585, "learning_rate": 1e-05, "loss": 0.5506, "step": 2052 }, { "epoch": 0.25327699472596615, "grad_norm": 0.5911686420440674, "learning_rate": 1e-05, "loss": 0.5046, "step": 2053 }, { "epoch": 0.253400363939179, "grad_norm": 0.5054376721382141, "learning_rate": 1e-05, "loss": 0.5194, "step": 2054 }, { "epoch": 0.2535237331523918, "grad_norm": 0.6015253663063049, "learning_rate": 1e-05, "loss": 0.637, "step": 2055 }, { "epoch": 0.25364710236560467, "grad_norm": 0.5333486199378967, "learning_rate": 1e-05, "loss": 0.4951, "step": 2056 }, { "epoch": 0.2537704715788175, "grad_norm": 0.6085503101348877, "learning_rate": 1e-05, "loss": 0.5958, "step": 2057 }, { "epoch": 0.25389384079203037, "grad_norm": 0.5648449063301086, "learning_rate": 1e-05, "loss": 0.6552, "step": 2058 }, { "epoch": 0.2540172100052432, "grad_norm": 0.576716423034668, "learning_rate": 1e-05, "loss": 0.5402, "step": 2059 }, { "epoch": 0.254140579218456, "grad_norm": 0.5919442772865295, "learning_rate": 1e-05, "loss": 0.5697, "step": 2060 }, { "epoch": 0.2542639484316689, "grad_norm": 0.5511237382888794, "learning_rate": 1e-05, "loss": 0.5748, "step": 2061 }, { "epoch": 0.2543873176448817, "grad_norm": 0.5928934812545776, "learning_rate": 1e-05, "loss": 0.6223, "step": 2062 }, { "epoch": 0.2545106868580946, "grad_norm": 0.5946961045265198, "learning_rate": 1e-05, "loss": 0.6055, "step": 2063 }, { "epoch": 0.2546340560713074, "grad_norm": 0.5530418157577515, "learning_rate": 1e-05, "loss": 0.5082, "step": 2064 }, { "epoch": 0.2547574252845202, "grad_norm": 0.5491568446159363, "learning_rate": 1e-05, "loss": 0.5322, "step": 2065 }, { "epoch": 0.2548807944977331, "grad_norm": 0.5598821640014648, "learning_rate": 1e-05, "loss": 0.5286, "step": 2066 }, { "epoch": 0.2550041637109459, "grad_norm": 0.5555145740509033, "learning_rate": 1e-05, "loss": 0.5405, "step": 2067 }, { "epoch": 0.2551275329241588, "grad_norm": 0.5532424449920654, "learning_rate": 1e-05, "loss": 0.536, "step": 2068 }, { "epoch": 0.2552509021373716, "grad_norm": 0.6155223846435547, "learning_rate": 1e-05, "loss": 0.5854, "step": 2069 }, { "epoch": 0.25537427135058444, "grad_norm": 0.5703986883163452, "learning_rate": 1e-05, "loss": 0.6366, "step": 2070 }, { "epoch": 0.2554976405637973, "grad_norm": 0.5769411325454712, "learning_rate": 1e-05, "loss": 0.5354, "step": 2071 }, { "epoch": 0.25562100977701013, "grad_norm": 0.5484208464622498, "learning_rate": 1e-05, "loss": 0.5091, "step": 2072 }, { "epoch": 0.255744378990223, "grad_norm": 0.5747383236885071, "learning_rate": 1e-05, "loss": 0.5538, "step": 2073 }, { "epoch": 0.25586774820343583, "grad_norm": 0.5559650659561157, "learning_rate": 1e-05, "loss": 0.5597, "step": 2074 }, { "epoch": 0.25599111741664865, "grad_norm": 0.575732946395874, "learning_rate": 1e-05, "loss": 0.6083, "step": 2075 }, { "epoch": 0.2561144866298615, "grad_norm": 0.6708223819732666, "learning_rate": 1e-05, "loss": 0.685, "step": 2076 }, { "epoch": 0.25623785584307435, "grad_norm": 0.57809978723526, "learning_rate": 1e-05, "loss": 0.6474, "step": 2077 }, { "epoch": 0.2563612250562872, "grad_norm": 0.4946746528148651, "learning_rate": 1e-05, "loss": 0.4552, "step": 2078 }, { "epoch": 0.25648459426950004, "grad_norm": 0.6159746050834656, "learning_rate": 1e-05, "loss": 0.6545, "step": 2079 }, { "epoch": 0.25660796348271286, "grad_norm": 0.5319188237190247, "learning_rate": 1e-05, "loss": 0.5694, "step": 2080 }, { "epoch": 0.25673133269592574, "grad_norm": 0.5852658748626709, "learning_rate": 1e-05, "loss": 0.5533, "step": 2081 }, { "epoch": 0.25685470190913856, "grad_norm": 0.6355715990066528, "learning_rate": 1e-05, "loss": 0.7356, "step": 2082 }, { "epoch": 0.25697807112235144, "grad_norm": 0.4910171329975128, "learning_rate": 1e-05, "loss": 0.4856, "step": 2083 }, { "epoch": 0.25710144033556426, "grad_norm": 0.6329323053359985, "learning_rate": 1e-05, "loss": 0.5937, "step": 2084 }, { "epoch": 0.2572248095487771, "grad_norm": 0.6277771592140198, "learning_rate": 1e-05, "loss": 0.6461, "step": 2085 }, { "epoch": 0.25734817876198995, "grad_norm": 0.6204930543899536, "learning_rate": 1e-05, "loss": 0.6693, "step": 2086 }, { "epoch": 0.2574715479752028, "grad_norm": 0.5610408186912537, "learning_rate": 1e-05, "loss": 0.5266, "step": 2087 }, { "epoch": 0.25759491718841565, "grad_norm": 0.6030799150466919, "learning_rate": 1e-05, "loss": 0.6169, "step": 2088 }, { "epoch": 0.25771828640162847, "grad_norm": 0.6658385992050171, "learning_rate": 1e-05, "loss": 0.6461, "step": 2089 }, { "epoch": 0.2578416556148413, "grad_norm": 0.6664196848869324, "learning_rate": 1e-05, "loss": 0.6726, "step": 2090 }, { "epoch": 0.25796502482805417, "grad_norm": 0.641740620136261, "learning_rate": 1e-05, "loss": 0.6174, "step": 2091 }, { "epoch": 0.258088394041267, "grad_norm": 0.6004464030265808, "learning_rate": 1e-05, "loss": 0.5635, "step": 2092 }, { "epoch": 0.25821176325447986, "grad_norm": 0.5801783800125122, "learning_rate": 1e-05, "loss": 0.5841, "step": 2093 }, { "epoch": 0.2583351324676927, "grad_norm": 0.5075699090957642, "learning_rate": 1e-05, "loss": 0.5149, "step": 2094 }, { "epoch": 0.2584585016809055, "grad_norm": 0.5482556223869324, "learning_rate": 1e-05, "loss": 0.5524, "step": 2095 }, { "epoch": 0.2585818708941184, "grad_norm": 0.6118969321250916, "learning_rate": 1e-05, "loss": 0.612, "step": 2096 }, { "epoch": 0.2587052401073312, "grad_norm": 0.5724919438362122, "learning_rate": 1e-05, "loss": 0.6365, "step": 2097 }, { "epoch": 0.2588286093205441, "grad_norm": 0.6277189254760742, "learning_rate": 1e-05, "loss": 0.6736, "step": 2098 }, { "epoch": 0.2589519785337569, "grad_norm": 0.6223520636558533, "learning_rate": 1e-05, "loss": 0.646, "step": 2099 }, { "epoch": 0.2590753477469697, "grad_norm": 0.6543902158737183, "learning_rate": 1e-05, "loss": 0.7638, "step": 2100 }, { "epoch": 0.2591987169601826, "grad_norm": 0.5494624376296997, "learning_rate": 1e-05, "loss": 0.4749, "step": 2101 }, { "epoch": 0.2593220861733954, "grad_norm": 0.6847425103187561, "learning_rate": 1e-05, "loss": 0.6825, "step": 2102 }, { "epoch": 0.2594454553866083, "grad_norm": 0.559642493724823, "learning_rate": 1e-05, "loss": 0.5056, "step": 2103 }, { "epoch": 0.2595688245998211, "grad_norm": 0.6303842067718506, "learning_rate": 1e-05, "loss": 0.6638, "step": 2104 }, { "epoch": 0.25969219381303393, "grad_norm": 0.5803594589233398, "learning_rate": 1e-05, "loss": 0.6188, "step": 2105 }, { "epoch": 0.2598155630262468, "grad_norm": 0.6190169453620911, "learning_rate": 1e-05, "loss": 0.6067, "step": 2106 }, { "epoch": 0.25993893223945963, "grad_norm": 0.5617637038230896, "learning_rate": 1e-05, "loss": 0.4933, "step": 2107 }, { "epoch": 0.2600623014526725, "grad_norm": 0.541601300239563, "learning_rate": 1e-05, "loss": 0.5518, "step": 2108 }, { "epoch": 0.2601856706658853, "grad_norm": 0.6298816800117493, "learning_rate": 1e-05, "loss": 0.5403, "step": 2109 }, { "epoch": 0.26030903987909815, "grad_norm": 0.5243939161300659, "learning_rate": 1e-05, "loss": 0.526, "step": 2110 }, { "epoch": 0.260432409092311, "grad_norm": 0.567306399345398, "learning_rate": 1e-05, "loss": 0.4999, "step": 2111 }, { "epoch": 0.26055577830552384, "grad_norm": 0.6354963779449463, "learning_rate": 1e-05, "loss": 0.6697, "step": 2112 }, { "epoch": 0.2606791475187367, "grad_norm": 0.6014302372932434, "learning_rate": 1e-05, "loss": 0.5922, "step": 2113 }, { "epoch": 0.26080251673194954, "grad_norm": 0.6279663443565369, "learning_rate": 1e-05, "loss": 0.6916, "step": 2114 }, { "epoch": 0.26092588594516236, "grad_norm": 0.6151391267776489, "learning_rate": 1e-05, "loss": 0.621, "step": 2115 }, { "epoch": 0.26104925515837524, "grad_norm": 0.5174369812011719, "learning_rate": 1e-05, "loss": 0.5305, "step": 2116 }, { "epoch": 0.26117262437158806, "grad_norm": 0.5013149976730347, "learning_rate": 1e-05, "loss": 0.5262, "step": 2117 }, { "epoch": 0.26129599358480093, "grad_norm": 0.570179283618927, "learning_rate": 1e-05, "loss": 0.5416, "step": 2118 }, { "epoch": 0.26141936279801375, "grad_norm": 0.5811960697174072, "learning_rate": 1e-05, "loss": 0.5331, "step": 2119 }, { "epoch": 0.2615427320112266, "grad_norm": 0.5964426398277283, "learning_rate": 1e-05, "loss": 0.6139, "step": 2120 }, { "epoch": 0.26166610122443945, "grad_norm": 0.5746281147003174, "learning_rate": 1e-05, "loss": 0.5285, "step": 2121 }, { "epoch": 0.26178947043765227, "grad_norm": 0.6339004039764404, "learning_rate": 1e-05, "loss": 0.7134, "step": 2122 }, { "epoch": 0.26191283965086515, "grad_norm": 0.7674351334571838, "learning_rate": 1e-05, "loss": 0.7343, "step": 2123 }, { "epoch": 0.26203620886407797, "grad_norm": 0.6881514191627502, "learning_rate": 1e-05, "loss": 0.6473, "step": 2124 }, { "epoch": 0.2621595780772908, "grad_norm": 0.6018426418304443, "learning_rate": 1e-05, "loss": 0.5838, "step": 2125 }, { "epoch": 0.26228294729050367, "grad_norm": 0.6133590936660767, "learning_rate": 1e-05, "loss": 0.6252, "step": 2126 }, { "epoch": 0.2624063165037165, "grad_norm": 0.5773761868476868, "learning_rate": 1e-05, "loss": 0.6341, "step": 2127 }, { "epoch": 0.26252968571692936, "grad_norm": 0.5732102990150452, "learning_rate": 1e-05, "loss": 0.6144, "step": 2128 }, { "epoch": 0.2626530549301422, "grad_norm": 0.6816661953926086, "learning_rate": 1e-05, "loss": 0.6664, "step": 2129 }, { "epoch": 0.262776424143355, "grad_norm": 0.6301412582397461, "learning_rate": 1e-05, "loss": 0.6455, "step": 2130 }, { "epoch": 0.2628997933565679, "grad_norm": 0.5720808506011963, "learning_rate": 1e-05, "loss": 0.5391, "step": 2131 }, { "epoch": 0.2630231625697807, "grad_norm": 0.6289454102516174, "learning_rate": 1e-05, "loss": 0.557, "step": 2132 }, { "epoch": 0.2631465317829936, "grad_norm": 0.533484935760498, "learning_rate": 1e-05, "loss": 0.5861, "step": 2133 }, { "epoch": 0.2632699009962064, "grad_norm": 0.5500680804252625, "learning_rate": 1e-05, "loss": 0.5337, "step": 2134 }, { "epoch": 0.2633932702094192, "grad_norm": 0.6432519555091858, "learning_rate": 1e-05, "loss": 0.6592, "step": 2135 }, { "epoch": 0.2635166394226321, "grad_norm": 0.5614197850227356, "learning_rate": 1e-05, "loss": 0.5143, "step": 2136 }, { "epoch": 0.2636400086358449, "grad_norm": 0.6075356006622314, "learning_rate": 1e-05, "loss": 0.6142, "step": 2137 }, { "epoch": 0.2637633778490578, "grad_norm": 0.6372458338737488, "learning_rate": 1e-05, "loss": 0.5486, "step": 2138 }, { "epoch": 0.2638867470622706, "grad_norm": 0.6955592036247253, "learning_rate": 1e-05, "loss": 0.6653, "step": 2139 }, { "epoch": 0.26401011627548343, "grad_norm": 0.6369081735610962, "learning_rate": 1e-05, "loss": 0.6766, "step": 2140 }, { "epoch": 0.2641334854886963, "grad_norm": 0.5500726103782654, "learning_rate": 1e-05, "loss": 0.5005, "step": 2141 }, { "epoch": 0.2642568547019091, "grad_norm": 0.5835529565811157, "learning_rate": 1e-05, "loss": 0.6158, "step": 2142 }, { "epoch": 0.264380223915122, "grad_norm": 0.5417385101318359, "learning_rate": 1e-05, "loss": 0.4199, "step": 2143 }, { "epoch": 0.2645035931283348, "grad_norm": 0.5246387720108032, "learning_rate": 1e-05, "loss": 0.4848, "step": 2144 }, { "epoch": 0.26462696234154764, "grad_norm": 0.5540857315063477, "learning_rate": 1e-05, "loss": 0.5045, "step": 2145 }, { "epoch": 0.2647503315547605, "grad_norm": 0.5077005624771118, "learning_rate": 1e-05, "loss": 0.5237, "step": 2146 }, { "epoch": 0.26487370076797334, "grad_norm": 0.5796047449111938, "learning_rate": 1e-05, "loss": 0.5553, "step": 2147 }, { "epoch": 0.2649970699811862, "grad_norm": 0.6201474666595459, "learning_rate": 1e-05, "loss": 0.582, "step": 2148 }, { "epoch": 0.26512043919439904, "grad_norm": 0.6089878082275391, "learning_rate": 1e-05, "loss": 0.5213, "step": 2149 }, { "epoch": 0.26524380840761186, "grad_norm": 0.6179409027099609, "learning_rate": 1e-05, "loss": 0.5848, "step": 2150 }, { "epoch": 0.26536717762082473, "grad_norm": 0.6392179727554321, "learning_rate": 1e-05, "loss": 0.5922, "step": 2151 }, { "epoch": 0.26549054683403756, "grad_norm": 0.533717155456543, "learning_rate": 1e-05, "loss": 0.5045, "step": 2152 }, { "epoch": 0.26561391604725043, "grad_norm": 0.6029685735702515, "learning_rate": 1e-05, "loss": 0.565, "step": 2153 }, { "epoch": 0.26573728526046325, "grad_norm": 0.571110725402832, "learning_rate": 1e-05, "loss": 0.632, "step": 2154 }, { "epoch": 0.2658606544736761, "grad_norm": 0.5615322589874268, "learning_rate": 1e-05, "loss": 0.5925, "step": 2155 }, { "epoch": 0.26598402368688895, "grad_norm": 0.5769310593605042, "learning_rate": 1e-05, "loss": 0.6117, "step": 2156 }, { "epoch": 0.26610739290010177, "grad_norm": 0.6086071133613586, "learning_rate": 1e-05, "loss": 0.6432, "step": 2157 }, { "epoch": 0.26623076211331465, "grad_norm": 0.5534799695014954, "learning_rate": 1e-05, "loss": 0.5509, "step": 2158 }, { "epoch": 0.26635413132652747, "grad_norm": 0.5740921497344971, "learning_rate": 1e-05, "loss": 0.6053, "step": 2159 }, { "epoch": 0.2664775005397403, "grad_norm": 0.5557631254196167, "learning_rate": 1e-05, "loss": 0.5133, "step": 2160 }, { "epoch": 0.26660086975295316, "grad_norm": 0.5913804173469543, "learning_rate": 1e-05, "loss": 0.5933, "step": 2161 }, { "epoch": 0.266724238966166, "grad_norm": 0.6499358415603638, "learning_rate": 1e-05, "loss": 0.61, "step": 2162 }, { "epoch": 0.26684760817937886, "grad_norm": 0.5559643507003784, "learning_rate": 1e-05, "loss": 0.5274, "step": 2163 }, { "epoch": 0.2669709773925917, "grad_norm": 0.6275848746299744, "learning_rate": 1e-05, "loss": 0.6431, "step": 2164 }, { "epoch": 0.2670943466058045, "grad_norm": 0.5896042585372925, "learning_rate": 1e-05, "loss": 0.5916, "step": 2165 }, { "epoch": 0.2672177158190174, "grad_norm": 0.6655544638633728, "learning_rate": 1e-05, "loss": 0.6856, "step": 2166 }, { "epoch": 0.2673410850322302, "grad_norm": 0.544803261756897, "learning_rate": 1e-05, "loss": 0.528, "step": 2167 }, { "epoch": 0.2674644542454431, "grad_norm": 0.5624529719352722, "learning_rate": 1e-05, "loss": 0.5807, "step": 2168 }, { "epoch": 0.2675878234586559, "grad_norm": 0.548088550567627, "learning_rate": 1e-05, "loss": 0.5814, "step": 2169 }, { "epoch": 0.2677111926718687, "grad_norm": 0.5262620449066162, "learning_rate": 1e-05, "loss": 0.5274, "step": 2170 }, { "epoch": 0.2678345618850816, "grad_norm": 0.5429219603538513, "learning_rate": 1e-05, "loss": 0.5771, "step": 2171 }, { "epoch": 0.2679579310982944, "grad_norm": 0.5715206265449524, "learning_rate": 1e-05, "loss": 0.522, "step": 2172 }, { "epoch": 0.2680813003115073, "grad_norm": 0.49558427929878235, "learning_rate": 1e-05, "loss": 0.4975, "step": 2173 }, { "epoch": 0.2682046695247201, "grad_norm": 0.6480902433395386, "learning_rate": 1e-05, "loss": 0.6973, "step": 2174 }, { "epoch": 0.26832803873793293, "grad_norm": 0.5327266454696655, "learning_rate": 1e-05, "loss": 0.5123, "step": 2175 }, { "epoch": 0.2684514079511458, "grad_norm": 0.5410640239715576, "learning_rate": 1e-05, "loss": 0.5611, "step": 2176 }, { "epoch": 0.2685747771643586, "grad_norm": 0.606013834476471, "learning_rate": 1e-05, "loss": 0.5327, "step": 2177 }, { "epoch": 0.2686981463775715, "grad_norm": 0.5959902405738831, "learning_rate": 1e-05, "loss": 0.6441, "step": 2178 }, { "epoch": 0.2688215155907843, "grad_norm": 0.6142771244049072, "learning_rate": 1e-05, "loss": 0.6818, "step": 2179 }, { "epoch": 0.26894488480399714, "grad_norm": 0.6665787100791931, "learning_rate": 1e-05, "loss": 0.6388, "step": 2180 }, { "epoch": 0.26906825401721, "grad_norm": 0.5562440752983093, "learning_rate": 1e-05, "loss": 0.5846, "step": 2181 }, { "epoch": 0.26919162323042284, "grad_norm": 0.5759619474411011, "learning_rate": 1e-05, "loss": 0.5425, "step": 2182 }, { "epoch": 0.2693149924436357, "grad_norm": 0.6260390281677246, "learning_rate": 1e-05, "loss": 0.6306, "step": 2183 }, { "epoch": 0.26943836165684854, "grad_norm": 0.526411235332489, "learning_rate": 1e-05, "loss": 0.4698, "step": 2184 }, { "epoch": 0.26956173087006136, "grad_norm": 0.5464258193969727, "learning_rate": 1e-05, "loss": 0.5273, "step": 2185 }, { "epoch": 0.26968510008327423, "grad_norm": 0.6726984977722168, "learning_rate": 1e-05, "loss": 0.6032, "step": 2186 }, { "epoch": 0.26980846929648705, "grad_norm": 0.549979567527771, "learning_rate": 1e-05, "loss": 0.493, "step": 2187 }, { "epoch": 0.26993183850969993, "grad_norm": 0.5730496048927307, "learning_rate": 1e-05, "loss": 0.5623, "step": 2188 }, { "epoch": 0.27005520772291275, "grad_norm": 0.5424177646636963, "learning_rate": 1e-05, "loss": 0.5207, "step": 2189 }, { "epoch": 0.27017857693612557, "grad_norm": 0.5452865362167358, "learning_rate": 1e-05, "loss": 0.5265, "step": 2190 }, { "epoch": 0.27030194614933845, "grad_norm": 0.60597163438797, "learning_rate": 1e-05, "loss": 0.6232, "step": 2191 }, { "epoch": 0.27042531536255127, "grad_norm": 0.6722461581230164, "learning_rate": 1e-05, "loss": 0.5899, "step": 2192 }, { "epoch": 0.27054868457576414, "grad_norm": 0.5756697654724121, "learning_rate": 1e-05, "loss": 0.5867, "step": 2193 }, { "epoch": 0.27067205378897696, "grad_norm": 0.7005886435508728, "learning_rate": 1e-05, "loss": 0.5756, "step": 2194 }, { "epoch": 0.2707954230021898, "grad_norm": 0.5788170099258423, "learning_rate": 1e-05, "loss": 0.6046, "step": 2195 }, { "epoch": 0.27091879221540266, "grad_norm": 0.5066601037979126, "learning_rate": 1e-05, "loss": 0.4414, "step": 2196 }, { "epoch": 0.2710421614286155, "grad_norm": 0.5440512895584106, "learning_rate": 1e-05, "loss": 0.5109, "step": 2197 }, { "epoch": 0.27116553064182836, "grad_norm": 0.5607513189315796, "learning_rate": 1e-05, "loss": 0.5884, "step": 2198 }, { "epoch": 0.2712888998550412, "grad_norm": 0.5707900524139404, "learning_rate": 1e-05, "loss": 0.5322, "step": 2199 }, { "epoch": 0.271412269068254, "grad_norm": 0.633741557598114, "learning_rate": 1e-05, "loss": 0.7154, "step": 2200 }, { "epoch": 0.2715356382814669, "grad_norm": 0.5308734178543091, "learning_rate": 1e-05, "loss": 0.5247, "step": 2201 }, { "epoch": 0.2716590074946797, "grad_norm": 0.6724716424942017, "learning_rate": 1e-05, "loss": 0.6931, "step": 2202 }, { "epoch": 0.27178237670789257, "grad_norm": 0.5453492403030396, "learning_rate": 1e-05, "loss": 0.574, "step": 2203 }, { "epoch": 0.2719057459211054, "grad_norm": 0.5882710218429565, "learning_rate": 1e-05, "loss": 0.6129, "step": 2204 }, { "epoch": 0.2720291151343182, "grad_norm": 0.5817874073982239, "learning_rate": 1e-05, "loss": 0.5979, "step": 2205 }, { "epoch": 0.2721524843475311, "grad_norm": 0.5632855892181396, "learning_rate": 1e-05, "loss": 0.5978, "step": 2206 }, { "epoch": 0.2722758535607439, "grad_norm": 0.5549627542495728, "learning_rate": 1e-05, "loss": 0.5681, "step": 2207 }, { "epoch": 0.2723992227739568, "grad_norm": 0.5858226418495178, "learning_rate": 1e-05, "loss": 0.6037, "step": 2208 }, { "epoch": 0.2725225919871696, "grad_norm": 0.532303512096405, "learning_rate": 1e-05, "loss": 0.5362, "step": 2209 }, { "epoch": 0.2726459612003824, "grad_norm": 0.6061787605285645, "learning_rate": 1e-05, "loss": 0.6843, "step": 2210 }, { "epoch": 0.2727693304135953, "grad_norm": 0.5966851711273193, "learning_rate": 1e-05, "loss": 0.6145, "step": 2211 }, { "epoch": 0.2728926996268081, "grad_norm": 0.6757405400276184, "learning_rate": 1e-05, "loss": 0.6506, "step": 2212 }, { "epoch": 0.273016068840021, "grad_norm": 0.5614364743232727, "learning_rate": 1e-05, "loss": 0.5315, "step": 2213 }, { "epoch": 0.2731394380532338, "grad_norm": 0.5324529409408569, "learning_rate": 1e-05, "loss": 0.4949, "step": 2214 }, { "epoch": 0.27326280726644664, "grad_norm": 0.5212768316268921, "learning_rate": 1e-05, "loss": 0.5047, "step": 2215 }, { "epoch": 0.2733861764796595, "grad_norm": 0.548223614692688, "learning_rate": 1e-05, "loss": 0.5673, "step": 2216 }, { "epoch": 0.27350954569287234, "grad_norm": 0.6584210395812988, "learning_rate": 1e-05, "loss": 0.7468, "step": 2217 }, { "epoch": 0.2736329149060852, "grad_norm": 0.5778801441192627, "learning_rate": 1e-05, "loss": 0.5648, "step": 2218 }, { "epoch": 0.27375628411929803, "grad_norm": 0.5420522093772888, "learning_rate": 1e-05, "loss": 0.4748, "step": 2219 }, { "epoch": 0.27387965333251085, "grad_norm": 0.6072900295257568, "learning_rate": 1e-05, "loss": 0.6306, "step": 2220 }, { "epoch": 0.27400302254572373, "grad_norm": 0.5729108452796936, "learning_rate": 1e-05, "loss": 0.6315, "step": 2221 }, { "epoch": 0.27412639175893655, "grad_norm": 0.6082262992858887, "learning_rate": 1e-05, "loss": 0.5943, "step": 2222 }, { "epoch": 0.2742497609721494, "grad_norm": 0.518614649772644, "learning_rate": 1e-05, "loss": 0.5103, "step": 2223 }, { "epoch": 0.27437313018536225, "grad_norm": 0.6417781710624695, "learning_rate": 1e-05, "loss": 0.6921, "step": 2224 }, { "epoch": 0.27449649939857507, "grad_norm": 0.5856736898422241, "learning_rate": 1e-05, "loss": 0.5797, "step": 2225 }, { "epoch": 0.27461986861178794, "grad_norm": 0.6277192831039429, "learning_rate": 1e-05, "loss": 0.6524, "step": 2226 }, { "epoch": 0.27474323782500076, "grad_norm": 0.5780797600746155, "learning_rate": 1e-05, "loss": 0.5521, "step": 2227 }, { "epoch": 0.27486660703821364, "grad_norm": 0.6006084680557251, "learning_rate": 1e-05, "loss": 0.5456, "step": 2228 }, { "epoch": 0.27498997625142646, "grad_norm": 0.5969318151473999, "learning_rate": 1e-05, "loss": 0.6411, "step": 2229 }, { "epoch": 0.2751133454646393, "grad_norm": 0.5789240002632141, "learning_rate": 1e-05, "loss": 0.61, "step": 2230 }, { "epoch": 0.27523671467785216, "grad_norm": 0.5369839668273926, "learning_rate": 1e-05, "loss": 0.5369, "step": 2231 }, { "epoch": 0.275360083891065, "grad_norm": 0.6735830903053284, "learning_rate": 1e-05, "loss": 0.6872, "step": 2232 }, { "epoch": 0.27548345310427785, "grad_norm": 0.537632405757904, "learning_rate": 1e-05, "loss": 0.465, "step": 2233 }, { "epoch": 0.2756068223174907, "grad_norm": 0.6250188946723938, "learning_rate": 1e-05, "loss": 0.5614, "step": 2234 }, { "epoch": 0.2757301915307035, "grad_norm": 0.5574538111686707, "learning_rate": 1e-05, "loss": 0.5666, "step": 2235 }, { "epoch": 0.27585356074391637, "grad_norm": 0.539526641368866, "learning_rate": 1e-05, "loss": 0.5737, "step": 2236 }, { "epoch": 0.2759769299571292, "grad_norm": 0.5854130983352661, "learning_rate": 1e-05, "loss": 0.567, "step": 2237 }, { "epoch": 0.27610029917034207, "grad_norm": 0.5973783135414124, "learning_rate": 1e-05, "loss": 0.5758, "step": 2238 }, { "epoch": 0.2762236683835549, "grad_norm": 0.5666554570198059, "learning_rate": 1e-05, "loss": 0.5411, "step": 2239 }, { "epoch": 0.2763470375967677, "grad_norm": 0.5650559067726135, "learning_rate": 1e-05, "loss": 0.4616, "step": 2240 }, { "epoch": 0.2764704068099806, "grad_norm": 0.6380279660224915, "learning_rate": 1e-05, "loss": 0.6165, "step": 2241 }, { "epoch": 0.2765937760231934, "grad_norm": 0.5453628301620483, "learning_rate": 1e-05, "loss": 0.5093, "step": 2242 }, { "epoch": 0.2767171452364063, "grad_norm": 0.6388252973556519, "learning_rate": 1e-05, "loss": 0.7153, "step": 2243 }, { "epoch": 0.2768405144496191, "grad_norm": 0.5787032842636108, "learning_rate": 1e-05, "loss": 0.5719, "step": 2244 }, { "epoch": 0.2769638836628319, "grad_norm": 0.6002602577209473, "learning_rate": 1e-05, "loss": 0.674, "step": 2245 }, { "epoch": 0.2770872528760448, "grad_norm": 0.5997739434242249, "learning_rate": 1e-05, "loss": 0.6239, "step": 2246 }, { "epoch": 0.2772106220892576, "grad_norm": 0.5374709963798523, "learning_rate": 1e-05, "loss": 0.4936, "step": 2247 }, { "epoch": 0.2773339913024705, "grad_norm": 0.5997715592384338, "learning_rate": 1e-05, "loss": 0.5926, "step": 2248 }, { "epoch": 0.2774573605156833, "grad_norm": 0.5625751614570618, "learning_rate": 1e-05, "loss": 0.6281, "step": 2249 }, { "epoch": 0.27758072972889614, "grad_norm": 0.6118547916412354, "learning_rate": 1e-05, "loss": 0.6517, "step": 2250 }, { "epoch": 0.277704098942109, "grad_norm": 0.6256664395332336, "learning_rate": 1e-05, "loss": 0.6144, "step": 2251 }, { "epoch": 0.27782746815532183, "grad_norm": 0.5487454533576965, "learning_rate": 1e-05, "loss": 0.5741, "step": 2252 }, { "epoch": 0.2779508373685347, "grad_norm": 0.5752818584442139, "learning_rate": 1e-05, "loss": 0.5451, "step": 2253 }, { "epoch": 0.27807420658174753, "grad_norm": 0.5358497500419617, "learning_rate": 1e-05, "loss": 0.5167, "step": 2254 }, { "epoch": 0.27819757579496035, "grad_norm": 0.6294646263122559, "learning_rate": 1e-05, "loss": 0.7253, "step": 2255 }, { "epoch": 0.2783209450081732, "grad_norm": 0.5275946855545044, "learning_rate": 1e-05, "loss": 0.5244, "step": 2256 }, { "epoch": 0.27844431422138605, "grad_norm": 0.5469041466712952, "learning_rate": 1e-05, "loss": 0.5211, "step": 2257 }, { "epoch": 0.2785676834345989, "grad_norm": 0.5912072658538818, "learning_rate": 1e-05, "loss": 0.5775, "step": 2258 }, { "epoch": 0.27869105264781174, "grad_norm": 0.6104661226272583, "learning_rate": 1e-05, "loss": 0.6484, "step": 2259 }, { "epoch": 0.27881442186102456, "grad_norm": 0.6164336800575256, "learning_rate": 1e-05, "loss": 0.624, "step": 2260 }, { "epoch": 0.27893779107423744, "grad_norm": 0.5991618037223816, "learning_rate": 1e-05, "loss": 0.5426, "step": 2261 }, { "epoch": 0.27906116028745026, "grad_norm": 0.5784357190132141, "learning_rate": 1e-05, "loss": 0.613, "step": 2262 }, { "epoch": 0.27918452950066314, "grad_norm": 0.5460560321807861, "learning_rate": 1e-05, "loss": 0.5088, "step": 2263 }, { "epoch": 0.27930789871387596, "grad_norm": 0.5369218587875366, "learning_rate": 1e-05, "loss": 0.6058, "step": 2264 }, { "epoch": 0.2794312679270888, "grad_norm": 0.5893092751502991, "learning_rate": 1e-05, "loss": 0.5483, "step": 2265 }, { "epoch": 0.27955463714030165, "grad_norm": 0.5872765779495239, "learning_rate": 1e-05, "loss": 0.5348, "step": 2266 }, { "epoch": 0.2796780063535145, "grad_norm": 0.624366819858551, "learning_rate": 1e-05, "loss": 0.6641, "step": 2267 }, { "epoch": 0.27980137556672735, "grad_norm": 0.5761240124702454, "learning_rate": 1e-05, "loss": 0.5621, "step": 2268 }, { "epoch": 0.27992474477994017, "grad_norm": 0.5397926568984985, "learning_rate": 1e-05, "loss": 0.5251, "step": 2269 }, { "epoch": 0.280048113993153, "grad_norm": 0.6000964045524597, "learning_rate": 1e-05, "loss": 0.5172, "step": 2270 }, { "epoch": 0.28017148320636587, "grad_norm": 0.6654014587402344, "learning_rate": 1e-05, "loss": 0.7063, "step": 2271 }, { "epoch": 0.2802948524195787, "grad_norm": 0.6031286120414734, "learning_rate": 1e-05, "loss": 0.5707, "step": 2272 }, { "epoch": 0.28041822163279156, "grad_norm": 0.5326694250106812, "learning_rate": 1e-05, "loss": 0.5075, "step": 2273 }, { "epoch": 0.2805415908460044, "grad_norm": 0.6078317165374756, "learning_rate": 1e-05, "loss": 0.6157, "step": 2274 }, { "epoch": 0.2806649600592172, "grad_norm": 0.5960178375244141, "learning_rate": 1e-05, "loss": 0.583, "step": 2275 }, { "epoch": 0.2807883292724301, "grad_norm": 0.6158958673477173, "learning_rate": 1e-05, "loss": 0.6345, "step": 2276 }, { "epoch": 0.2809116984856429, "grad_norm": 0.5309980511665344, "learning_rate": 1e-05, "loss": 0.526, "step": 2277 }, { "epoch": 0.2810350676988557, "grad_norm": 0.5713697075843811, "learning_rate": 1e-05, "loss": 0.5853, "step": 2278 }, { "epoch": 0.2811584369120686, "grad_norm": 0.5510396361351013, "learning_rate": 1e-05, "loss": 0.6247, "step": 2279 }, { "epoch": 0.2812818061252814, "grad_norm": 0.5979798436164856, "learning_rate": 1e-05, "loss": 0.6271, "step": 2280 }, { "epoch": 0.2814051753384943, "grad_norm": 0.5944547057151794, "learning_rate": 1e-05, "loss": 0.6302, "step": 2281 }, { "epoch": 0.2815285445517071, "grad_norm": 0.527895450592041, "learning_rate": 1e-05, "loss": 0.5115, "step": 2282 }, { "epoch": 0.28165191376491994, "grad_norm": 0.5518069863319397, "learning_rate": 1e-05, "loss": 0.518, "step": 2283 }, { "epoch": 0.2817752829781328, "grad_norm": 0.6703461408615112, "learning_rate": 1e-05, "loss": 0.5864, "step": 2284 }, { "epoch": 0.28189865219134563, "grad_norm": 0.5994282364845276, "learning_rate": 1e-05, "loss": 0.5803, "step": 2285 }, { "epoch": 0.2820220214045585, "grad_norm": 0.7032206654548645, "learning_rate": 1e-05, "loss": 0.6326, "step": 2286 }, { "epoch": 0.28214539061777133, "grad_norm": 0.6824490427970886, "learning_rate": 1e-05, "loss": 0.6274, "step": 2287 }, { "epoch": 0.28226875983098415, "grad_norm": 0.6062069535255432, "learning_rate": 1e-05, "loss": 0.5232, "step": 2288 }, { "epoch": 0.282392129044197, "grad_norm": 0.6288381218910217, "learning_rate": 1e-05, "loss": 0.703, "step": 2289 }, { "epoch": 0.28251549825740985, "grad_norm": 0.5879991054534912, "learning_rate": 1e-05, "loss": 0.6345, "step": 2290 }, { "epoch": 0.2826388674706227, "grad_norm": 0.5845031142234802, "learning_rate": 1e-05, "loss": 0.5365, "step": 2291 }, { "epoch": 0.28276223668383554, "grad_norm": 0.5537060499191284, "learning_rate": 1e-05, "loss": 0.6259, "step": 2292 }, { "epoch": 0.28288560589704836, "grad_norm": 0.6131176352500916, "learning_rate": 1e-05, "loss": 0.7013, "step": 2293 }, { "epoch": 0.28300897511026124, "grad_norm": 0.6152113080024719, "learning_rate": 1e-05, "loss": 0.5622, "step": 2294 }, { "epoch": 0.28313234432347406, "grad_norm": 0.6760241389274597, "learning_rate": 1e-05, "loss": 0.594, "step": 2295 }, { "epoch": 0.28325571353668694, "grad_norm": 0.5602095723152161, "learning_rate": 1e-05, "loss": 0.5402, "step": 2296 }, { "epoch": 0.28337908274989976, "grad_norm": 0.5061765313148499, "learning_rate": 1e-05, "loss": 0.4561, "step": 2297 }, { "epoch": 0.2835024519631126, "grad_norm": 0.6275268197059631, "learning_rate": 1e-05, "loss": 0.6933, "step": 2298 }, { "epoch": 0.28362582117632545, "grad_norm": 0.6003345251083374, "learning_rate": 1e-05, "loss": 0.6539, "step": 2299 }, { "epoch": 0.2837491903895383, "grad_norm": 0.5523404479026794, "learning_rate": 1e-05, "loss": 0.5629, "step": 2300 }, { "epoch": 0.28387255960275115, "grad_norm": 0.6541203856468201, "learning_rate": 1e-05, "loss": 0.6253, "step": 2301 }, { "epoch": 0.28399592881596397, "grad_norm": 0.5945109724998474, "learning_rate": 1e-05, "loss": 0.5868, "step": 2302 }, { "epoch": 0.2841192980291768, "grad_norm": 0.585297167301178, "learning_rate": 1e-05, "loss": 0.5818, "step": 2303 }, { "epoch": 0.28424266724238967, "grad_norm": 0.552672803401947, "learning_rate": 1e-05, "loss": 0.4585, "step": 2304 }, { "epoch": 0.2843660364556025, "grad_norm": 0.5890730619430542, "learning_rate": 1e-05, "loss": 0.6882, "step": 2305 }, { "epoch": 0.28448940566881536, "grad_norm": 0.6675970554351807, "learning_rate": 1e-05, "loss": 0.6629, "step": 2306 }, { "epoch": 0.2846127748820282, "grad_norm": 0.6022123098373413, "learning_rate": 1e-05, "loss": 0.5177, "step": 2307 }, { "epoch": 0.284736144095241, "grad_norm": 0.5325331091880798, "learning_rate": 1e-05, "loss": 0.4863, "step": 2308 }, { "epoch": 0.2848595133084539, "grad_norm": 0.5248379707336426, "learning_rate": 1e-05, "loss": 0.4905, "step": 2309 }, { "epoch": 0.2849828825216667, "grad_norm": 0.5989843010902405, "learning_rate": 1e-05, "loss": 0.5763, "step": 2310 }, { "epoch": 0.2851062517348796, "grad_norm": 0.5433472394943237, "learning_rate": 1e-05, "loss": 0.4259, "step": 2311 }, { "epoch": 0.2852296209480924, "grad_norm": 0.6322371959686279, "learning_rate": 1e-05, "loss": 0.6399, "step": 2312 }, { "epoch": 0.2853529901613052, "grad_norm": 0.5604290962219238, "learning_rate": 1e-05, "loss": 0.5524, "step": 2313 }, { "epoch": 0.2854763593745181, "grad_norm": 0.588885486125946, "learning_rate": 1e-05, "loss": 0.6208, "step": 2314 }, { "epoch": 0.2855997285877309, "grad_norm": 0.6469317674636841, "learning_rate": 1e-05, "loss": 0.6179, "step": 2315 }, { "epoch": 0.2857230978009438, "grad_norm": 0.5953658223152161, "learning_rate": 1e-05, "loss": 0.5668, "step": 2316 }, { "epoch": 0.2858464670141566, "grad_norm": 0.6499996781349182, "learning_rate": 1e-05, "loss": 0.6044, "step": 2317 }, { "epoch": 0.28596983622736943, "grad_norm": 0.5890172719955444, "learning_rate": 1e-05, "loss": 0.6155, "step": 2318 }, { "epoch": 0.2860932054405823, "grad_norm": 0.681381106376648, "learning_rate": 1e-05, "loss": 0.7349, "step": 2319 }, { "epoch": 0.28621657465379513, "grad_norm": 0.5077255964279175, "learning_rate": 1e-05, "loss": 0.4586, "step": 2320 }, { "epoch": 0.286339943867008, "grad_norm": 0.5773807168006897, "learning_rate": 1e-05, "loss": 0.5842, "step": 2321 }, { "epoch": 0.2864633130802208, "grad_norm": 0.49900662899017334, "learning_rate": 1e-05, "loss": 0.4706, "step": 2322 }, { "epoch": 0.28658668229343365, "grad_norm": 0.5490404963493347, "learning_rate": 1e-05, "loss": 0.5164, "step": 2323 }, { "epoch": 0.2867100515066465, "grad_norm": 0.5758062601089478, "learning_rate": 1e-05, "loss": 0.581, "step": 2324 }, { "epoch": 0.28683342071985934, "grad_norm": 0.503603458404541, "learning_rate": 1e-05, "loss": 0.4893, "step": 2325 }, { "epoch": 0.2869567899330722, "grad_norm": 0.6031987071037292, "learning_rate": 1e-05, "loss": 0.5955, "step": 2326 }, { "epoch": 0.28708015914628504, "grad_norm": 0.6303683519363403, "learning_rate": 1e-05, "loss": 0.583, "step": 2327 }, { "epoch": 0.28720352835949786, "grad_norm": 0.55641108751297, "learning_rate": 1e-05, "loss": 0.562, "step": 2328 }, { "epoch": 0.28732689757271074, "grad_norm": 0.6013742089271545, "learning_rate": 1e-05, "loss": 0.615, "step": 2329 }, { "epoch": 0.28745026678592356, "grad_norm": 0.6129924654960632, "learning_rate": 1e-05, "loss": 0.6809, "step": 2330 }, { "epoch": 0.28757363599913643, "grad_norm": 0.6180858612060547, "learning_rate": 1e-05, "loss": 0.5389, "step": 2331 }, { "epoch": 0.28769700521234925, "grad_norm": 0.546268880367279, "learning_rate": 1e-05, "loss": 0.4659, "step": 2332 }, { "epoch": 0.2878203744255621, "grad_norm": 0.635063111782074, "learning_rate": 1e-05, "loss": 0.6507, "step": 2333 }, { "epoch": 0.28794374363877495, "grad_norm": 0.615260899066925, "learning_rate": 1e-05, "loss": 0.633, "step": 2334 }, { "epoch": 0.28806711285198777, "grad_norm": 0.5641335248947144, "learning_rate": 1e-05, "loss": 0.581, "step": 2335 }, { "epoch": 0.28819048206520065, "grad_norm": 0.5553092956542969, "learning_rate": 1e-05, "loss": 0.4441, "step": 2336 }, { "epoch": 0.28831385127841347, "grad_norm": 0.5795096158981323, "learning_rate": 1e-05, "loss": 0.6353, "step": 2337 }, { "epoch": 0.2884372204916263, "grad_norm": 0.5392029881477356, "learning_rate": 1e-05, "loss": 0.5288, "step": 2338 }, { "epoch": 0.28856058970483917, "grad_norm": 0.6342945098876953, "learning_rate": 1e-05, "loss": 0.7292, "step": 2339 }, { "epoch": 0.288683958918052, "grad_norm": 0.5537346005439758, "learning_rate": 1e-05, "loss": 0.5554, "step": 2340 }, { "epoch": 0.28880732813126486, "grad_norm": 0.6107829809188843, "learning_rate": 1e-05, "loss": 0.5702, "step": 2341 }, { "epoch": 0.2889306973444777, "grad_norm": 0.6226266026496887, "learning_rate": 1e-05, "loss": 0.6977, "step": 2342 }, { "epoch": 0.2890540665576905, "grad_norm": 0.6120601892471313, "learning_rate": 1e-05, "loss": 0.5463, "step": 2343 }, { "epoch": 0.2891774357709034, "grad_norm": 0.5810701251029968, "learning_rate": 1e-05, "loss": 0.5525, "step": 2344 }, { "epoch": 0.2893008049841162, "grad_norm": 0.569961428642273, "learning_rate": 1e-05, "loss": 0.6166, "step": 2345 }, { "epoch": 0.2894241741973291, "grad_norm": 0.5786641836166382, "learning_rate": 1e-05, "loss": 0.6273, "step": 2346 }, { "epoch": 0.2895475434105419, "grad_norm": 0.592585027217865, "learning_rate": 1e-05, "loss": 0.5625, "step": 2347 }, { "epoch": 0.2896709126237547, "grad_norm": 0.6379648447036743, "learning_rate": 1e-05, "loss": 0.6546, "step": 2348 }, { "epoch": 0.2897942818369676, "grad_norm": 0.5445111989974976, "learning_rate": 1e-05, "loss": 0.4854, "step": 2349 }, { "epoch": 0.2899176510501804, "grad_norm": 0.6403691172599792, "learning_rate": 1e-05, "loss": 0.6213, "step": 2350 }, { "epoch": 0.2900410202633933, "grad_norm": 0.5552164316177368, "learning_rate": 1e-05, "loss": 0.5479, "step": 2351 }, { "epoch": 0.2901643894766061, "grad_norm": 0.5948025584220886, "learning_rate": 1e-05, "loss": 0.5849, "step": 2352 }, { "epoch": 0.29028775868981893, "grad_norm": 0.6450167298316956, "learning_rate": 1e-05, "loss": 0.6496, "step": 2353 }, { "epoch": 0.2904111279030318, "grad_norm": 0.6129176020622253, "learning_rate": 1e-05, "loss": 0.6891, "step": 2354 }, { "epoch": 0.2905344971162446, "grad_norm": 0.5853319764137268, "learning_rate": 1e-05, "loss": 0.4834, "step": 2355 }, { "epoch": 0.2906578663294575, "grad_norm": 0.5333989262580872, "learning_rate": 1e-05, "loss": 0.5487, "step": 2356 }, { "epoch": 0.2907812355426703, "grad_norm": 0.6229951977729797, "learning_rate": 1e-05, "loss": 0.6149, "step": 2357 }, { "epoch": 0.29090460475588314, "grad_norm": 0.585779070854187, "learning_rate": 1e-05, "loss": 0.5537, "step": 2358 }, { "epoch": 0.291027973969096, "grad_norm": 0.5579192042350769, "learning_rate": 1e-05, "loss": 0.5617, "step": 2359 }, { "epoch": 0.29115134318230884, "grad_norm": 0.5332575440406799, "learning_rate": 1e-05, "loss": 0.5641, "step": 2360 }, { "epoch": 0.2912747123955217, "grad_norm": 0.6902775168418884, "learning_rate": 1e-05, "loss": 0.7053, "step": 2361 }, { "epoch": 0.29139808160873454, "grad_norm": 0.6597367525100708, "learning_rate": 1e-05, "loss": 0.5935, "step": 2362 }, { "epoch": 0.29152145082194736, "grad_norm": 0.6162698268890381, "learning_rate": 1e-05, "loss": 0.6261, "step": 2363 }, { "epoch": 0.29164482003516023, "grad_norm": 0.5811376571655273, "learning_rate": 1e-05, "loss": 0.5387, "step": 2364 }, { "epoch": 0.29176818924837306, "grad_norm": 0.5596752762794495, "learning_rate": 1e-05, "loss": 0.6111, "step": 2365 }, { "epoch": 0.29189155846158593, "grad_norm": 0.633841872215271, "learning_rate": 1e-05, "loss": 0.6107, "step": 2366 }, { "epoch": 0.29201492767479875, "grad_norm": 0.5864389538764954, "learning_rate": 1e-05, "loss": 0.6366, "step": 2367 }, { "epoch": 0.2921382968880116, "grad_norm": 0.5833103656768799, "learning_rate": 1e-05, "loss": 0.5521, "step": 2368 }, { "epoch": 0.29226166610122445, "grad_norm": 0.7431034445762634, "learning_rate": 1e-05, "loss": 0.6059, "step": 2369 }, { "epoch": 0.29238503531443727, "grad_norm": 0.565449059009552, "learning_rate": 1e-05, "loss": 0.5368, "step": 2370 }, { "epoch": 0.29250840452765015, "grad_norm": 0.5702537894248962, "learning_rate": 1e-05, "loss": 0.5827, "step": 2371 }, { "epoch": 0.29263177374086297, "grad_norm": 0.6723724007606506, "learning_rate": 1e-05, "loss": 0.6424, "step": 2372 }, { "epoch": 0.2927551429540758, "grad_norm": 0.5692319869995117, "learning_rate": 1e-05, "loss": 0.5266, "step": 2373 }, { "epoch": 0.29287851216728866, "grad_norm": 0.6311162114143372, "learning_rate": 1e-05, "loss": 0.6211, "step": 2374 }, { "epoch": 0.2930018813805015, "grad_norm": 0.6031387448310852, "learning_rate": 1e-05, "loss": 0.6764, "step": 2375 }, { "epoch": 0.29312525059371436, "grad_norm": 0.5878311395645142, "learning_rate": 1e-05, "loss": 0.5523, "step": 2376 }, { "epoch": 0.2932486198069272, "grad_norm": 0.5232607126235962, "learning_rate": 1e-05, "loss": 0.5215, "step": 2377 }, { "epoch": 0.29337198902014, "grad_norm": 0.5275657176971436, "learning_rate": 1e-05, "loss": 0.5216, "step": 2378 }, { "epoch": 0.2934953582333529, "grad_norm": 0.5634950399398804, "learning_rate": 1e-05, "loss": 0.5973, "step": 2379 }, { "epoch": 0.2936187274465657, "grad_norm": 0.674054741859436, "learning_rate": 1e-05, "loss": 0.7328, "step": 2380 }, { "epoch": 0.2937420966597786, "grad_norm": 0.6404186487197876, "learning_rate": 1e-05, "loss": 0.6247, "step": 2381 }, { "epoch": 0.2938654658729914, "grad_norm": 0.5585705041885376, "learning_rate": 1e-05, "loss": 0.5829, "step": 2382 }, { "epoch": 0.2939888350862042, "grad_norm": 0.608405590057373, "learning_rate": 1e-05, "loss": 0.6482, "step": 2383 }, { "epoch": 0.2941122042994171, "grad_norm": 0.5888540148735046, "learning_rate": 1e-05, "loss": 0.627, "step": 2384 }, { "epoch": 0.2942355735126299, "grad_norm": 0.5527772903442383, "learning_rate": 1e-05, "loss": 0.5636, "step": 2385 }, { "epoch": 0.2943589427258428, "grad_norm": 0.5751370191574097, "learning_rate": 1e-05, "loss": 0.547, "step": 2386 }, { "epoch": 0.2944823119390556, "grad_norm": 0.5628889799118042, "learning_rate": 1e-05, "loss": 0.539, "step": 2387 }, { "epoch": 0.29460568115226843, "grad_norm": 0.5382176041603088, "learning_rate": 1e-05, "loss": 0.4843, "step": 2388 }, { "epoch": 0.2947290503654813, "grad_norm": 0.6039326786994934, "learning_rate": 1e-05, "loss": 0.6711, "step": 2389 }, { "epoch": 0.2948524195786941, "grad_norm": 0.6583518981933594, "learning_rate": 1e-05, "loss": 0.7386, "step": 2390 }, { "epoch": 0.294975788791907, "grad_norm": 0.6082655787467957, "learning_rate": 1e-05, "loss": 0.6523, "step": 2391 }, { "epoch": 0.2950991580051198, "grad_norm": 0.529854953289032, "learning_rate": 1e-05, "loss": 0.4869, "step": 2392 }, { "epoch": 0.29522252721833264, "grad_norm": 0.6232805252075195, "learning_rate": 1e-05, "loss": 0.6474, "step": 2393 }, { "epoch": 0.2953458964315455, "grad_norm": 0.5458241701126099, "learning_rate": 1e-05, "loss": 0.5532, "step": 2394 }, { "epoch": 0.29546926564475834, "grad_norm": 0.6342198848724365, "learning_rate": 1e-05, "loss": 0.612, "step": 2395 }, { "epoch": 0.2955926348579712, "grad_norm": 0.48685595393180847, "learning_rate": 1e-05, "loss": 0.5034, "step": 2396 }, { "epoch": 0.29571600407118404, "grad_norm": 0.6199683547019958, "learning_rate": 1e-05, "loss": 0.5637, "step": 2397 }, { "epoch": 0.29583937328439686, "grad_norm": 0.6577099561691284, "learning_rate": 1e-05, "loss": 0.6199, "step": 2398 }, { "epoch": 0.29596274249760973, "grad_norm": 0.5408560037612915, "learning_rate": 1e-05, "loss": 0.4921, "step": 2399 }, { "epoch": 0.29608611171082255, "grad_norm": 0.5897321701049805, "learning_rate": 1e-05, "loss": 0.5841, "step": 2400 }, { "epoch": 0.29620948092403543, "grad_norm": 0.6104374527931213, "learning_rate": 1e-05, "loss": 0.7025, "step": 2401 }, { "epoch": 0.29633285013724825, "grad_norm": 0.5948423147201538, "learning_rate": 1e-05, "loss": 0.7103, "step": 2402 }, { "epoch": 0.29645621935046107, "grad_norm": 0.5439925193786621, "learning_rate": 1e-05, "loss": 0.5217, "step": 2403 }, { "epoch": 0.29657958856367395, "grad_norm": 0.5917166471481323, "learning_rate": 1e-05, "loss": 0.6049, "step": 2404 }, { "epoch": 0.29670295777688677, "grad_norm": 0.545092761516571, "learning_rate": 1e-05, "loss": 0.5904, "step": 2405 }, { "epoch": 0.29682632699009964, "grad_norm": 0.5646793842315674, "learning_rate": 1e-05, "loss": 0.5232, "step": 2406 }, { "epoch": 0.29694969620331246, "grad_norm": 0.6263461112976074, "learning_rate": 1e-05, "loss": 0.6221, "step": 2407 }, { "epoch": 0.2970730654165253, "grad_norm": 0.60877925157547, "learning_rate": 1e-05, "loss": 0.625, "step": 2408 }, { "epoch": 0.29719643462973816, "grad_norm": 0.5531255006790161, "learning_rate": 1e-05, "loss": 0.5212, "step": 2409 }, { "epoch": 0.297319803842951, "grad_norm": 0.5670085549354553, "learning_rate": 1e-05, "loss": 0.6281, "step": 2410 }, { "epoch": 0.29744317305616386, "grad_norm": 0.5074671506881714, "learning_rate": 1e-05, "loss": 0.5397, "step": 2411 }, { "epoch": 0.2975665422693767, "grad_norm": 0.6116983294487, "learning_rate": 1e-05, "loss": 0.5556, "step": 2412 }, { "epoch": 0.2976899114825895, "grad_norm": 0.6105951070785522, "learning_rate": 1e-05, "loss": 0.5372, "step": 2413 }, { "epoch": 0.2978132806958024, "grad_norm": 0.5474944710731506, "learning_rate": 1e-05, "loss": 0.4784, "step": 2414 }, { "epoch": 0.2979366499090152, "grad_norm": 0.5944148898124695, "learning_rate": 1e-05, "loss": 0.5498, "step": 2415 }, { "epoch": 0.29806001912222807, "grad_norm": 0.5477686524391174, "learning_rate": 1e-05, "loss": 0.5136, "step": 2416 }, { "epoch": 0.2981833883354409, "grad_norm": 0.5905075073242188, "learning_rate": 1e-05, "loss": 0.6071, "step": 2417 }, { "epoch": 0.2983067575486537, "grad_norm": 0.5468660593032837, "learning_rate": 1e-05, "loss": 0.6029, "step": 2418 }, { "epoch": 0.2984301267618666, "grad_norm": 0.5942768454551697, "learning_rate": 1e-05, "loss": 0.5813, "step": 2419 }, { "epoch": 0.2985534959750794, "grad_norm": 0.5814658403396606, "learning_rate": 1e-05, "loss": 0.6092, "step": 2420 }, { "epoch": 0.2986768651882923, "grad_norm": 0.6341057419776917, "learning_rate": 1e-05, "loss": 0.6656, "step": 2421 }, { "epoch": 0.2988002344015051, "grad_norm": 0.5882170796394348, "learning_rate": 1e-05, "loss": 0.5506, "step": 2422 }, { "epoch": 0.2989236036147179, "grad_norm": 0.481862872838974, "learning_rate": 1e-05, "loss": 0.4182, "step": 2423 }, { "epoch": 0.2990469728279308, "grad_norm": 0.5694898962974548, "learning_rate": 1e-05, "loss": 0.6403, "step": 2424 }, { "epoch": 0.2991703420411436, "grad_norm": 0.6658369898796082, "learning_rate": 1e-05, "loss": 0.7575, "step": 2425 }, { "epoch": 0.2992937112543565, "grad_norm": 0.5372181534767151, "learning_rate": 1e-05, "loss": 0.4875, "step": 2426 }, { "epoch": 0.2994170804675693, "grad_norm": 0.5551501512527466, "learning_rate": 1e-05, "loss": 0.5704, "step": 2427 }, { "epoch": 0.29954044968078214, "grad_norm": 0.6246120929718018, "learning_rate": 1e-05, "loss": 0.5861, "step": 2428 }, { "epoch": 0.299663818893995, "grad_norm": 0.5948664546012878, "learning_rate": 1e-05, "loss": 0.689, "step": 2429 }, { "epoch": 0.29978718810720784, "grad_norm": 0.571743369102478, "learning_rate": 1e-05, "loss": 0.5416, "step": 2430 }, { "epoch": 0.2999105573204207, "grad_norm": 0.6736022233963013, "learning_rate": 1e-05, "loss": 0.7265, "step": 2431 }, { "epoch": 0.30003392653363353, "grad_norm": 0.5688742399215698, "learning_rate": 1e-05, "loss": 0.5978, "step": 2432 }, { "epoch": 0.30015729574684635, "grad_norm": 0.6241238117218018, "learning_rate": 1e-05, "loss": 0.7358, "step": 2433 }, { "epoch": 0.30028066496005923, "grad_norm": 0.5546680688858032, "learning_rate": 1e-05, "loss": 0.539, "step": 2434 }, { "epoch": 0.30040403417327205, "grad_norm": 0.5490196943283081, "learning_rate": 1e-05, "loss": 0.5108, "step": 2435 }, { "epoch": 0.3005274033864849, "grad_norm": 0.6476840972900391, "learning_rate": 1e-05, "loss": 0.6881, "step": 2436 }, { "epoch": 0.30065077259969775, "grad_norm": 0.5716485381126404, "learning_rate": 1e-05, "loss": 0.6169, "step": 2437 }, { "epoch": 0.30077414181291057, "grad_norm": 0.7057814002037048, "learning_rate": 1e-05, "loss": 0.6592, "step": 2438 }, { "epoch": 0.30089751102612344, "grad_norm": 0.5615618228912354, "learning_rate": 1e-05, "loss": 0.4937, "step": 2439 }, { "epoch": 0.30102088023933626, "grad_norm": 0.6017553806304932, "learning_rate": 1e-05, "loss": 0.5673, "step": 2440 }, { "epoch": 0.30114424945254914, "grad_norm": 0.6481439471244812, "learning_rate": 1e-05, "loss": 0.6038, "step": 2441 }, { "epoch": 0.30126761866576196, "grad_norm": 0.5629379153251648, "learning_rate": 1e-05, "loss": 0.5545, "step": 2442 }, { "epoch": 0.3013909878789748, "grad_norm": 0.5941904187202454, "learning_rate": 1e-05, "loss": 0.4981, "step": 2443 }, { "epoch": 0.30151435709218766, "grad_norm": 0.5161367058753967, "learning_rate": 1e-05, "loss": 0.5052, "step": 2444 }, { "epoch": 0.3016377263054005, "grad_norm": 0.5937045812606812, "learning_rate": 1e-05, "loss": 0.567, "step": 2445 }, { "epoch": 0.30176109551861335, "grad_norm": 0.5279855132102966, "learning_rate": 1e-05, "loss": 0.5035, "step": 2446 }, { "epoch": 0.3018844647318262, "grad_norm": 0.5682076215744019, "learning_rate": 1e-05, "loss": 0.5501, "step": 2447 }, { "epoch": 0.302007833945039, "grad_norm": 0.6561829447746277, "learning_rate": 1e-05, "loss": 0.6203, "step": 2448 }, { "epoch": 0.30213120315825187, "grad_norm": 0.6151185631752014, "learning_rate": 1e-05, "loss": 0.5034, "step": 2449 }, { "epoch": 0.3022545723714647, "grad_norm": 0.561551570892334, "learning_rate": 1e-05, "loss": 0.538, "step": 2450 }, { "epoch": 0.30237794158467757, "grad_norm": 0.6252589225769043, "learning_rate": 1e-05, "loss": 0.671, "step": 2451 }, { "epoch": 0.3025013107978904, "grad_norm": 0.5557673573493958, "learning_rate": 1e-05, "loss": 0.5321, "step": 2452 }, { "epoch": 0.3026246800111032, "grad_norm": 0.6381874680519104, "learning_rate": 1e-05, "loss": 0.7067, "step": 2453 }, { "epoch": 0.3027480492243161, "grad_norm": 0.5511767268180847, "learning_rate": 1e-05, "loss": 0.5936, "step": 2454 }, { "epoch": 0.3028714184375289, "grad_norm": 0.6302254796028137, "learning_rate": 1e-05, "loss": 0.6024, "step": 2455 }, { "epoch": 0.3029947876507418, "grad_norm": 0.5478036999702454, "learning_rate": 1e-05, "loss": 0.5356, "step": 2456 }, { "epoch": 0.3031181568639546, "grad_norm": 0.5277837514877319, "learning_rate": 1e-05, "loss": 0.5141, "step": 2457 }, { "epoch": 0.3032415260771674, "grad_norm": 0.548097550868988, "learning_rate": 1e-05, "loss": 0.5312, "step": 2458 }, { "epoch": 0.3033648952903803, "grad_norm": 0.5956099033355713, "learning_rate": 1e-05, "loss": 0.517, "step": 2459 }, { "epoch": 0.3034882645035931, "grad_norm": 0.6073034405708313, "learning_rate": 1e-05, "loss": 0.6173, "step": 2460 }, { "epoch": 0.303611633716806, "grad_norm": 0.6130875945091248, "learning_rate": 1e-05, "loss": 0.5272, "step": 2461 }, { "epoch": 0.3037350029300188, "grad_norm": 0.4797961413860321, "learning_rate": 1e-05, "loss": 0.4496, "step": 2462 }, { "epoch": 0.30385837214323164, "grad_norm": 0.5575603246688843, "learning_rate": 1e-05, "loss": 0.5341, "step": 2463 }, { "epoch": 0.3039817413564445, "grad_norm": 0.600479006767273, "learning_rate": 1e-05, "loss": 0.6315, "step": 2464 }, { "epoch": 0.30410511056965733, "grad_norm": 0.5672824382781982, "learning_rate": 1e-05, "loss": 0.6196, "step": 2465 }, { "epoch": 0.3042284797828702, "grad_norm": 0.5675901770591736, "learning_rate": 1e-05, "loss": 0.5254, "step": 2466 }, { "epoch": 0.30435184899608303, "grad_norm": 0.577071487903595, "learning_rate": 1e-05, "loss": 0.5257, "step": 2467 }, { "epoch": 0.30447521820929585, "grad_norm": 0.6256905794143677, "learning_rate": 1e-05, "loss": 0.5601, "step": 2468 }, { "epoch": 0.3045985874225087, "grad_norm": 0.5800197720527649, "learning_rate": 1e-05, "loss": 0.5964, "step": 2469 }, { "epoch": 0.30472195663572155, "grad_norm": 0.49829408526420593, "learning_rate": 1e-05, "loss": 0.4432, "step": 2470 }, { "epoch": 0.3048453258489344, "grad_norm": 0.5455201268196106, "learning_rate": 1e-05, "loss": 0.5884, "step": 2471 }, { "epoch": 0.30496869506214724, "grad_norm": 0.6222801208496094, "learning_rate": 1e-05, "loss": 0.4852, "step": 2472 }, { "epoch": 0.30509206427536006, "grad_norm": 0.6033494472503662, "learning_rate": 1e-05, "loss": 0.6496, "step": 2473 }, { "epoch": 0.30521543348857294, "grad_norm": 0.6438754200935364, "learning_rate": 1e-05, "loss": 0.6811, "step": 2474 }, { "epoch": 0.30533880270178576, "grad_norm": 0.6520148515701294, "learning_rate": 1e-05, "loss": 0.6166, "step": 2475 }, { "epoch": 0.30546217191499864, "grad_norm": 0.5768372416496277, "learning_rate": 1e-05, "loss": 0.6091, "step": 2476 }, { "epoch": 0.30558554112821146, "grad_norm": 0.522641658782959, "learning_rate": 1e-05, "loss": 0.4759, "step": 2477 }, { "epoch": 0.3057089103414243, "grad_norm": 0.5710687637329102, "learning_rate": 1e-05, "loss": 0.5581, "step": 2478 }, { "epoch": 0.30583227955463715, "grad_norm": 0.6735024452209473, "learning_rate": 1e-05, "loss": 0.7659, "step": 2479 }, { "epoch": 0.30595564876785, "grad_norm": 0.6197830438613892, "learning_rate": 1e-05, "loss": 0.6211, "step": 2480 }, { "epoch": 0.30607901798106285, "grad_norm": 0.5271833539009094, "learning_rate": 1e-05, "loss": 0.5142, "step": 2481 }, { "epoch": 0.30620238719427567, "grad_norm": 0.5711662769317627, "learning_rate": 1e-05, "loss": 0.6195, "step": 2482 }, { "epoch": 0.3063257564074885, "grad_norm": 0.623096227645874, "learning_rate": 1e-05, "loss": 0.5841, "step": 2483 }, { "epoch": 0.30644912562070137, "grad_norm": 0.5338861346244812, "learning_rate": 1e-05, "loss": 0.5511, "step": 2484 }, { "epoch": 0.3065724948339142, "grad_norm": 0.5732784271240234, "learning_rate": 1e-05, "loss": 0.598, "step": 2485 }, { "epoch": 0.30669586404712706, "grad_norm": 0.6532509922981262, "learning_rate": 1e-05, "loss": 0.665, "step": 2486 }, { "epoch": 0.3068192332603399, "grad_norm": 0.5634185671806335, "learning_rate": 1e-05, "loss": 0.5853, "step": 2487 }, { "epoch": 0.3069426024735527, "grad_norm": 0.5396997928619385, "learning_rate": 1e-05, "loss": 0.4646, "step": 2488 }, { "epoch": 0.3070659716867656, "grad_norm": 0.6592796444892883, "learning_rate": 1e-05, "loss": 0.6612, "step": 2489 }, { "epoch": 0.3071893408999784, "grad_norm": 0.5680592060089111, "learning_rate": 1e-05, "loss": 0.5997, "step": 2490 }, { "epoch": 0.3073127101131913, "grad_norm": 0.5428869128227234, "learning_rate": 1e-05, "loss": 0.5645, "step": 2491 }, { "epoch": 0.3074360793264041, "grad_norm": 0.580058753490448, "learning_rate": 1e-05, "loss": 0.5632, "step": 2492 }, { "epoch": 0.3075594485396169, "grad_norm": 0.585939884185791, "learning_rate": 1e-05, "loss": 0.5659, "step": 2493 }, { "epoch": 0.3076828177528298, "grad_norm": 0.5957947373390198, "learning_rate": 1e-05, "loss": 0.5769, "step": 2494 }, { "epoch": 0.3078061869660426, "grad_norm": 0.6395699977874756, "learning_rate": 1e-05, "loss": 0.6633, "step": 2495 }, { "epoch": 0.3079295561792555, "grad_norm": 0.4676031768321991, "learning_rate": 1e-05, "loss": 0.4071, "step": 2496 }, { "epoch": 0.3080529253924683, "grad_norm": 0.5595964193344116, "learning_rate": 1e-05, "loss": 0.5659, "step": 2497 }, { "epoch": 0.30817629460568113, "grad_norm": 0.5541489124298096, "learning_rate": 1e-05, "loss": 0.5412, "step": 2498 }, { "epoch": 0.308299663818894, "grad_norm": 0.5652081966400146, "learning_rate": 1e-05, "loss": 0.6166, "step": 2499 }, { "epoch": 0.30842303303210683, "grad_norm": 0.5605484843254089, "learning_rate": 1e-05, "loss": 0.616, "step": 2500 }, { "epoch": 0.3085464022453197, "grad_norm": 0.5562141537666321, "learning_rate": 1e-05, "loss": 0.5949, "step": 2501 }, { "epoch": 0.3086697714585325, "grad_norm": 0.6314830780029297, "learning_rate": 1e-05, "loss": 0.7041, "step": 2502 }, { "epoch": 0.30879314067174535, "grad_norm": 0.5999230146408081, "learning_rate": 1e-05, "loss": 0.6249, "step": 2503 }, { "epoch": 0.3089165098849582, "grad_norm": 0.5595463514328003, "learning_rate": 1e-05, "loss": 0.5262, "step": 2504 }, { "epoch": 0.30903987909817104, "grad_norm": 0.6804388165473938, "learning_rate": 1e-05, "loss": 0.6871, "step": 2505 }, { "epoch": 0.3091632483113839, "grad_norm": 0.5998055338859558, "learning_rate": 1e-05, "loss": 0.6699, "step": 2506 }, { "epoch": 0.30928661752459674, "grad_norm": 0.715404748916626, "learning_rate": 1e-05, "loss": 0.6483, "step": 2507 }, { "epoch": 0.30940998673780956, "grad_norm": 0.6843740940093994, "learning_rate": 1e-05, "loss": 0.7593, "step": 2508 }, { "epoch": 0.30953335595102244, "grad_norm": 0.5761123895645142, "learning_rate": 1e-05, "loss": 0.5795, "step": 2509 }, { "epoch": 0.30965672516423526, "grad_norm": 0.5998950004577637, "learning_rate": 1e-05, "loss": 0.4752, "step": 2510 }, { "epoch": 0.30978009437744813, "grad_norm": 0.5527323484420776, "learning_rate": 1e-05, "loss": 0.5248, "step": 2511 }, { "epoch": 0.30990346359066095, "grad_norm": 0.5437840819358826, "learning_rate": 1e-05, "loss": 0.5592, "step": 2512 }, { "epoch": 0.3100268328038738, "grad_norm": 0.5574712753295898, "learning_rate": 1e-05, "loss": 0.6157, "step": 2513 }, { "epoch": 0.31015020201708665, "grad_norm": 0.5318427085876465, "learning_rate": 1e-05, "loss": 0.5616, "step": 2514 }, { "epoch": 0.31027357123029947, "grad_norm": 0.57403165102005, "learning_rate": 1e-05, "loss": 0.5069, "step": 2515 }, { "epoch": 0.31039694044351235, "grad_norm": 0.5195658802986145, "learning_rate": 1e-05, "loss": 0.4879, "step": 2516 }, { "epoch": 0.31052030965672517, "grad_norm": 0.5283798575401306, "learning_rate": 1e-05, "loss": 0.5093, "step": 2517 }, { "epoch": 0.310643678869938, "grad_norm": 0.6424228549003601, "learning_rate": 1e-05, "loss": 0.6192, "step": 2518 }, { "epoch": 0.31076704808315087, "grad_norm": 0.6159998178482056, "learning_rate": 1e-05, "loss": 0.6224, "step": 2519 }, { "epoch": 0.3108904172963637, "grad_norm": 0.6046650409698486, "learning_rate": 1e-05, "loss": 0.5476, "step": 2520 }, { "epoch": 0.31101378650957656, "grad_norm": 0.5854642391204834, "learning_rate": 1e-05, "loss": 0.5878, "step": 2521 }, { "epoch": 0.3111371557227894, "grad_norm": 0.5661904811859131, "learning_rate": 1e-05, "loss": 0.5737, "step": 2522 }, { "epoch": 0.3112605249360022, "grad_norm": 0.5975456237792969, "learning_rate": 1e-05, "loss": 0.5772, "step": 2523 }, { "epoch": 0.3113838941492151, "grad_norm": 0.6651083827018738, "learning_rate": 1e-05, "loss": 0.6569, "step": 2524 }, { "epoch": 0.3115072633624279, "grad_norm": 0.6226404309272766, "learning_rate": 1e-05, "loss": 0.5873, "step": 2525 }, { "epoch": 0.3116306325756408, "grad_norm": 0.5583333969116211, "learning_rate": 1e-05, "loss": 0.6317, "step": 2526 }, { "epoch": 0.3117540017888536, "grad_norm": 0.5163148641586304, "learning_rate": 1e-05, "loss": 0.4406, "step": 2527 }, { "epoch": 0.3118773710020664, "grad_norm": 0.5302978754043579, "learning_rate": 1e-05, "loss": 0.5476, "step": 2528 }, { "epoch": 0.3120007402152793, "grad_norm": 0.5641602277755737, "learning_rate": 1e-05, "loss": 0.5841, "step": 2529 }, { "epoch": 0.3121241094284921, "grad_norm": 0.5504106879234314, "learning_rate": 1e-05, "loss": 0.5565, "step": 2530 }, { "epoch": 0.312247478641705, "grad_norm": 0.572467029094696, "learning_rate": 1e-05, "loss": 0.5571, "step": 2531 }, { "epoch": 0.3123708478549178, "grad_norm": 0.570240318775177, "learning_rate": 1e-05, "loss": 0.5369, "step": 2532 }, { "epoch": 0.31249421706813063, "grad_norm": 0.6682143211364746, "learning_rate": 1e-05, "loss": 0.701, "step": 2533 }, { "epoch": 0.3126175862813435, "grad_norm": 0.5877562165260315, "learning_rate": 1e-05, "loss": 0.5373, "step": 2534 }, { "epoch": 0.3127409554945563, "grad_norm": 0.5930585861206055, "learning_rate": 1e-05, "loss": 0.6005, "step": 2535 }, { "epoch": 0.3128643247077692, "grad_norm": 0.6569700837135315, "learning_rate": 1e-05, "loss": 0.588, "step": 2536 }, { "epoch": 0.312987693920982, "grad_norm": 0.5960720181465149, "learning_rate": 1e-05, "loss": 0.5509, "step": 2537 }, { "epoch": 0.31311106313419484, "grad_norm": 0.6197784543037415, "learning_rate": 1e-05, "loss": 0.6417, "step": 2538 }, { "epoch": 0.3132344323474077, "grad_norm": 0.5403630137443542, "learning_rate": 1e-05, "loss": 0.5228, "step": 2539 }, { "epoch": 0.31335780156062054, "grad_norm": 0.6618843078613281, "learning_rate": 1e-05, "loss": 0.658, "step": 2540 }, { "epoch": 0.3134811707738334, "grad_norm": 0.6822791695594788, "learning_rate": 1e-05, "loss": 0.7402, "step": 2541 }, { "epoch": 0.31360453998704624, "grad_norm": 0.5488811731338501, "learning_rate": 1e-05, "loss": 0.5364, "step": 2542 }, { "epoch": 0.31372790920025906, "grad_norm": 0.6947833895683289, "learning_rate": 1e-05, "loss": 0.7531, "step": 2543 }, { "epoch": 0.31385127841347193, "grad_norm": 0.5532807111740112, "learning_rate": 1e-05, "loss": 0.5517, "step": 2544 }, { "epoch": 0.31397464762668476, "grad_norm": 0.5255846977233887, "learning_rate": 1e-05, "loss": 0.5014, "step": 2545 }, { "epoch": 0.3140980168398976, "grad_norm": 0.5443882942199707, "learning_rate": 1e-05, "loss": 0.5131, "step": 2546 }, { "epoch": 0.31422138605311045, "grad_norm": 0.5686295628547668, "learning_rate": 1e-05, "loss": 0.6159, "step": 2547 }, { "epoch": 0.3143447552663233, "grad_norm": 0.5579115748405457, "learning_rate": 1e-05, "loss": 0.4793, "step": 2548 }, { "epoch": 0.31446812447953615, "grad_norm": 0.6152960658073425, "learning_rate": 1e-05, "loss": 0.5443, "step": 2549 }, { "epoch": 0.31459149369274897, "grad_norm": 0.5677899718284607, "learning_rate": 1e-05, "loss": 0.5568, "step": 2550 }, { "epoch": 0.3147148629059618, "grad_norm": 0.5480667352676392, "learning_rate": 1e-05, "loss": 0.5506, "step": 2551 }, { "epoch": 0.31483823211917467, "grad_norm": 0.5807320475578308, "learning_rate": 1e-05, "loss": 0.6961, "step": 2552 }, { "epoch": 0.3149616013323875, "grad_norm": 0.49485936760902405, "learning_rate": 1e-05, "loss": 0.4949, "step": 2553 }, { "epoch": 0.31508497054560036, "grad_norm": 0.6315593719482422, "learning_rate": 1e-05, "loss": 0.5815, "step": 2554 }, { "epoch": 0.3152083397588132, "grad_norm": 0.6425935626029968, "learning_rate": 1e-05, "loss": 0.7158, "step": 2555 }, { "epoch": 0.315331708972026, "grad_norm": 0.6148173213005066, "learning_rate": 1e-05, "loss": 0.5896, "step": 2556 }, { "epoch": 0.3154550781852389, "grad_norm": 0.5599427223205566, "learning_rate": 1e-05, "loss": 0.5013, "step": 2557 }, { "epoch": 0.3155784473984517, "grad_norm": 0.5550650954246521, "learning_rate": 1e-05, "loss": 0.5523, "step": 2558 }, { "epoch": 0.3157018166116646, "grad_norm": 0.615492045879364, "learning_rate": 1e-05, "loss": 0.6282, "step": 2559 }, { "epoch": 0.3158251858248774, "grad_norm": 0.5981733202934265, "learning_rate": 1e-05, "loss": 0.6508, "step": 2560 }, { "epoch": 0.3159485550380902, "grad_norm": 0.5459149479866028, "learning_rate": 1e-05, "loss": 0.5059, "step": 2561 }, { "epoch": 0.3160719242513031, "grad_norm": 0.5573390126228333, "learning_rate": 1e-05, "loss": 0.5153, "step": 2562 }, { "epoch": 0.3161952934645159, "grad_norm": 0.6024904251098633, "learning_rate": 1e-05, "loss": 0.631, "step": 2563 }, { "epoch": 0.3163186626777288, "grad_norm": 0.602179765701294, "learning_rate": 1e-05, "loss": 0.6335, "step": 2564 }, { "epoch": 0.3164420318909416, "grad_norm": 0.6010375022888184, "learning_rate": 1e-05, "loss": 0.5469, "step": 2565 }, { "epoch": 0.31656540110415443, "grad_norm": 0.5333555340766907, "learning_rate": 1e-05, "loss": 0.5088, "step": 2566 }, { "epoch": 0.3166887703173673, "grad_norm": 0.5435758829116821, "learning_rate": 1e-05, "loss": 0.5185, "step": 2567 }, { "epoch": 0.31681213953058013, "grad_norm": 0.6075606942176819, "learning_rate": 1e-05, "loss": 0.4818, "step": 2568 }, { "epoch": 0.316935508743793, "grad_norm": 0.6031115651130676, "learning_rate": 1e-05, "loss": 0.5441, "step": 2569 }, { "epoch": 0.3170588779570058, "grad_norm": 0.5686927437782288, "learning_rate": 1e-05, "loss": 0.5873, "step": 2570 }, { "epoch": 0.31718224717021865, "grad_norm": 0.6098840236663818, "learning_rate": 1e-05, "loss": 0.5989, "step": 2571 }, { "epoch": 0.3173056163834315, "grad_norm": 0.6333073377609253, "learning_rate": 1e-05, "loss": 0.6432, "step": 2572 }, { "epoch": 0.31742898559664434, "grad_norm": 0.7056474089622498, "learning_rate": 1e-05, "loss": 0.6168, "step": 2573 }, { "epoch": 0.3175523548098572, "grad_norm": 0.6044830083847046, "learning_rate": 1e-05, "loss": 0.6382, "step": 2574 }, { "epoch": 0.31767572402307004, "grad_norm": 0.5560965538024902, "learning_rate": 1e-05, "loss": 0.5452, "step": 2575 }, { "epoch": 0.31779909323628286, "grad_norm": 0.6210083365440369, "learning_rate": 1e-05, "loss": 0.5778, "step": 2576 }, { "epoch": 0.31792246244949574, "grad_norm": 0.5676433444023132, "learning_rate": 1e-05, "loss": 0.5507, "step": 2577 }, { "epoch": 0.31804583166270856, "grad_norm": 0.5196451544761658, "learning_rate": 1e-05, "loss": 0.5757, "step": 2578 }, { "epoch": 0.31816920087592143, "grad_norm": 0.5893961787223816, "learning_rate": 1e-05, "loss": 0.5722, "step": 2579 }, { "epoch": 0.31829257008913425, "grad_norm": 0.545592188835144, "learning_rate": 1e-05, "loss": 0.5223, "step": 2580 }, { "epoch": 0.3184159393023471, "grad_norm": 0.698428750038147, "learning_rate": 1e-05, "loss": 0.7184, "step": 2581 }, { "epoch": 0.31853930851555995, "grad_norm": 0.5260190367698669, "learning_rate": 1e-05, "loss": 0.5273, "step": 2582 }, { "epoch": 0.31866267772877277, "grad_norm": 0.6372617483139038, "learning_rate": 1e-05, "loss": 0.6726, "step": 2583 }, { "epoch": 0.31878604694198565, "grad_norm": 0.575483500957489, "learning_rate": 1e-05, "loss": 0.6026, "step": 2584 }, { "epoch": 0.31890941615519847, "grad_norm": 0.6105288863182068, "learning_rate": 1e-05, "loss": 0.5306, "step": 2585 }, { "epoch": 0.3190327853684113, "grad_norm": 0.6009963750839233, "learning_rate": 1e-05, "loss": 0.6501, "step": 2586 }, { "epoch": 0.31915615458162416, "grad_norm": 0.6261745691299438, "learning_rate": 1e-05, "loss": 0.6588, "step": 2587 }, { "epoch": 0.319279523794837, "grad_norm": 0.5651233792304993, "learning_rate": 1e-05, "loss": 0.5767, "step": 2588 }, { "epoch": 0.31940289300804986, "grad_norm": 0.5591412782669067, "learning_rate": 1e-05, "loss": 0.598, "step": 2589 }, { "epoch": 0.3195262622212627, "grad_norm": 0.617236316204071, "learning_rate": 1e-05, "loss": 0.6327, "step": 2590 }, { "epoch": 0.3196496314344755, "grad_norm": 0.5945587754249573, "learning_rate": 1e-05, "loss": 0.5675, "step": 2591 }, { "epoch": 0.3197730006476884, "grad_norm": 0.5176888704299927, "learning_rate": 1e-05, "loss": 0.4907, "step": 2592 }, { "epoch": 0.3198963698609012, "grad_norm": 0.6132410168647766, "learning_rate": 1e-05, "loss": 0.6871, "step": 2593 }, { "epoch": 0.3200197390741141, "grad_norm": 0.5820103883743286, "learning_rate": 1e-05, "loss": 0.6654, "step": 2594 }, { "epoch": 0.3201431082873269, "grad_norm": 0.6131154894828796, "learning_rate": 1e-05, "loss": 0.6268, "step": 2595 }, { "epoch": 0.3202664775005397, "grad_norm": 0.6162562966346741, "learning_rate": 1e-05, "loss": 0.7111, "step": 2596 }, { "epoch": 0.3203898467137526, "grad_norm": 0.5588403344154358, "learning_rate": 1e-05, "loss": 0.5293, "step": 2597 }, { "epoch": 0.3205132159269654, "grad_norm": 0.6349279284477234, "learning_rate": 1e-05, "loss": 0.5663, "step": 2598 }, { "epoch": 0.3206365851401783, "grad_norm": 0.5977320075035095, "learning_rate": 1e-05, "loss": 0.5272, "step": 2599 }, { "epoch": 0.3207599543533911, "grad_norm": 0.6882795095443726, "learning_rate": 1e-05, "loss": 0.6639, "step": 2600 }, { "epoch": 0.32088332356660393, "grad_norm": 0.5831867456436157, "learning_rate": 1e-05, "loss": 0.5887, "step": 2601 }, { "epoch": 0.3210066927798168, "grad_norm": 0.6203039288520813, "learning_rate": 1e-05, "loss": 0.5455, "step": 2602 }, { "epoch": 0.3211300619930296, "grad_norm": 0.6333592534065247, "learning_rate": 1e-05, "loss": 0.6616, "step": 2603 }, { "epoch": 0.3212534312062425, "grad_norm": 0.5258819460868835, "learning_rate": 1e-05, "loss": 0.5044, "step": 2604 }, { "epoch": 0.3213768004194553, "grad_norm": 0.6115358471870422, "learning_rate": 1e-05, "loss": 0.6222, "step": 2605 }, { "epoch": 0.32150016963266814, "grad_norm": 0.5506835579872131, "learning_rate": 1e-05, "loss": 0.5462, "step": 2606 }, { "epoch": 0.321623538845881, "grad_norm": 0.5724254846572876, "learning_rate": 1e-05, "loss": 0.5567, "step": 2607 }, { "epoch": 0.32174690805909384, "grad_norm": 0.6276203989982605, "learning_rate": 1e-05, "loss": 0.5951, "step": 2608 }, { "epoch": 0.3218702772723067, "grad_norm": 0.5878865122795105, "learning_rate": 1e-05, "loss": 0.5983, "step": 2609 }, { "epoch": 0.32199364648551954, "grad_norm": 0.5177088379859924, "learning_rate": 1e-05, "loss": 0.5049, "step": 2610 }, { "epoch": 0.32211701569873236, "grad_norm": 0.5642683506011963, "learning_rate": 1e-05, "loss": 0.5939, "step": 2611 }, { "epoch": 0.32224038491194523, "grad_norm": 0.6023879647254944, "learning_rate": 1e-05, "loss": 0.5713, "step": 2612 }, { "epoch": 0.32236375412515805, "grad_norm": 0.555223286151886, "learning_rate": 1e-05, "loss": 0.5311, "step": 2613 }, { "epoch": 0.32248712333837093, "grad_norm": 0.5732038021087646, "learning_rate": 1e-05, "loss": 0.5747, "step": 2614 }, { "epoch": 0.32261049255158375, "grad_norm": 0.5908642411231995, "learning_rate": 1e-05, "loss": 0.5812, "step": 2615 }, { "epoch": 0.32273386176479657, "grad_norm": 0.5699208378791809, "learning_rate": 1e-05, "loss": 0.5657, "step": 2616 }, { "epoch": 0.32285723097800945, "grad_norm": 0.5378119349479675, "learning_rate": 1e-05, "loss": 0.4847, "step": 2617 }, { "epoch": 0.32298060019122227, "grad_norm": 0.5933324098587036, "learning_rate": 1e-05, "loss": 0.5214, "step": 2618 }, { "epoch": 0.32310396940443514, "grad_norm": 0.5673894882202148, "learning_rate": 1e-05, "loss": 0.5486, "step": 2619 }, { "epoch": 0.32322733861764796, "grad_norm": 0.50710129737854, "learning_rate": 1e-05, "loss": 0.5224, "step": 2620 }, { "epoch": 0.3233507078308608, "grad_norm": 0.6207606196403503, "learning_rate": 1e-05, "loss": 0.6156, "step": 2621 }, { "epoch": 0.32347407704407366, "grad_norm": 0.6594679951667786, "learning_rate": 1e-05, "loss": 0.7235, "step": 2622 }, { "epoch": 0.3235974462572865, "grad_norm": 0.5880134105682373, "learning_rate": 1e-05, "loss": 0.5523, "step": 2623 }, { "epoch": 0.32372081547049936, "grad_norm": 0.548048734664917, "learning_rate": 1e-05, "loss": 0.5035, "step": 2624 }, { "epoch": 0.3238441846837122, "grad_norm": 0.6494524478912354, "learning_rate": 1e-05, "loss": 0.6258, "step": 2625 }, { "epoch": 0.323967553896925, "grad_norm": 0.5863369107246399, "learning_rate": 1e-05, "loss": 0.6018, "step": 2626 }, { "epoch": 0.3240909231101379, "grad_norm": 0.6219700574874878, "learning_rate": 1e-05, "loss": 0.6088, "step": 2627 }, { "epoch": 0.3242142923233507, "grad_norm": 0.5807848572731018, "learning_rate": 1e-05, "loss": 0.6089, "step": 2628 }, { "epoch": 0.32433766153656357, "grad_norm": 0.6085454225540161, "learning_rate": 1e-05, "loss": 0.6937, "step": 2629 }, { "epoch": 0.3244610307497764, "grad_norm": 0.5374130010604858, "learning_rate": 1e-05, "loss": 0.5892, "step": 2630 }, { "epoch": 0.3245843999629892, "grad_norm": 0.5975980162620544, "learning_rate": 1e-05, "loss": 0.6418, "step": 2631 }, { "epoch": 0.3247077691762021, "grad_norm": 0.6252769231796265, "learning_rate": 1e-05, "loss": 0.6351, "step": 2632 }, { "epoch": 0.3248311383894149, "grad_norm": 0.5673202276229858, "learning_rate": 1e-05, "loss": 0.5772, "step": 2633 }, { "epoch": 0.3249545076026278, "grad_norm": 0.5724198818206787, "learning_rate": 1e-05, "loss": 0.5481, "step": 2634 }, { "epoch": 0.3250778768158406, "grad_norm": 0.6132233738899231, "learning_rate": 1e-05, "loss": 0.5633, "step": 2635 }, { "epoch": 0.3252012460290534, "grad_norm": 0.6107031106948853, "learning_rate": 1e-05, "loss": 0.5459, "step": 2636 }, { "epoch": 0.3253246152422663, "grad_norm": 0.6346080899238586, "learning_rate": 1e-05, "loss": 0.5106, "step": 2637 }, { "epoch": 0.3254479844554791, "grad_norm": 0.521771252155304, "learning_rate": 1e-05, "loss": 0.5765, "step": 2638 }, { "epoch": 0.325571353668692, "grad_norm": 0.525071918964386, "learning_rate": 1e-05, "loss": 0.4578, "step": 2639 }, { "epoch": 0.3256947228819048, "grad_norm": 0.5948806405067444, "learning_rate": 1e-05, "loss": 0.5479, "step": 2640 }, { "epoch": 0.32581809209511764, "grad_norm": 0.5974879264831543, "learning_rate": 1e-05, "loss": 0.5746, "step": 2641 }, { "epoch": 0.3259414613083305, "grad_norm": 0.7236047387123108, "learning_rate": 1e-05, "loss": 0.6954, "step": 2642 }, { "epoch": 0.32606483052154334, "grad_norm": 0.554881751537323, "learning_rate": 1e-05, "loss": 0.5702, "step": 2643 }, { "epoch": 0.3261881997347562, "grad_norm": 0.5819019079208374, "learning_rate": 1e-05, "loss": 0.6023, "step": 2644 }, { "epoch": 0.32631156894796903, "grad_norm": 0.5972740650177002, "learning_rate": 1e-05, "loss": 0.5761, "step": 2645 }, { "epoch": 0.32643493816118185, "grad_norm": 0.5783820748329163, "learning_rate": 1e-05, "loss": 0.552, "step": 2646 }, { "epoch": 0.32655830737439473, "grad_norm": 0.4883982241153717, "learning_rate": 1e-05, "loss": 0.4376, "step": 2647 }, { "epoch": 0.32668167658760755, "grad_norm": 0.5003172755241394, "learning_rate": 1e-05, "loss": 0.423, "step": 2648 }, { "epoch": 0.3268050458008204, "grad_norm": 0.6295067667961121, "learning_rate": 1e-05, "loss": 0.6046, "step": 2649 }, { "epoch": 0.32692841501403325, "grad_norm": 0.6384091377258301, "learning_rate": 1e-05, "loss": 0.6617, "step": 2650 }, { "epoch": 0.32705178422724607, "grad_norm": 0.5514048337936401, "learning_rate": 1e-05, "loss": 0.5305, "step": 2651 }, { "epoch": 0.32717515344045894, "grad_norm": 0.557288408279419, "learning_rate": 1e-05, "loss": 0.5673, "step": 2652 }, { "epoch": 0.32729852265367176, "grad_norm": 0.588378369808197, "learning_rate": 1e-05, "loss": 0.5476, "step": 2653 }, { "epoch": 0.32742189186688464, "grad_norm": 0.6186018586158752, "learning_rate": 1e-05, "loss": 0.6287, "step": 2654 }, { "epoch": 0.32754526108009746, "grad_norm": 0.5858659148216248, "learning_rate": 1e-05, "loss": 0.4962, "step": 2655 }, { "epoch": 0.3276686302933103, "grad_norm": 0.5678394436836243, "learning_rate": 1e-05, "loss": 0.5485, "step": 2656 }, { "epoch": 0.32779199950652316, "grad_norm": 0.648635983467102, "learning_rate": 1e-05, "loss": 0.5671, "step": 2657 }, { "epoch": 0.327915368719736, "grad_norm": 0.5497594475746155, "learning_rate": 1e-05, "loss": 0.5239, "step": 2658 }, { "epoch": 0.32803873793294885, "grad_norm": 0.5885316729545593, "learning_rate": 1e-05, "loss": 0.6556, "step": 2659 }, { "epoch": 0.3281621071461617, "grad_norm": 0.6013105511665344, "learning_rate": 1e-05, "loss": 0.5557, "step": 2660 }, { "epoch": 0.3282854763593745, "grad_norm": 0.5572304129600525, "learning_rate": 1e-05, "loss": 0.5846, "step": 2661 }, { "epoch": 0.32840884557258737, "grad_norm": 0.5977896451950073, "learning_rate": 1e-05, "loss": 0.591, "step": 2662 }, { "epoch": 0.3285322147858002, "grad_norm": 0.5853450894355774, "learning_rate": 1e-05, "loss": 0.5313, "step": 2663 }, { "epoch": 0.32865558399901307, "grad_norm": 0.572464644908905, "learning_rate": 1e-05, "loss": 0.5602, "step": 2664 }, { "epoch": 0.3287789532122259, "grad_norm": 0.5374737977981567, "learning_rate": 1e-05, "loss": 0.5367, "step": 2665 }, { "epoch": 0.3289023224254387, "grad_norm": 0.6198915243148804, "learning_rate": 1e-05, "loss": 0.5918, "step": 2666 }, { "epoch": 0.3290256916386516, "grad_norm": 0.5935406684875488, "learning_rate": 1e-05, "loss": 0.6489, "step": 2667 }, { "epoch": 0.3291490608518644, "grad_norm": 0.5637674331665039, "learning_rate": 1e-05, "loss": 0.6226, "step": 2668 }, { "epoch": 0.3292724300650773, "grad_norm": 0.5371713042259216, "learning_rate": 1e-05, "loss": 0.6177, "step": 2669 }, { "epoch": 0.3293957992782901, "grad_norm": 0.5575650334358215, "learning_rate": 1e-05, "loss": 0.4995, "step": 2670 }, { "epoch": 0.3295191684915029, "grad_norm": 0.5120946168899536, "learning_rate": 1e-05, "loss": 0.4812, "step": 2671 }, { "epoch": 0.3296425377047158, "grad_norm": 0.5681183934211731, "learning_rate": 1e-05, "loss": 0.502, "step": 2672 }, { "epoch": 0.3297659069179286, "grad_norm": 0.4747759699821472, "learning_rate": 1e-05, "loss": 0.4299, "step": 2673 }, { "epoch": 0.3298892761311415, "grad_norm": 0.5916443467140198, "learning_rate": 1e-05, "loss": 0.6118, "step": 2674 }, { "epoch": 0.3300126453443543, "grad_norm": 0.5723130702972412, "learning_rate": 1e-05, "loss": 0.6185, "step": 2675 }, { "epoch": 0.33013601455756714, "grad_norm": 0.6678124666213989, "learning_rate": 1e-05, "loss": 0.6665, "step": 2676 }, { "epoch": 0.33025938377078, "grad_norm": 0.6219947338104248, "learning_rate": 1e-05, "loss": 0.6755, "step": 2677 }, { "epoch": 0.33038275298399283, "grad_norm": 0.6291581392288208, "learning_rate": 1e-05, "loss": 0.6276, "step": 2678 }, { "epoch": 0.3305061221972057, "grad_norm": 0.5527792572975159, "learning_rate": 1e-05, "loss": 0.5149, "step": 2679 }, { "epoch": 0.33062949141041853, "grad_norm": 0.6274359226226807, "learning_rate": 1e-05, "loss": 0.6738, "step": 2680 }, { "epoch": 0.33075286062363135, "grad_norm": 0.6126340627670288, "learning_rate": 1e-05, "loss": 0.6538, "step": 2681 }, { "epoch": 0.3308762298368442, "grad_norm": 0.5834219455718994, "learning_rate": 1e-05, "loss": 0.5703, "step": 2682 }, { "epoch": 0.33099959905005705, "grad_norm": 0.6002717614173889, "learning_rate": 1e-05, "loss": 0.5811, "step": 2683 }, { "epoch": 0.3311229682632699, "grad_norm": 0.5154274702072144, "learning_rate": 1e-05, "loss": 0.4992, "step": 2684 }, { "epoch": 0.33124633747648274, "grad_norm": 0.524155855178833, "learning_rate": 1e-05, "loss": 0.5333, "step": 2685 }, { "epoch": 0.33136970668969556, "grad_norm": 0.6480532884597778, "learning_rate": 1e-05, "loss": 0.6914, "step": 2686 }, { "epoch": 0.33149307590290844, "grad_norm": 0.6221327185630798, "learning_rate": 1e-05, "loss": 0.6217, "step": 2687 }, { "epoch": 0.33161644511612126, "grad_norm": 0.6245303750038147, "learning_rate": 1e-05, "loss": 0.6714, "step": 2688 }, { "epoch": 0.33173981432933414, "grad_norm": 0.5876434445381165, "learning_rate": 1e-05, "loss": 0.6797, "step": 2689 }, { "epoch": 0.33186318354254696, "grad_norm": 0.5794705152511597, "learning_rate": 1e-05, "loss": 0.6465, "step": 2690 }, { "epoch": 0.3319865527557598, "grad_norm": 0.5279040932655334, "learning_rate": 1e-05, "loss": 0.4667, "step": 2691 }, { "epoch": 0.33210992196897265, "grad_norm": 0.5328657031059265, "learning_rate": 1e-05, "loss": 0.511, "step": 2692 }, { "epoch": 0.3322332911821855, "grad_norm": 0.5614954829216003, "learning_rate": 1e-05, "loss": 0.5254, "step": 2693 }, { "epoch": 0.33235666039539835, "grad_norm": 0.6415400505065918, "learning_rate": 1e-05, "loss": 0.5871, "step": 2694 }, { "epoch": 0.33248002960861117, "grad_norm": 0.5573257803916931, "learning_rate": 1e-05, "loss": 0.5202, "step": 2695 }, { "epoch": 0.332603398821824, "grad_norm": 0.659960925579071, "learning_rate": 1e-05, "loss": 0.6461, "step": 2696 }, { "epoch": 0.33272676803503687, "grad_norm": 0.5948347449302673, "learning_rate": 1e-05, "loss": 0.544, "step": 2697 }, { "epoch": 0.3328501372482497, "grad_norm": 0.6006671786308289, "learning_rate": 1e-05, "loss": 0.5755, "step": 2698 }, { "epoch": 0.33297350646146257, "grad_norm": 0.6420713067054749, "learning_rate": 1e-05, "loss": 0.6253, "step": 2699 }, { "epoch": 0.3330968756746754, "grad_norm": 0.5826205015182495, "learning_rate": 1e-05, "loss": 0.6157, "step": 2700 }, { "epoch": 0.3332202448878882, "grad_norm": 0.5027444958686829, "learning_rate": 1e-05, "loss": 0.5563, "step": 2701 }, { "epoch": 0.3333436141011011, "grad_norm": 0.6444686651229858, "learning_rate": 1e-05, "loss": 0.6875, "step": 2702 }, { "epoch": 0.3334669833143139, "grad_norm": 0.5459985733032227, "learning_rate": 1e-05, "loss": 0.5624, "step": 2703 }, { "epoch": 0.3335903525275268, "grad_norm": 0.6646938920021057, "learning_rate": 1e-05, "loss": 0.6564, "step": 2704 }, { "epoch": 0.3337137217407396, "grad_norm": 0.5917642712593079, "learning_rate": 1e-05, "loss": 0.5934, "step": 2705 }, { "epoch": 0.3338370909539524, "grad_norm": 0.6209236979484558, "learning_rate": 1e-05, "loss": 0.6995, "step": 2706 }, { "epoch": 0.3339604601671653, "grad_norm": 0.6316426992416382, "learning_rate": 1e-05, "loss": 0.6628, "step": 2707 }, { "epoch": 0.3340838293803781, "grad_norm": 0.5609678626060486, "learning_rate": 1e-05, "loss": 0.5472, "step": 2708 }, { "epoch": 0.334207198593591, "grad_norm": 0.5436873435974121, "learning_rate": 1e-05, "loss": 0.5874, "step": 2709 }, { "epoch": 0.3343305678068038, "grad_norm": 0.57396399974823, "learning_rate": 1e-05, "loss": 0.6003, "step": 2710 }, { "epoch": 0.33445393702001663, "grad_norm": 0.5303236246109009, "learning_rate": 1e-05, "loss": 0.5102, "step": 2711 }, { "epoch": 0.3345773062332295, "grad_norm": 0.5681478381156921, "learning_rate": 1e-05, "loss": 0.5586, "step": 2712 }, { "epoch": 0.33470067544644233, "grad_norm": 0.5796952247619629, "learning_rate": 1e-05, "loss": 0.4629, "step": 2713 }, { "epoch": 0.3348240446596552, "grad_norm": 0.6314813494682312, "learning_rate": 1e-05, "loss": 0.6867, "step": 2714 }, { "epoch": 0.334947413872868, "grad_norm": 0.6047354936599731, "learning_rate": 1e-05, "loss": 0.6006, "step": 2715 }, { "epoch": 0.33507078308608085, "grad_norm": 0.6419082880020142, "learning_rate": 1e-05, "loss": 0.7506, "step": 2716 }, { "epoch": 0.3351941522992937, "grad_norm": 0.6188533306121826, "learning_rate": 1e-05, "loss": 0.6117, "step": 2717 }, { "epoch": 0.33531752151250654, "grad_norm": 0.5842743515968323, "learning_rate": 1e-05, "loss": 0.6546, "step": 2718 }, { "epoch": 0.3354408907257194, "grad_norm": 0.47759583592414856, "learning_rate": 1e-05, "loss": 0.4133, "step": 2719 }, { "epoch": 0.33556425993893224, "grad_norm": 0.597667932510376, "learning_rate": 1e-05, "loss": 0.5495, "step": 2720 }, { "epoch": 0.33568762915214506, "grad_norm": 0.5798318982124329, "learning_rate": 1e-05, "loss": 0.5536, "step": 2721 }, { "epoch": 0.33581099836535794, "grad_norm": 0.591609537601471, "learning_rate": 1e-05, "loss": 0.6097, "step": 2722 }, { "epoch": 0.33593436757857076, "grad_norm": 0.591373860836029, "learning_rate": 1e-05, "loss": 0.655, "step": 2723 }, { "epoch": 0.33605773679178363, "grad_norm": 0.6504549980163574, "learning_rate": 1e-05, "loss": 0.7629, "step": 2724 }, { "epoch": 0.33618110600499646, "grad_norm": 0.5643844604492188, "learning_rate": 1e-05, "loss": 0.5714, "step": 2725 }, { "epoch": 0.3363044752182093, "grad_norm": 0.6208524703979492, "learning_rate": 1e-05, "loss": 0.6389, "step": 2726 }, { "epoch": 0.33642784443142215, "grad_norm": 0.5620993971824646, "learning_rate": 1e-05, "loss": 0.5154, "step": 2727 }, { "epoch": 0.33655121364463497, "grad_norm": 0.5707035064697266, "learning_rate": 1e-05, "loss": 0.6361, "step": 2728 }, { "epoch": 0.33667458285784785, "grad_norm": 0.5644209980964661, "learning_rate": 1e-05, "loss": 0.5348, "step": 2729 }, { "epoch": 0.33679795207106067, "grad_norm": 0.6227869391441345, "learning_rate": 1e-05, "loss": 0.6587, "step": 2730 }, { "epoch": 0.3369213212842735, "grad_norm": 0.5297881960868835, "learning_rate": 1e-05, "loss": 0.4246, "step": 2731 }, { "epoch": 0.33704469049748637, "grad_norm": 0.5221588015556335, "learning_rate": 1e-05, "loss": 0.5032, "step": 2732 }, { "epoch": 0.3371680597106992, "grad_norm": 0.6014009118080139, "learning_rate": 1e-05, "loss": 0.5331, "step": 2733 }, { "epoch": 0.33729142892391206, "grad_norm": 0.5629745721817017, "learning_rate": 1e-05, "loss": 0.6221, "step": 2734 }, { "epoch": 0.3374147981371249, "grad_norm": 0.5556718111038208, "learning_rate": 1e-05, "loss": 0.593, "step": 2735 }, { "epoch": 0.3375381673503377, "grad_norm": 0.6704650521278381, "learning_rate": 1e-05, "loss": 0.6252, "step": 2736 }, { "epoch": 0.3376615365635506, "grad_norm": 0.5890873670578003, "learning_rate": 1e-05, "loss": 0.5868, "step": 2737 }, { "epoch": 0.3377849057767634, "grad_norm": 0.653628408908844, "learning_rate": 1e-05, "loss": 0.6285, "step": 2738 }, { "epoch": 0.3379082749899763, "grad_norm": 0.6597785353660583, "learning_rate": 1e-05, "loss": 0.6327, "step": 2739 }, { "epoch": 0.3380316442031891, "grad_norm": 0.6025200486183167, "learning_rate": 1e-05, "loss": 0.5753, "step": 2740 }, { "epoch": 0.3381550134164019, "grad_norm": 0.6038762331008911, "learning_rate": 1e-05, "loss": 0.6584, "step": 2741 }, { "epoch": 0.3382783826296148, "grad_norm": 0.6128084659576416, "learning_rate": 1e-05, "loss": 0.5977, "step": 2742 }, { "epoch": 0.3384017518428276, "grad_norm": 0.6107230186462402, "learning_rate": 1e-05, "loss": 0.5824, "step": 2743 }, { "epoch": 0.3385251210560405, "grad_norm": 0.5894199013710022, "learning_rate": 1e-05, "loss": 0.5618, "step": 2744 }, { "epoch": 0.3386484902692533, "grad_norm": 0.5905420780181885, "learning_rate": 1e-05, "loss": 0.6318, "step": 2745 }, { "epoch": 0.33877185948246613, "grad_norm": 0.5079423189163208, "learning_rate": 1e-05, "loss": 0.5234, "step": 2746 }, { "epoch": 0.338895228695679, "grad_norm": 0.5921322107315063, "learning_rate": 1e-05, "loss": 0.6081, "step": 2747 }, { "epoch": 0.33901859790889183, "grad_norm": 0.6528266668319702, "learning_rate": 1e-05, "loss": 0.5999, "step": 2748 }, { "epoch": 0.3391419671221047, "grad_norm": 0.6005618572235107, "learning_rate": 1e-05, "loss": 0.6177, "step": 2749 }, { "epoch": 0.3392653363353175, "grad_norm": 0.5967350006103516, "learning_rate": 1e-05, "loss": 0.5912, "step": 2750 }, { "epoch": 0.33938870554853035, "grad_norm": 0.5815388560295105, "learning_rate": 1e-05, "loss": 0.5746, "step": 2751 }, { "epoch": 0.3395120747617432, "grad_norm": 0.5692428350448608, "learning_rate": 1e-05, "loss": 0.5266, "step": 2752 }, { "epoch": 0.33963544397495604, "grad_norm": 0.5321149230003357, "learning_rate": 1e-05, "loss": 0.5232, "step": 2753 }, { "epoch": 0.3397588131881689, "grad_norm": 0.6008802652359009, "learning_rate": 1e-05, "loss": 0.5978, "step": 2754 }, { "epoch": 0.33988218240138174, "grad_norm": 0.5548668503761292, "learning_rate": 1e-05, "loss": 0.4752, "step": 2755 }, { "epoch": 0.34000555161459456, "grad_norm": 0.5881125330924988, "learning_rate": 1e-05, "loss": 0.56, "step": 2756 }, { "epoch": 0.34012892082780743, "grad_norm": 0.5637097358703613, "learning_rate": 1e-05, "loss": 0.6095, "step": 2757 }, { "epoch": 0.34025229004102026, "grad_norm": 0.5187801122665405, "learning_rate": 1e-05, "loss": 0.4374, "step": 2758 }, { "epoch": 0.34037565925423313, "grad_norm": 0.5649757981300354, "learning_rate": 1e-05, "loss": 0.5992, "step": 2759 }, { "epoch": 0.34049902846744595, "grad_norm": 0.5336349606513977, "learning_rate": 1e-05, "loss": 0.5201, "step": 2760 }, { "epoch": 0.3406223976806588, "grad_norm": 0.6362891793251038, "learning_rate": 1e-05, "loss": 0.6295, "step": 2761 }, { "epoch": 0.34074576689387165, "grad_norm": 0.5510485768318176, "learning_rate": 1e-05, "loss": 0.5401, "step": 2762 }, { "epoch": 0.34086913610708447, "grad_norm": 0.5581241250038147, "learning_rate": 1e-05, "loss": 0.5185, "step": 2763 }, { "epoch": 0.34099250532029735, "grad_norm": 0.5314164757728577, "learning_rate": 1e-05, "loss": 0.5373, "step": 2764 }, { "epoch": 0.34111587453351017, "grad_norm": 0.5761904716491699, "learning_rate": 1e-05, "loss": 0.5809, "step": 2765 }, { "epoch": 0.341239243746723, "grad_norm": 0.5628737807273865, "learning_rate": 1e-05, "loss": 0.4299, "step": 2766 }, { "epoch": 0.34136261295993586, "grad_norm": 0.5679818987846375, "learning_rate": 1e-05, "loss": 0.6193, "step": 2767 }, { "epoch": 0.3414859821731487, "grad_norm": 0.538576066493988, "learning_rate": 1e-05, "loss": 0.5242, "step": 2768 }, { "epoch": 0.34160935138636156, "grad_norm": 0.5681934952735901, "learning_rate": 1e-05, "loss": 0.546, "step": 2769 }, { "epoch": 0.3417327205995744, "grad_norm": 0.5744562745094299, "learning_rate": 1e-05, "loss": 0.5062, "step": 2770 }, { "epoch": 0.3418560898127872, "grad_norm": 0.5848538875579834, "learning_rate": 1e-05, "loss": 0.6473, "step": 2771 }, { "epoch": 0.3419794590260001, "grad_norm": 0.5692976713180542, "learning_rate": 1e-05, "loss": 0.6142, "step": 2772 }, { "epoch": 0.3421028282392129, "grad_norm": 0.5755501985549927, "learning_rate": 1e-05, "loss": 0.5169, "step": 2773 }, { "epoch": 0.3422261974524258, "grad_norm": 0.5570811629295349, "learning_rate": 1e-05, "loss": 0.593, "step": 2774 }, { "epoch": 0.3423495666656386, "grad_norm": 0.5697108507156372, "learning_rate": 1e-05, "loss": 0.6095, "step": 2775 }, { "epoch": 0.3424729358788514, "grad_norm": 0.5985772013664246, "learning_rate": 1e-05, "loss": 0.638, "step": 2776 }, { "epoch": 0.3425963050920643, "grad_norm": 0.5832822918891907, "learning_rate": 1e-05, "loss": 0.5532, "step": 2777 }, { "epoch": 0.3427196743052771, "grad_norm": 0.6336337924003601, "learning_rate": 1e-05, "loss": 0.6087, "step": 2778 }, { "epoch": 0.34284304351849, "grad_norm": 0.533855140209198, "learning_rate": 1e-05, "loss": 0.4731, "step": 2779 }, { "epoch": 0.3429664127317028, "grad_norm": 0.7100414633750916, "learning_rate": 1e-05, "loss": 0.6026, "step": 2780 }, { "epoch": 0.34308978194491563, "grad_norm": 0.5455341935157776, "learning_rate": 1e-05, "loss": 0.5013, "step": 2781 }, { "epoch": 0.3432131511581285, "grad_norm": 0.5498772263526917, "learning_rate": 1e-05, "loss": 0.5199, "step": 2782 }, { "epoch": 0.3433365203713413, "grad_norm": 0.5843566656112671, "learning_rate": 1e-05, "loss": 0.6168, "step": 2783 }, { "epoch": 0.3434598895845542, "grad_norm": 0.565807580947876, "learning_rate": 1e-05, "loss": 0.5813, "step": 2784 }, { "epoch": 0.343583258797767, "grad_norm": 0.5546219348907471, "learning_rate": 1e-05, "loss": 0.5832, "step": 2785 }, { "epoch": 0.34370662801097984, "grad_norm": 0.4674046039581299, "learning_rate": 1e-05, "loss": 0.4297, "step": 2786 }, { "epoch": 0.3438299972241927, "grad_norm": 0.5985456705093384, "learning_rate": 1e-05, "loss": 0.4904, "step": 2787 }, { "epoch": 0.34395336643740554, "grad_norm": 0.5416097044944763, "learning_rate": 1e-05, "loss": 0.5856, "step": 2788 }, { "epoch": 0.3440767356506184, "grad_norm": 0.6615899205207825, "learning_rate": 1e-05, "loss": 0.7366, "step": 2789 }, { "epoch": 0.34420010486383124, "grad_norm": 0.564382791519165, "learning_rate": 1e-05, "loss": 0.5427, "step": 2790 }, { "epoch": 0.34432347407704406, "grad_norm": 0.6603736281394958, "learning_rate": 1e-05, "loss": 0.6757, "step": 2791 }, { "epoch": 0.34444684329025693, "grad_norm": 0.6206501722335815, "learning_rate": 1e-05, "loss": 0.6158, "step": 2792 }, { "epoch": 0.34457021250346975, "grad_norm": 0.5440435409545898, "learning_rate": 1e-05, "loss": 0.5424, "step": 2793 }, { "epoch": 0.34469358171668263, "grad_norm": 0.5588982105255127, "learning_rate": 1e-05, "loss": 0.5353, "step": 2794 }, { "epoch": 0.34481695092989545, "grad_norm": 0.5258537530899048, "learning_rate": 1e-05, "loss": 0.5122, "step": 2795 }, { "epoch": 0.34494032014310827, "grad_norm": 0.6283261775970459, "learning_rate": 1e-05, "loss": 0.615, "step": 2796 }, { "epoch": 0.34506368935632115, "grad_norm": 0.6102871298789978, "learning_rate": 1e-05, "loss": 0.6832, "step": 2797 }, { "epoch": 0.34518705856953397, "grad_norm": 0.6072924137115479, "learning_rate": 1e-05, "loss": 0.585, "step": 2798 }, { "epoch": 0.34531042778274684, "grad_norm": 0.594573974609375, "learning_rate": 1e-05, "loss": 0.6493, "step": 2799 }, { "epoch": 0.34543379699595966, "grad_norm": 0.5952876210212708, "learning_rate": 1e-05, "loss": 0.594, "step": 2800 }, { "epoch": 0.3455571662091725, "grad_norm": 0.6317829489707947, "learning_rate": 1e-05, "loss": 0.5988, "step": 2801 }, { "epoch": 0.34568053542238536, "grad_norm": 0.6829769015312195, "learning_rate": 1e-05, "loss": 0.7013, "step": 2802 }, { "epoch": 0.3458039046355982, "grad_norm": 0.5819827318191528, "learning_rate": 1e-05, "loss": 0.5828, "step": 2803 }, { "epoch": 0.34592727384881106, "grad_norm": 0.6072257161140442, "learning_rate": 1e-05, "loss": 0.6623, "step": 2804 }, { "epoch": 0.3460506430620239, "grad_norm": 0.5468859076499939, "learning_rate": 1e-05, "loss": 0.5997, "step": 2805 }, { "epoch": 0.3461740122752367, "grad_norm": 0.5587475299835205, "learning_rate": 1e-05, "loss": 0.5831, "step": 2806 }, { "epoch": 0.3462973814884496, "grad_norm": 0.6807935833930969, "learning_rate": 1e-05, "loss": 0.6528, "step": 2807 }, { "epoch": 0.3464207507016624, "grad_norm": 0.5241155028343201, "learning_rate": 1e-05, "loss": 0.5562, "step": 2808 }, { "epoch": 0.34654411991487527, "grad_norm": 0.6324060559272766, "learning_rate": 1e-05, "loss": 0.7068, "step": 2809 }, { "epoch": 0.3466674891280881, "grad_norm": 0.5495719313621521, "learning_rate": 1e-05, "loss": 0.5478, "step": 2810 }, { "epoch": 0.3467908583413009, "grad_norm": 0.579926609992981, "learning_rate": 1e-05, "loss": 0.5283, "step": 2811 }, { "epoch": 0.3469142275545138, "grad_norm": 0.5908557176589966, "learning_rate": 1e-05, "loss": 0.6365, "step": 2812 }, { "epoch": 0.3470375967677266, "grad_norm": 0.6008375883102417, "learning_rate": 1e-05, "loss": 0.6622, "step": 2813 }, { "epoch": 0.34716096598093943, "grad_norm": 0.5562805533409119, "learning_rate": 1e-05, "loss": 0.528, "step": 2814 }, { "epoch": 0.3472843351941523, "grad_norm": 0.5528295636177063, "learning_rate": 1e-05, "loss": 0.5287, "step": 2815 }, { "epoch": 0.3474077044073651, "grad_norm": 0.5659312605857849, "learning_rate": 1e-05, "loss": 0.618, "step": 2816 }, { "epoch": 0.347531073620578, "grad_norm": 0.5729237794876099, "learning_rate": 1e-05, "loss": 0.5467, "step": 2817 }, { "epoch": 0.3476544428337908, "grad_norm": 0.48595574498176575, "learning_rate": 1e-05, "loss": 0.4796, "step": 2818 }, { "epoch": 0.34777781204700364, "grad_norm": 0.5789028406143188, "learning_rate": 1e-05, "loss": 0.5289, "step": 2819 }, { "epoch": 0.3479011812602165, "grad_norm": 0.7233790159225464, "learning_rate": 1e-05, "loss": 0.7475, "step": 2820 }, { "epoch": 0.34802455047342934, "grad_norm": 0.6329880952835083, "learning_rate": 1e-05, "loss": 0.7088, "step": 2821 }, { "epoch": 0.3481479196866422, "grad_norm": 0.5327693223953247, "learning_rate": 1e-05, "loss": 0.5073, "step": 2822 }, { "epoch": 0.34827128889985504, "grad_norm": 0.5962215662002563, "learning_rate": 1e-05, "loss": 0.6179, "step": 2823 }, { "epoch": 0.34839465811306786, "grad_norm": 0.6025140881538391, "learning_rate": 1e-05, "loss": 0.5636, "step": 2824 }, { "epoch": 0.34851802732628073, "grad_norm": 0.57033371925354, "learning_rate": 1e-05, "loss": 0.5497, "step": 2825 }, { "epoch": 0.34864139653949355, "grad_norm": 0.5798059701919556, "learning_rate": 1e-05, "loss": 0.6737, "step": 2826 }, { "epoch": 0.34876476575270643, "grad_norm": 0.5779118537902832, "learning_rate": 1e-05, "loss": 0.618, "step": 2827 }, { "epoch": 0.34888813496591925, "grad_norm": 0.5473120808601379, "learning_rate": 1e-05, "loss": 0.5282, "step": 2828 }, { "epoch": 0.34901150417913207, "grad_norm": 0.5878083109855652, "learning_rate": 1e-05, "loss": 0.5703, "step": 2829 }, { "epoch": 0.34913487339234495, "grad_norm": 0.6579760313034058, "learning_rate": 1e-05, "loss": 0.7078, "step": 2830 }, { "epoch": 0.34925824260555777, "grad_norm": 0.5713212490081787, "learning_rate": 1e-05, "loss": 0.5713, "step": 2831 }, { "epoch": 0.34938161181877064, "grad_norm": 0.6975808143615723, "learning_rate": 1e-05, "loss": 0.6414, "step": 2832 }, { "epoch": 0.34950498103198346, "grad_norm": 0.6157360672950745, "learning_rate": 1e-05, "loss": 0.6842, "step": 2833 }, { "epoch": 0.3496283502451963, "grad_norm": 0.546185314655304, "learning_rate": 1e-05, "loss": 0.5913, "step": 2834 }, { "epoch": 0.34975171945840916, "grad_norm": 0.5874433517456055, "learning_rate": 1e-05, "loss": 0.5234, "step": 2835 }, { "epoch": 0.349875088671622, "grad_norm": 0.5363745093345642, "learning_rate": 1e-05, "loss": 0.5627, "step": 2836 }, { "epoch": 0.34999845788483486, "grad_norm": 0.5872260928153992, "learning_rate": 1e-05, "loss": 0.4892, "step": 2837 }, { "epoch": 0.3501218270980477, "grad_norm": 0.5374835133552551, "learning_rate": 1e-05, "loss": 0.5513, "step": 2838 }, { "epoch": 0.3502451963112605, "grad_norm": 0.6249265074729919, "learning_rate": 1e-05, "loss": 0.6296, "step": 2839 }, { "epoch": 0.3503685655244734, "grad_norm": 0.5501384139060974, "learning_rate": 1e-05, "loss": 0.5705, "step": 2840 }, { "epoch": 0.3504919347376862, "grad_norm": 0.5904916524887085, "learning_rate": 1e-05, "loss": 0.5987, "step": 2841 }, { "epoch": 0.35061530395089907, "grad_norm": 0.5206071734428406, "learning_rate": 1e-05, "loss": 0.5347, "step": 2842 }, { "epoch": 0.3507386731641119, "grad_norm": 0.588853657245636, "learning_rate": 1e-05, "loss": 0.5323, "step": 2843 }, { "epoch": 0.3508620423773247, "grad_norm": 0.7246431708335876, "learning_rate": 1e-05, "loss": 0.8058, "step": 2844 }, { "epoch": 0.3509854115905376, "grad_norm": 0.6273754835128784, "learning_rate": 1e-05, "loss": 0.5841, "step": 2845 }, { "epoch": 0.3511087808037504, "grad_norm": 0.6397843360900879, "learning_rate": 1e-05, "loss": 0.614, "step": 2846 }, { "epoch": 0.3512321500169633, "grad_norm": 0.6060466766357422, "learning_rate": 1e-05, "loss": 0.6445, "step": 2847 }, { "epoch": 0.3513555192301761, "grad_norm": 0.5998456478118896, "learning_rate": 1e-05, "loss": 0.5928, "step": 2848 }, { "epoch": 0.3514788884433889, "grad_norm": 0.5934113264083862, "learning_rate": 1e-05, "loss": 0.646, "step": 2849 }, { "epoch": 0.3516022576566018, "grad_norm": 0.5324088335037231, "learning_rate": 1e-05, "loss": 0.5287, "step": 2850 }, { "epoch": 0.3517256268698146, "grad_norm": 0.5506688952445984, "learning_rate": 1e-05, "loss": 0.5556, "step": 2851 }, { "epoch": 0.3518489960830275, "grad_norm": 0.6296108961105347, "learning_rate": 1e-05, "loss": 0.6052, "step": 2852 }, { "epoch": 0.3519723652962403, "grad_norm": 0.6033813953399658, "learning_rate": 1e-05, "loss": 0.6721, "step": 2853 }, { "epoch": 0.35209573450945314, "grad_norm": 0.4974447786808014, "learning_rate": 1e-05, "loss": 0.466, "step": 2854 }, { "epoch": 0.352219103722666, "grad_norm": 0.5763205885887146, "learning_rate": 1e-05, "loss": 0.5781, "step": 2855 }, { "epoch": 0.35234247293587884, "grad_norm": 0.5539776086807251, "learning_rate": 1e-05, "loss": 0.6425, "step": 2856 }, { "epoch": 0.3524658421490917, "grad_norm": 0.5658628940582275, "learning_rate": 1e-05, "loss": 0.6624, "step": 2857 }, { "epoch": 0.35258921136230453, "grad_norm": 0.5704881548881531, "learning_rate": 1e-05, "loss": 0.5554, "step": 2858 }, { "epoch": 0.35271258057551735, "grad_norm": 0.6187602281570435, "learning_rate": 1e-05, "loss": 0.5586, "step": 2859 }, { "epoch": 0.35283594978873023, "grad_norm": 0.498569518327713, "learning_rate": 1e-05, "loss": 0.4626, "step": 2860 }, { "epoch": 0.35295931900194305, "grad_norm": 0.49587127566337585, "learning_rate": 1e-05, "loss": 0.4025, "step": 2861 }, { "epoch": 0.3530826882151559, "grad_norm": 0.5736995339393616, "learning_rate": 1e-05, "loss": 0.6099, "step": 2862 }, { "epoch": 0.35320605742836875, "grad_norm": 0.5471959710121155, "learning_rate": 1e-05, "loss": 0.5142, "step": 2863 }, { "epoch": 0.35332942664158157, "grad_norm": 0.5928611755371094, "learning_rate": 1e-05, "loss": 0.6329, "step": 2864 }, { "epoch": 0.35345279585479444, "grad_norm": 0.5860775113105774, "learning_rate": 1e-05, "loss": 0.6246, "step": 2865 }, { "epoch": 0.35357616506800726, "grad_norm": 0.49854710698127747, "learning_rate": 1e-05, "loss": 0.462, "step": 2866 }, { "epoch": 0.35369953428122014, "grad_norm": 0.6321138143539429, "learning_rate": 1e-05, "loss": 0.6235, "step": 2867 }, { "epoch": 0.35382290349443296, "grad_norm": 0.5303909778594971, "learning_rate": 1e-05, "loss": 0.5738, "step": 2868 }, { "epoch": 0.3539462727076458, "grad_norm": 0.6717904210090637, "learning_rate": 1e-05, "loss": 0.5392, "step": 2869 }, { "epoch": 0.35406964192085866, "grad_norm": 0.6701617240905762, "learning_rate": 1e-05, "loss": 0.6864, "step": 2870 }, { "epoch": 0.3541930111340715, "grad_norm": 0.6109520196914673, "learning_rate": 1e-05, "loss": 0.5867, "step": 2871 }, { "epoch": 0.35431638034728435, "grad_norm": 0.5641191005706787, "learning_rate": 1e-05, "loss": 0.513, "step": 2872 }, { "epoch": 0.3544397495604972, "grad_norm": 0.5617503523826599, "learning_rate": 1e-05, "loss": 0.5695, "step": 2873 }, { "epoch": 0.35456311877371, "grad_norm": 0.5567490458488464, "learning_rate": 1e-05, "loss": 0.5472, "step": 2874 }, { "epoch": 0.35468648798692287, "grad_norm": 0.5481709837913513, "learning_rate": 1e-05, "loss": 0.5514, "step": 2875 }, { "epoch": 0.3548098572001357, "grad_norm": 0.650813639163971, "learning_rate": 1e-05, "loss": 0.6666, "step": 2876 }, { "epoch": 0.35493322641334857, "grad_norm": 0.5520899891853333, "learning_rate": 1e-05, "loss": 0.5779, "step": 2877 }, { "epoch": 0.3550565956265614, "grad_norm": 0.6940165162086487, "learning_rate": 1e-05, "loss": 0.6261, "step": 2878 }, { "epoch": 0.3551799648397742, "grad_norm": 0.5429071187973022, "learning_rate": 1e-05, "loss": 0.5084, "step": 2879 }, { "epoch": 0.3553033340529871, "grad_norm": 0.6451153755187988, "learning_rate": 1e-05, "loss": 0.5942, "step": 2880 }, { "epoch": 0.3554267032661999, "grad_norm": 0.5614988803863525, "learning_rate": 1e-05, "loss": 0.562, "step": 2881 }, { "epoch": 0.3555500724794128, "grad_norm": 0.5447078943252563, "learning_rate": 1e-05, "loss": 0.5581, "step": 2882 }, { "epoch": 0.3556734416926256, "grad_norm": 0.6060201525688171, "learning_rate": 1e-05, "loss": 0.5596, "step": 2883 }, { "epoch": 0.3557968109058384, "grad_norm": 0.5894069075584412, "learning_rate": 1e-05, "loss": 0.5614, "step": 2884 }, { "epoch": 0.3559201801190513, "grad_norm": 0.5368551015853882, "learning_rate": 1e-05, "loss": 0.5636, "step": 2885 }, { "epoch": 0.3560435493322641, "grad_norm": 0.5952097773551941, "learning_rate": 1e-05, "loss": 0.5927, "step": 2886 }, { "epoch": 0.356166918545477, "grad_norm": 0.6140316724777222, "learning_rate": 1e-05, "loss": 0.5928, "step": 2887 }, { "epoch": 0.3562902877586898, "grad_norm": 0.6686484217643738, "learning_rate": 1e-05, "loss": 0.5957, "step": 2888 }, { "epoch": 0.35641365697190264, "grad_norm": 0.5832680463790894, "learning_rate": 1e-05, "loss": 0.5452, "step": 2889 }, { "epoch": 0.3565370261851155, "grad_norm": 0.5446078181266785, "learning_rate": 1e-05, "loss": 0.4903, "step": 2890 }, { "epoch": 0.35666039539832833, "grad_norm": 0.57147616147995, "learning_rate": 1e-05, "loss": 0.5542, "step": 2891 }, { "epoch": 0.3567837646115412, "grad_norm": 0.6116459965705872, "learning_rate": 1e-05, "loss": 0.6311, "step": 2892 }, { "epoch": 0.35690713382475403, "grad_norm": 0.5739576816558838, "learning_rate": 1e-05, "loss": 0.6103, "step": 2893 }, { "epoch": 0.35703050303796685, "grad_norm": 0.6114524602890015, "learning_rate": 1e-05, "loss": 0.5822, "step": 2894 }, { "epoch": 0.3571538722511797, "grad_norm": 0.5749885439872742, "learning_rate": 1e-05, "loss": 0.5502, "step": 2895 }, { "epoch": 0.35727724146439255, "grad_norm": 0.5894848108291626, "learning_rate": 1e-05, "loss": 0.5992, "step": 2896 }, { "epoch": 0.3574006106776054, "grad_norm": 0.5586111545562744, "learning_rate": 1e-05, "loss": 0.5475, "step": 2897 }, { "epoch": 0.35752397989081824, "grad_norm": 0.5711377859115601, "learning_rate": 1e-05, "loss": 0.5584, "step": 2898 }, { "epoch": 0.35764734910403106, "grad_norm": 0.6262373924255371, "learning_rate": 1e-05, "loss": 0.6743, "step": 2899 }, { "epoch": 0.35777071831724394, "grad_norm": 0.5262122750282288, "learning_rate": 1e-05, "loss": 0.5406, "step": 2900 }, { "epoch": 0.35789408753045676, "grad_norm": 0.5313953757286072, "learning_rate": 1e-05, "loss": 0.5022, "step": 2901 }, { "epoch": 0.35801745674366964, "grad_norm": 0.5254266858100891, "learning_rate": 1e-05, "loss": 0.5458, "step": 2902 }, { "epoch": 0.35814082595688246, "grad_norm": 0.5531641840934753, "learning_rate": 1e-05, "loss": 0.5478, "step": 2903 }, { "epoch": 0.3582641951700953, "grad_norm": 0.6268121600151062, "learning_rate": 1e-05, "loss": 0.608, "step": 2904 }, { "epoch": 0.35838756438330815, "grad_norm": 0.5926318168640137, "learning_rate": 1e-05, "loss": 0.6453, "step": 2905 }, { "epoch": 0.358510933596521, "grad_norm": 0.6019264459609985, "learning_rate": 1e-05, "loss": 0.6413, "step": 2906 }, { "epoch": 0.35863430280973385, "grad_norm": 0.5900680422782898, "learning_rate": 1e-05, "loss": 0.5962, "step": 2907 }, { "epoch": 0.35875767202294667, "grad_norm": 0.6083818674087524, "learning_rate": 1e-05, "loss": 0.6293, "step": 2908 }, { "epoch": 0.3588810412361595, "grad_norm": 0.5728723406791687, "learning_rate": 1e-05, "loss": 0.5966, "step": 2909 }, { "epoch": 0.35900441044937237, "grad_norm": 0.6439715623855591, "learning_rate": 1e-05, "loss": 0.5273, "step": 2910 }, { "epoch": 0.3591277796625852, "grad_norm": 0.5864740610122681, "learning_rate": 1e-05, "loss": 0.6045, "step": 2911 }, { "epoch": 0.35925114887579807, "grad_norm": 0.564078152179718, "learning_rate": 1e-05, "loss": 0.5488, "step": 2912 }, { "epoch": 0.3593745180890109, "grad_norm": 0.6397421360015869, "learning_rate": 1e-05, "loss": 0.676, "step": 2913 }, { "epoch": 0.3594978873022237, "grad_norm": 0.5799394845962524, "learning_rate": 1e-05, "loss": 0.4989, "step": 2914 }, { "epoch": 0.3596212565154366, "grad_norm": 0.5151711106300354, "learning_rate": 1e-05, "loss": 0.5286, "step": 2915 }, { "epoch": 0.3597446257286494, "grad_norm": 0.5349020957946777, "learning_rate": 1e-05, "loss": 0.5215, "step": 2916 }, { "epoch": 0.3598679949418623, "grad_norm": 0.5970150232315063, "learning_rate": 1e-05, "loss": 0.656, "step": 2917 }, { "epoch": 0.3599913641550751, "grad_norm": 0.5632339715957642, "learning_rate": 1e-05, "loss": 0.4984, "step": 2918 }, { "epoch": 0.3601147333682879, "grad_norm": 0.584243893623352, "learning_rate": 1e-05, "loss": 0.5725, "step": 2919 }, { "epoch": 0.3602381025815008, "grad_norm": 0.5376777648925781, "learning_rate": 1e-05, "loss": 0.542, "step": 2920 }, { "epoch": 0.3603614717947136, "grad_norm": 0.6456992626190186, "learning_rate": 1e-05, "loss": 0.7127, "step": 2921 }, { "epoch": 0.3604848410079265, "grad_norm": 0.5709275603294373, "learning_rate": 1e-05, "loss": 0.591, "step": 2922 }, { "epoch": 0.3606082102211393, "grad_norm": 0.6486138701438904, "learning_rate": 1e-05, "loss": 0.6389, "step": 2923 }, { "epoch": 0.36073157943435213, "grad_norm": 0.5801479816436768, "learning_rate": 1e-05, "loss": 0.5456, "step": 2924 }, { "epoch": 0.360854948647565, "grad_norm": 0.6061136722564697, "learning_rate": 1e-05, "loss": 0.6088, "step": 2925 }, { "epoch": 0.36097831786077783, "grad_norm": 0.5621854066848755, "learning_rate": 1e-05, "loss": 0.5848, "step": 2926 }, { "epoch": 0.3611016870739907, "grad_norm": 0.5561694502830505, "learning_rate": 1e-05, "loss": 0.5395, "step": 2927 }, { "epoch": 0.3612250562872035, "grad_norm": 0.6001797914505005, "learning_rate": 1e-05, "loss": 0.638, "step": 2928 }, { "epoch": 0.36134842550041635, "grad_norm": 0.6312984228134155, "learning_rate": 1e-05, "loss": 0.6663, "step": 2929 }, { "epoch": 0.3614717947136292, "grad_norm": 0.5595642328262329, "learning_rate": 1e-05, "loss": 0.6083, "step": 2930 }, { "epoch": 0.36159516392684204, "grad_norm": 0.4944949746131897, "learning_rate": 1e-05, "loss": 0.4618, "step": 2931 }, { "epoch": 0.3617185331400549, "grad_norm": 0.6098899841308594, "learning_rate": 1e-05, "loss": 0.5644, "step": 2932 }, { "epoch": 0.36184190235326774, "grad_norm": 0.5930312275886536, "learning_rate": 1e-05, "loss": 0.5713, "step": 2933 }, { "epoch": 0.36196527156648056, "grad_norm": 0.5343701839447021, "learning_rate": 1e-05, "loss": 0.5768, "step": 2934 }, { "epoch": 0.36208864077969344, "grad_norm": 0.6247645616531372, "learning_rate": 1e-05, "loss": 0.5734, "step": 2935 }, { "epoch": 0.36221200999290626, "grad_norm": 0.5322680473327637, "learning_rate": 1e-05, "loss": 0.538, "step": 2936 }, { "epoch": 0.36233537920611913, "grad_norm": 0.5431004166603088, "learning_rate": 1e-05, "loss": 0.5219, "step": 2937 }, { "epoch": 0.36245874841933196, "grad_norm": 0.5347900986671448, "learning_rate": 1e-05, "loss": 0.5167, "step": 2938 }, { "epoch": 0.3625821176325448, "grad_norm": 0.5796582102775574, "learning_rate": 1e-05, "loss": 0.5472, "step": 2939 }, { "epoch": 0.36270548684575765, "grad_norm": 0.6362269520759583, "learning_rate": 1e-05, "loss": 0.5685, "step": 2940 }, { "epoch": 0.3628288560589705, "grad_norm": 0.5865545868873596, "learning_rate": 1e-05, "loss": 0.5229, "step": 2941 }, { "epoch": 0.36295222527218335, "grad_norm": 0.5294147729873657, "learning_rate": 1e-05, "loss": 0.5499, "step": 2942 }, { "epoch": 0.36307559448539617, "grad_norm": 0.5198037028312683, "learning_rate": 1e-05, "loss": 0.4841, "step": 2943 }, { "epoch": 0.363198963698609, "grad_norm": 0.5419676899909973, "learning_rate": 1e-05, "loss": 0.5145, "step": 2944 }, { "epoch": 0.36332233291182187, "grad_norm": 0.6237730383872986, "learning_rate": 1e-05, "loss": 0.6047, "step": 2945 }, { "epoch": 0.3634457021250347, "grad_norm": 0.5840424299240112, "learning_rate": 1e-05, "loss": 0.6204, "step": 2946 }, { "epoch": 0.36356907133824756, "grad_norm": 0.6233068108558655, "learning_rate": 1e-05, "loss": 0.6433, "step": 2947 }, { "epoch": 0.3636924405514604, "grad_norm": 0.5552866458892822, "learning_rate": 1e-05, "loss": 0.4975, "step": 2948 }, { "epoch": 0.3638158097646732, "grad_norm": 0.5567596554756165, "learning_rate": 1e-05, "loss": 0.5589, "step": 2949 }, { "epoch": 0.3639391789778861, "grad_norm": 0.5940096974372864, "learning_rate": 1e-05, "loss": 0.6189, "step": 2950 }, { "epoch": 0.3640625481910989, "grad_norm": 0.6105766296386719, "learning_rate": 1e-05, "loss": 0.5787, "step": 2951 }, { "epoch": 0.3641859174043118, "grad_norm": 0.5536457300186157, "learning_rate": 1e-05, "loss": 0.5352, "step": 2952 }, { "epoch": 0.3643092866175246, "grad_norm": 0.5226534605026245, "learning_rate": 1e-05, "loss": 0.5222, "step": 2953 }, { "epoch": 0.3644326558307374, "grad_norm": 0.5460636019706726, "learning_rate": 1e-05, "loss": 0.4944, "step": 2954 }, { "epoch": 0.3645560250439503, "grad_norm": 0.584141194820404, "learning_rate": 1e-05, "loss": 0.5857, "step": 2955 }, { "epoch": 0.3646793942571631, "grad_norm": 0.5622216463088989, "learning_rate": 1e-05, "loss": 0.6454, "step": 2956 }, { "epoch": 0.364802763470376, "grad_norm": 0.5744539499282837, "learning_rate": 1e-05, "loss": 0.6425, "step": 2957 }, { "epoch": 0.3649261326835888, "grad_norm": 0.6072788834571838, "learning_rate": 1e-05, "loss": 0.6109, "step": 2958 }, { "epoch": 0.36504950189680163, "grad_norm": 0.5836388468742371, "learning_rate": 1e-05, "loss": 0.5618, "step": 2959 }, { "epoch": 0.3651728711100145, "grad_norm": 0.5176626443862915, "learning_rate": 1e-05, "loss": 0.5145, "step": 2960 }, { "epoch": 0.36529624032322733, "grad_norm": 0.5477195978164673, "learning_rate": 1e-05, "loss": 0.5358, "step": 2961 }, { "epoch": 0.3654196095364402, "grad_norm": 0.5370292067527771, "learning_rate": 1e-05, "loss": 0.5155, "step": 2962 }, { "epoch": 0.365542978749653, "grad_norm": 0.5763407349586487, "learning_rate": 1e-05, "loss": 0.5532, "step": 2963 }, { "epoch": 0.36566634796286585, "grad_norm": 0.5803996920585632, "learning_rate": 1e-05, "loss": 0.6017, "step": 2964 }, { "epoch": 0.3657897171760787, "grad_norm": 0.6501797437667847, "learning_rate": 1e-05, "loss": 0.653, "step": 2965 }, { "epoch": 0.36591308638929154, "grad_norm": 0.5218883156776428, "learning_rate": 1e-05, "loss": 0.5177, "step": 2966 }, { "epoch": 0.3660364556025044, "grad_norm": 0.541182816028595, "learning_rate": 1e-05, "loss": 0.5483, "step": 2967 }, { "epoch": 0.36615982481571724, "grad_norm": 0.6145259737968445, "learning_rate": 1e-05, "loss": 0.6266, "step": 2968 }, { "epoch": 0.36628319402893006, "grad_norm": 0.7670039534568787, "learning_rate": 1e-05, "loss": 0.6928, "step": 2969 }, { "epoch": 0.36640656324214294, "grad_norm": 0.5199810266494751, "learning_rate": 1e-05, "loss": 0.4634, "step": 2970 }, { "epoch": 0.36652993245535576, "grad_norm": 0.5646346807479858, "learning_rate": 1e-05, "loss": 0.5744, "step": 2971 }, { "epoch": 0.36665330166856863, "grad_norm": 0.5483995676040649, "learning_rate": 1e-05, "loss": 0.5374, "step": 2972 }, { "epoch": 0.36677667088178145, "grad_norm": 0.6440656185150146, "learning_rate": 1e-05, "loss": 0.6636, "step": 2973 }, { "epoch": 0.3669000400949943, "grad_norm": 0.6678549647331238, "learning_rate": 1e-05, "loss": 0.7817, "step": 2974 }, { "epoch": 0.36702340930820715, "grad_norm": 0.6324949264526367, "learning_rate": 1e-05, "loss": 0.6718, "step": 2975 }, { "epoch": 0.36714677852141997, "grad_norm": 0.5856647491455078, "learning_rate": 1e-05, "loss": 0.6156, "step": 2976 }, { "epoch": 0.36727014773463285, "grad_norm": 0.6042177677154541, "learning_rate": 1e-05, "loss": 0.5436, "step": 2977 }, { "epoch": 0.36739351694784567, "grad_norm": 0.5506766438484192, "learning_rate": 1e-05, "loss": 0.5741, "step": 2978 }, { "epoch": 0.3675168861610585, "grad_norm": 0.5446516275405884, "learning_rate": 1e-05, "loss": 0.5431, "step": 2979 }, { "epoch": 0.36764025537427136, "grad_norm": 0.5373630523681641, "learning_rate": 1e-05, "loss": 0.5075, "step": 2980 }, { "epoch": 0.3677636245874842, "grad_norm": 0.5620628595352173, "learning_rate": 1e-05, "loss": 0.5556, "step": 2981 }, { "epoch": 0.36788699380069706, "grad_norm": 0.579494297504425, "learning_rate": 1e-05, "loss": 0.6026, "step": 2982 }, { "epoch": 0.3680103630139099, "grad_norm": 0.5655757188796997, "learning_rate": 1e-05, "loss": 0.6063, "step": 2983 }, { "epoch": 0.3681337322271227, "grad_norm": 0.5770558714866638, "learning_rate": 1e-05, "loss": 0.5792, "step": 2984 }, { "epoch": 0.3682571014403356, "grad_norm": 0.5722485780715942, "learning_rate": 1e-05, "loss": 0.5669, "step": 2985 }, { "epoch": 0.3683804706535484, "grad_norm": 0.5224024653434753, "learning_rate": 1e-05, "loss": 0.4823, "step": 2986 }, { "epoch": 0.3685038398667613, "grad_norm": 0.5666846036911011, "learning_rate": 1e-05, "loss": 0.5257, "step": 2987 }, { "epoch": 0.3686272090799741, "grad_norm": 0.561859667301178, "learning_rate": 1e-05, "loss": 0.5755, "step": 2988 }, { "epoch": 0.3687505782931869, "grad_norm": 0.5861628651618958, "learning_rate": 1e-05, "loss": 0.4863, "step": 2989 }, { "epoch": 0.3688739475063998, "grad_norm": 0.592782199382782, "learning_rate": 1e-05, "loss": 0.6227, "step": 2990 }, { "epoch": 0.3689973167196126, "grad_norm": 0.6368445754051208, "learning_rate": 1e-05, "loss": 0.6913, "step": 2991 }, { "epoch": 0.3691206859328255, "grad_norm": 0.593220591545105, "learning_rate": 1e-05, "loss": 0.5455, "step": 2992 }, { "epoch": 0.3692440551460383, "grad_norm": 0.6623802185058594, "learning_rate": 1e-05, "loss": 0.6324, "step": 2993 }, { "epoch": 0.36936742435925113, "grad_norm": 0.5774980187416077, "learning_rate": 1e-05, "loss": 0.5221, "step": 2994 }, { "epoch": 0.369490793572464, "grad_norm": 0.5879089832305908, "learning_rate": 1e-05, "loss": 0.5895, "step": 2995 }, { "epoch": 0.3696141627856768, "grad_norm": 0.5775116682052612, "learning_rate": 1e-05, "loss": 0.657, "step": 2996 }, { "epoch": 0.3697375319988897, "grad_norm": 0.5407228469848633, "learning_rate": 1e-05, "loss": 0.5529, "step": 2997 }, { "epoch": 0.3698609012121025, "grad_norm": 0.5778003931045532, "learning_rate": 1e-05, "loss": 0.63, "step": 2998 }, { "epoch": 0.36998427042531534, "grad_norm": 0.5873610377311707, "learning_rate": 1e-05, "loss": 0.5865, "step": 2999 }, { "epoch": 0.3701076396385282, "grad_norm": 0.5286722183227539, "learning_rate": 1e-05, "loss": 0.5183, "step": 3000 }, { "epoch": 0.37023100885174104, "grad_norm": 0.5820950865745544, "learning_rate": 1e-05, "loss": 0.5891, "step": 3001 }, { "epoch": 0.3703543780649539, "grad_norm": 0.6928324699401855, "learning_rate": 1e-05, "loss": 0.6307, "step": 3002 }, { "epoch": 0.37047774727816674, "grad_norm": 0.5195987224578857, "learning_rate": 1e-05, "loss": 0.5286, "step": 3003 }, { "epoch": 0.37060111649137956, "grad_norm": 0.533922016620636, "learning_rate": 1e-05, "loss": 0.577, "step": 3004 }, { "epoch": 0.37072448570459243, "grad_norm": 0.5489411354064941, "learning_rate": 1e-05, "loss": 0.5188, "step": 3005 }, { "epoch": 0.37084785491780525, "grad_norm": 0.47914859652519226, "learning_rate": 1e-05, "loss": 0.4103, "step": 3006 }, { "epoch": 0.37097122413101813, "grad_norm": 0.6331865787506104, "learning_rate": 1e-05, "loss": 0.6749, "step": 3007 }, { "epoch": 0.37109459334423095, "grad_norm": 0.5966166257858276, "learning_rate": 1e-05, "loss": 0.4841, "step": 3008 }, { "epoch": 0.37121796255744377, "grad_norm": 0.58765709400177, "learning_rate": 1e-05, "loss": 0.6857, "step": 3009 }, { "epoch": 0.37134133177065665, "grad_norm": 0.5886469483375549, "learning_rate": 1e-05, "loss": 0.5577, "step": 3010 }, { "epoch": 0.37146470098386947, "grad_norm": 0.6352699398994446, "learning_rate": 1e-05, "loss": 0.6707, "step": 3011 }, { "epoch": 0.37158807019708234, "grad_norm": 0.5711622834205627, "learning_rate": 1e-05, "loss": 0.549, "step": 3012 }, { "epoch": 0.37171143941029516, "grad_norm": 0.584288477897644, "learning_rate": 1e-05, "loss": 0.59, "step": 3013 }, { "epoch": 0.371834808623508, "grad_norm": 0.6426131129264832, "learning_rate": 1e-05, "loss": 0.7396, "step": 3014 }, { "epoch": 0.37195817783672086, "grad_norm": 0.568684458732605, "learning_rate": 1e-05, "loss": 0.5941, "step": 3015 }, { "epoch": 0.3720815470499337, "grad_norm": 0.623653769493103, "learning_rate": 1e-05, "loss": 0.6452, "step": 3016 }, { "epoch": 0.37220491626314656, "grad_norm": 0.6473556756973267, "learning_rate": 1e-05, "loss": 0.5496, "step": 3017 }, { "epoch": 0.3723282854763594, "grad_norm": 0.6363291144371033, "learning_rate": 1e-05, "loss": 0.6439, "step": 3018 }, { "epoch": 0.3724516546895722, "grad_norm": 0.5901399850845337, "learning_rate": 1e-05, "loss": 0.7111, "step": 3019 }, { "epoch": 0.3725750239027851, "grad_norm": 0.6232993006706238, "learning_rate": 1e-05, "loss": 0.607, "step": 3020 }, { "epoch": 0.3726983931159979, "grad_norm": 0.6197306513786316, "learning_rate": 1e-05, "loss": 0.6341, "step": 3021 }, { "epoch": 0.37282176232921077, "grad_norm": 0.591190755367279, "learning_rate": 1e-05, "loss": 0.6129, "step": 3022 }, { "epoch": 0.3729451315424236, "grad_norm": 0.5396103262901306, "learning_rate": 1e-05, "loss": 0.5445, "step": 3023 }, { "epoch": 0.3730685007556364, "grad_norm": 0.5709892511367798, "learning_rate": 1e-05, "loss": 0.5464, "step": 3024 }, { "epoch": 0.3731918699688493, "grad_norm": 0.7423781156539917, "learning_rate": 1e-05, "loss": 0.673, "step": 3025 }, { "epoch": 0.3733152391820621, "grad_norm": 0.5938796997070312, "learning_rate": 1e-05, "loss": 0.5768, "step": 3026 }, { "epoch": 0.373438608395275, "grad_norm": 0.6296753287315369, "learning_rate": 1e-05, "loss": 0.6384, "step": 3027 }, { "epoch": 0.3735619776084878, "grad_norm": 0.6035594344139099, "learning_rate": 1e-05, "loss": 0.5783, "step": 3028 }, { "epoch": 0.3736853468217006, "grad_norm": 0.5880927443504333, "learning_rate": 1e-05, "loss": 0.6732, "step": 3029 }, { "epoch": 0.3738087160349135, "grad_norm": 0.5332705974578857, "learning_rate": 1e-05, "loss": 0.5504, "step": 3030 }, { "epoch": 0.3739320852481263, "grad_norm": 0.5403416156768799, "learning_rate": 1e-05, "loss": 0.479, "step": 3031 }, { "epoch": 0.3740554544613392, "grad_norm": 0.5394124388694763, "learning_rate": 1e-05, "loss": 0.5259, "step": 3032 }, { "epoch": 0.374178823674552, "grad_norm": 0.6102770566940308, "learning_rate": 1e-05, "loss": 0.5899, "step": 3033 }, { "epoch": 0.37430219288776484, "grad_norm": 0.5768865346908569, "learning_rate": 1e-05, "loss": 0.5908, "step": 3034 }, { "epoch": 0.3744255621009777, "grad_norm": 0.5565271973609924, "learning_rate": 1e-05, "loss": 0.577, "step": 3035 }, { "epoch": 0.37454893131419054, "grad_norm": 0.5805045366287231, "learning_rate": 1e-05, "loss": 0.5645, "step": 3036 }, { "epoch": 0.3746723005274034, "grad_norm": 0.6476191878318787, "learning_rate": 1e-05, "loss": 0.7037, "step": 3037 }, { "epoch": 0.37479566974061623, "grad_norm": 0.5337827205657959, "learning_rate": 1e-05, "loss": 0.4186, "step": 3038 }, { "epoch": 0.37491903895382905, "grad_norm": 0.5638209581375122, "learning_rate": 1e-05, "loss": 0.6196, "step": 3039 }, { "epoch": 0.37504240816704193, "grad_norm": 0.5807219743728638, "learning_rate": 1e-05, "loss": 0.5818, "step": 3040 }, { "epoch": 0.37516577738025475, "grad_norm": 0.6406872868537903, "learning_rate": 1e-05, "loss": 0.5904, "step": 3041 }, { "epoch": 0.3752891465934676, "grad_norm": 0.6372016668319702, "learning_rate": 1e-05, "loss": 0.6686, "step": 3042 }, { "epoch": 0.37541251580668045, "grad_norm": 0.6580915451049805, "learning_rate": 1e-05, "loss": 0.5499, "step": 3043 }, { "epoch": 0.37553588501989327, "grad_norm": 0.5602670907974243, "learning_rate": 1e-05, "loss": 0.5185, "step": 3044 }, { "epoch": 0.37565925423310614, "grad_norm": 0.6322024464607239, "learning_rate": 1e-05, "loss": 0.5746, "step": 3045 }, { "epoch": 0.37578262344631896, "grad_norm": 0.5042926073074341, "learning_rate": 1e-05, "loss": 0.5321, "step": 3046 }, { "epoch": 0.37590599265953184, "grad_norm": 0.5066234469413757, "learning_rate": 1e-05, "loss": 0.5527, "step": 3047 }, { "epoch": 0.37602936187274466, "grad_norm": 0.6009799242019653, "learning_rate": 1e-05, "loss": 0.5819, "step": 3048 }, { "epoch": 0.3761527310859575, "grad_norm": 0.5342724919319153, "learning_rate": 1e-05, "loss": 0.5615, "step": 3049 }, { "epoch": 0.37627610029917036, "grad_norm": 0.6055448651313782, "learning_rate": 1e-05, "loss": 0.6889, "step": 3050 }, { "epoch": 0.3763994695123832, "grad_norm": 0.5302293300628662, "learning_rate": 1e-05, "loss": 0.4799, "step": 3051 }, { "epoch": 0.37652283872559605, "grad_norm": 0.593411386013031, "learning_rate": 1e-05, "loss": 0.5829, "step": 3052 }, { "epoch": 0.3766462079388089, "grad_norm": 0.5867234468460083, "learning_rate": 1e-05, "loss": 0.629, "step": 3053 }, { "epoch": 0.3767695771520217, "grad_norm": 0.5606476664543152, "learning_rate": 1e-05, "loss": 0.5662, "step": 3054 }, { "epoch": 0.37689294636523457, "grad_norm": 0.5736486315727234, "learning_rate": 1e-05, "loss": 0.5851, "step": 3055 }, { "epoch": 0.3770163155784474, "grad_norm": 0.5532087087631226, "learning_rate": 1e-05, "loss": 0.5309, "step": 3056 }, { "epoch": 0.37713968479166027, "grad_norm": 0.5325124263763428, "learning_rate": 1e-05, "loss": 0.5662, "step": 3057 }, { "epoch": 0.3772630540048731, "grad_norm": 0.6689046025276184, "learning_rate": 1e-05, "loss": 0.7391, "step": 3058 }, { "epoch": 0.3773864232180859, "grad_norm": 0.6869233250617981, "learning_rate": 1e-05, "loss": 0.6475, "step": 3059 }, { "epoch": 0.3775097924312988, "grad_norm": 0.5426994562149048, "learning_rate": 1e-05, "loss": 0.5808, "step": 3060 }, { "epoch": 0.3776331616445116, "grad_norm": 0.5497912168502808, "learning_rate": 1e-05, "loss": 0.434, "step": 3061 }, { "epoch": 0.3777565308577245, "grad_norm": 0.5718759298324585, "learning_rate": 1e-05, "loss": 0.5742, "step": 3062 }, { "epoch": 0.3778799000709373, "grad_norm": 0.6023191809654236, "learning_rate": 1e-05, "loss": 0.5812, "step": 3063 }, { "epoch": 0.3780032692841501, "grad_norm": 0.5647251009941101, "learning_rate": 1e-05, "loss": 0.4962, "step": 3064 }, { "epoch": 0.378126638497363, "grad_norm": 0.5520938634872437, "learning_rate": 1e-05, "loss": 0.5348, "step": 3065 }, { "epoch": 0.3782500077105758, "grad_norm": 0.6049796342849731, "learning_rate": 1e-05, "loss": 0.5309, "step": 3066 }, { "epoch": 0.3783733769237887, "grad_norm": 0.6678699851036072, "learning_rate": 1e-05, "loss": 0.5898, "step": 3067 }, { "epoch": 0.3784967461370015, "grad_norm": 0.5222225785255432, "learning_rate": 1e-05, "loss": 0.5106, "step": 3068 }, { "epoch": 0.37862011535021434, "grad_norm": 0.6580740809440613, "learning_rate": 1e-05, "loss": 0.7116, "step": 3069 }, { "epoch": 0.3787434845634272, "grad_norm": 0.6171318292617798, "learning_rate": 1e-05, "loss": 0.6291, "step": 3070 }, { "epoch": 0.37886685377664003, "grad_norm": 0.5574058890342712, "learning_rate": 1e-05, "loss": 0.5678, "step": 3071 }, { "epoch": 0.3789902229898529, "grad_norm": 0.5182671546936035, "learning_rate": 1e-05, "loss": 0.4681, "step": 3072 }, { "epoch": 0.37911359220306573, "grad_norm": 0.5498594045639038, "learning_rate": 1e-05, "loss": 0.5644, "step": 3073 }, { "epoch": 0.37923696141627855, "grad_norm": 0.5579624772071838, "learning_rate": 1e-05, "loss": 0.5222, "step": 3074 }, { "epoch": 0.3793603306294914, "grad_norm": 0.562920331954956, "learning_rate": 1e-05, "loss": 0.5837, "step": 3075 }, { "epoch": 0.37948369984270425, "grad_norm": 0.5735771059989929, "learning_rate": 1e-05, "loss": 0.5539, "step": 3076 }, { "epoch": 0.3796070690559171, "grad_norm": 0.534627377986908, "learning_rate": 1e-05, "loss": 0.5473, "step": 3077 }, { "epoch": 0.37973043826912994, "grad_norm": 0.5509268641471863, "learning_rate": 1e-05, "loss": 0.6081, "step": 3078 }, { "epoch": 0.37985380748234276, "grad_norm": 0.5974009037017822, "learning_rate": 1e-05, "loss": 0.5885, "step": 3079 }, { "epoch": 0.37997717669555564, "grad_norm": 0.5927830338478088, "learning_rate": 1e-05, "loss": 0.609, "step": 3080 }, { "epoch": 0.38010054590876846, "grad_norm": 0.5493411421775818, "learning_rate": 1e-05, "loss": 0.5082, "step": 3081 }, { "epoch": 0.3802239151219813, "grad_norm": 0.5480793714523315, "learning_rate": 1e-05, "loss": 0.5568, "step": 3082 }, { "epoch": 0.38034728433519416, "grad_norm": 0.7206184267997742, "learning_rate": 1e-05, "loss": 0.5725, "step": 3083 }, { "epoch": 0.380470653548407, "grad_norm": 0.6098241209983826, "learning_rate": 1e-05, "loss": 0.6096, "step": 3084 }, { "epoch": 0.38059402276161985, "grad_norm": 0.541135311126709, "learning_rate": 1e-05, "loss": 0.5804, "step": 3085 }, { "epoch": 0.3807173919748327, "grad_norm": 0.5319471955299377, "learning_rate": 1e-05, "loss": 0.5094, "step": 3086 }, { "epoch": 0.3808407611880455, "grad_norm": 0.7012749910354614, "learning_rate": 1e-05, "loss": 0.7636, "step": 3087 }, { "epoch": 0.38096413040125837, "grad_norm": 0.5486223101615906, "learning_rate": 1e-05, "loss": 0.4991, "step": 3088 }, { "epoch": 0.3810874996144712, "grad_norm": 0.647128701210022, "learning_rate": 1e-05, "loss": 0.6991, "step": 3089 }, { "epoch": 0.38121086882768407, "grad_norm": 0.6058773994445801, "learning_rate": 1e-05, "loss": 0.5557, "step": 3090 }, { "epoch": 0.3813342380408969, "grad_norm": 0.5716757774353027, "learning_rate": 1e-05, "loss": 0.5782, "step": 3091 }, { "epoch": 0.3814576072541097, "grad_norm": 0.5357521176338196, "learning_rate": 1e-05, "loss": 0.4501, "step": 3092 }, { "epoch": 0.3815809764673226, "grad_norm": 0.5335794687271118, "learning_rate": 1e-05, "loss": 0.5389, "step": 3093 }, { "epoch": 0.3817043456805354, "grad_norm": 0.656773567199707, "learning_rate": 1e-05, "loss": 0.7003, "step": 3094 }, { "epoch": 0.3818277148937483, "grad_norm": 0.5737707614898682, "learning_rate": 1e-05, "loss": 0.5914, "step": 3095 }, { "epoch": 0.3819510841069611, "grad_norm": 0.5160267353057861, "learning_rate": 1e-05, "loss": 0.4443, "step": 3096 }, { "epoch": 0.3820744533201739, "grad_norm": 0.5716261863708496, "learning_rate": 1e-05, "loss": 0.5131, "step": 3097 }, { "epoch": 0.3821978225333868, "grad_norm": 0.6056031584739685, "learning_rate": 1e-05, "loss": 0.7411, "step": 3098 }, { "epoch": 0.3823211917465996, "grad_norm": 0.5679676532745361, "learning_rate": 1e-05, "loss": 0.4984, "step": 3099 }, { "epoch": 0.3824445609598125, "grad_norm": 0.6312944889068604, "learning_rate": 1e-05, "loss": 0.5945, "step": 3100 }, { "epoch": 0.3825679301730253, "grad_norm": 0.606552243232727, "learning_rate": 1e-05, "loss": 0.5985, "step": 3101 }, { "epoch": 0.38269129938623814, "grad_norm": 0.5917397737503052, "learning_rate": 1e-05, "loss": 0.6757, "step": 3102 }, { "epoch": 0.382814668599451, "grad_norm": 0.5172686576843262, "learning_rate": 1e-05, "loss": 0.5071, "step": 3103 }, { "epoch": 0.38293803781266383, "grad_norm": 0.5947912931442261, "learning_rate": 1e-05, "loss": 0.597, "step": 3104 }, { "epoch": 0.3830614070258767, "grad_norm": 0.6555713415145874, "learning_rate": 1e-05, "loss": 0.6457, "step": 3105 }, { "epoch": 0.38318477623908953, "grad_norm": 0.5890206694602966, "learning_rate": 1e-05, "loss": 0.5668, "step": 3106 }, { "epoch": 0.38330814545230235, "grad_norm": 0.6147009134292603, "learning_rate": 1e-05, "loss": 0.6471, "step": 3107 }, { "epoch": 0.3834315146655152, "grad_norm": 0.5541001558303833, "learning_rate": 1e-05, "loss": 0.5303, "step": 3108 }, { "epoch": 0.38355488387872805, "grad_norm": 0.5778834819793701, "learning_rate": 1e-05, "loss": 0.5712, "step": 3109 }, { "epoch": 0.3836782530919409, "grad_norm": 0.6135790348052979, "learning_rate": 1e-05, "loss": 0.6272, "step": 3110 }, { "epoch": 0.38380162230515374, "grad_norm": 0.6603097319602966, "learning_rate": 1e-05, "loss": 0.5972, "step": 3111 }, { "epoch": 0.38392499151836657, "grad_norm": 0.5535052418708801, "learning_rate": 1e-05, "loss": 0.5182, "step": 3112 }, { "epoch": 0.38404836073157944, "grad_norm": 0.545960009098053, "learning_rate": 1e-05, "loss": 0.5291, "step": 3113 }, { "epoch": 0.38417172994479226, "grad_norm": 0.5887326002120972, "learning_rate": 1e-05, "loss": 0.662, "step": 3114 }, { "epoch": 0.38429509915800514, "grad_norm": 0.5087003707885742, "learning_rate": 1e-05, "loss": 0.528, "step": 3115 }, { "epoch": 0.38441846837121796, "grad_norm": 0.724620521068573, "learning_rate": 1e-05, "loss": 0.6712, "step": 3116 }, { "epoch": 0.3845418375844308, "grad_norm": 0.6210514903068542, "learning_rate": 1e-05, "loss": 0.6423, "step": 3117 }, { "epoch": 0.38466520679764366, "grad_norm": 0.6087942123413086, "learning_rate": 1e-05, "loss": 0.6788, "step": 3118 }, { "epoch": 0.3847885760108565, "grad_norm": 0.8547704219818115, "learning_rate": 1e-05, "loss": 0.5723, "step": 3119 }, { "epoch": 0.38491194522406935, "grad_norm": 0.5697944760322571, "learning_rate": 1e-05, "loss": 0.6047, "step": 3120 }, { "epoch": 0.3850353144372822, "grad_norm": 0.5665070414543152, "learning_rate": 1e-05, "loss": 0.554, "step": 3121 }, { "epoch": 0.385158683650495, "grad_norm": 0.5881862640380859, "learning_rate": 1e-05, "loss": 0.5551, "step": 3122 }, { "epoch": 0.38528205286370787, "grad_norm": 0.6663018465042114, "learning_rate": 1e-05, "loss": 0.7762, "step": 3123 }, { "epoch": 0.3854054220769207, "grad_norm": 0.5690166354179382, "learning_rate": 1e-05, "loss": 0.5577, "step": 3124 }, { "epoch": 0.38552879129013357, "grad_norm": 0.6297511458396912, "learning_rate": 1e-05, "loss": 0.6623, "step": 3125 }, { "epoch": 0.3856521605033464, "grad_norm": 0.5716400742530823, "learning_rate": 1e-05, "loss": 0.5458, "step": 3126 }, { "epoch": 0.3857755297165592, "grad_norm": 0.594106137752533, "learning_rate": 1e-05, "loss": 0.5988, "step": 3127 }, { "epoch": 0.3858988989297721, "grad_norm": 0.6210083365440369, "learning_rate": 1e-05, "loss": 0.6173, "step": 3128 }, { "epoch": 0.3860222681429849, "grad_norm": 0.6803325414657593, "learning_rate": 1e-05, "loss": 0.7193, "step": 3129 }, { "epoch": 0.3861456373561978, "grad_norm": 0.5490274429321289, "learning_rate": 1e-05, "loss": 0.5547, "step": 3130 }, { "epoch": 0.3862690065694106, "grad_norm": 0.6281974911689758, "learning_rate": 1e-05, "loss": 0.6775, "step": 3131 }, { "epoch": 0.3863923757826234, "grad_norm": 0.5853630304336548, "learning_rate": 1e-05, "loss": 0.6265, "step": 3132 }, { "epoch": 0.3865157449958363, "grad_norm": 0.6116522550582886, "learning_rate": 1e-05, "loss": 0.5393, "step": 3133 }, { "epoch": 0.3866391142090491, "grad_norm": 0.6402584910392761, "learning_rate": 1e-05, "loss": 0.6356, "step": 3134 }, { "epoch": 0.386762483422262, "grad_norm": 0.5505338311195374, "learning_rate": 1e-05, "loss": 0.5565, "step": 3135 }, { "epoch": 0.3868858526354748, "grad_norm": 0.5996549725532532, "learning_rate": 1e-05, "loss": 0.6027, "step": 3136 }, { "epoch": 0.38700922184868763, "grad_norm": 0.6119025945663452, "learning_rate": 1e-05, "loss": 0.6065, "step": 3137 }, { "epoch": 0.3871325910619005, "grad_norm": 0.5892775654792786, "learning_rate": 1e-05, "loss": 0.5376, "step": 3138 }, { "epoch": 0.38725596027511333, "grad_norm": 0.5776733160018921, "learning_rate": 1e-05, "loss": 0.5376, "step": 3139 }, { "epoch": 0.3873793294883262, "grad_norm": 0.6440864205360413, "learning_rate": 1e-05, "loss": 0.5777, "step": 3140 }, { "epoch": 0.38750269870153903, "grad_norm": 0.5891920328140259, "learning_rate": 1e-05, "loss": 0.66, "step": 3141 }, { "epoch": 0.38762606791475185, "grad_norm": 0.5389650464057922, "learning_rate": 1e-05, "loss": 0.4795, "step": 3142 }, { "epoch": 0.3877494371279647, "grad_norm": 0.7073188424110413, "learning_rate": 1e-05, "loss": 0.7791, "step": 3143 }, { "epoch": 0.38787280634117755, "grad_norm": 0.6555617451667786, "learning_rate": 1e-05, "loss": 0.6926, "step": 3144 }, { "epoch": 0.3879961755543904, "grad_norm": 0.6343688368797302, "learning_rate": 1e-05, "loss": 0.6495, "step": 3145 }, { "epoch": 0.38811954476760324, "grad_norm": 0.5918529629707336, "learning_rate": 1e-05, "loss": 0.627, "step": 3146 }, { "epoch": 0.38824291398081606, "grad_norm": 0.4861700236797333, "learning_rate": 1e-05, "loss": 0.4691, "step": 3147 }, { "epoch": 0.38836628319402894, "grad_norm": 0.5308101773262024, "learning_rate": 1e-05, "loss": 0.5615, "step": 3148 }, { "epoch": 0.38848965240724176, "grad_norm": 0.6179810166358948, "learning_rate": 1e-05, "loss": 0.561, "step": 3149 }, { "epoch": 0.38861302162045464, "grad_norm": 0.5914848446846008, "learning_rate": 1e-05, "loss": 0.5877, "step": 3150 }, { "epoch": 0.38873639083366746, "grad_norm": 0.5795027017593384, "learning_rate": 1e-05, "loss": 0.5973, "step": 3151 }, { "epoch": 0.3888597600468803, "grad_norm": 0.5626875162124634, "learning_rate": 1e-05, "loss": 0.5182, "step": 3152 }, { "epoch": 0.38898312926009315, "grad_norm": 0.6363645195960999, "learning_rate": 1e-05, "loss": 0.6616, "step": 3153 }, { "epoch": 0.389106498473306, "grad_norm": 0.5056419372558594, "learning_rate": 1e-05, "loss": 0.4823, "step": 3154 }, { "epoch": 0.38922986768651885, "grad_norm": 0.6082915663719177, "learning_rate": 1e-05, "loss": 0.6853, "step": 3155 }, { "epoch": 0.38935323689973167, "grad_norm": 0.6278605461120605, "learning_rate": 1e-05, "loss": 0.6431, "step": 3156 }, { "epoch": 0.3894766061129445, "grad_norm": 0.5960647463798523, "learning_rate": 1e-05, "loss": 0.6416, "step": 3157 }, { "epoch": 0.38959997532615737, "grad_norm": 0.5100641250610352, "learning_rate": 1e-05, "loss": 0.4761, "step": 3158 }, { "epoch": 0.3897233445393702, "grad_norm": 0.5309823155403137, "learning_rate": 1e-05, "loss": 0.5386, "step": 3159 }, { "epoch": 0.38984671375258306, "grad_norm": 0.5431652665138245, "learning_rate": 1e-05, "loss": 0.5072, "step": 3160 }, { "epoch": 0.3899700829657959, "grad_norm": 0.611786961555481, "learning_rate": 1e-05, "loss": 0.5782, "step": 3161 }, { "epoch": 0.3900934521790087, "grad_norm": 0.5261616706848145, "learning_rate": 1e-05, "loss": 0.4697, "step": 3162 }, { "epoch": 0.3902168213922216, "grad_norm": 0.6078078150749207, "learning_rate": 1e-05, "loss": 0.5721, "step": 3163 }, { "epoch": 0.3903401906054344, "grad_norm": 0.6052103042602539, "learning_rate": 1e-05, "loss": 0.5572, "step": 3164 }, { "epoch": 0.3904635598186473, "grad_norm": 0.6387269496917725, "learning_rate": 1e-05, "loss": 0.7133, "step": 3165 }, { "epoch": 0.3905869290318601, "grad_norm": 0.6051912903785706, "learning_rate": 1e-05, "loss": 0.6278, "step": 3166 }, { "epoch": 0.3907102982450729, "grad_norm": 0.6118505597114563, "learning_rate": 1e-05, "loss": 0.5974, "step": 3167 }, { "epoch": 0.3908336674582858, "grad_norm": 0.5834602117538452, "learning_rate": 1e-05, "loss": 0.6353, "step": 3168 }, { "epoch": 0.3909570366714986, "grad_norm": 0.6050713658332825, "learning_rate": 1e-05, "loss": 0.5961, "step": 3169 }, { "epoch": 0.3910804058847115, "grad_norm": 0.595632016658783, "learning_rate": 1e-05, "loss": 0.6682, "step": 3170 }, { "epoch": 0.3912037750979243, "grad_norm": 0.580701470375061, "learning_rate": 1e-05, "loss": 0.5357, "step": 3171 }, { "epoch": 0.39132714431113713, "grad_norm": 0.5983871221542358, "learning_rate": 1e-05, "loss": 0.6678, "step": 3172 }, { "epoch": 0.39145051352435, "grad_norm": 0.6230427026748657, "learning_rate": 1e-05, "loss": 0.6773, "step": 3173 }, { "epoch": 0.39157388273756283, "grad_norm": 0.6309583187103271, "learning_rate": 1e-05, "loss": 0.5278, "step": 3174 }, { "epoch": 0.3916972519507757, "grad_norm": 0.5931730270385742, "learning_rate": 1e-05, "loss": 0.5972, "step": 3175 }, { "epoch": 0.3918206211639885, "grad_norm": 0.5920822620391846, "learning_rate": 1e-05, "loss": 0.633, "step": 3176 }, { "epoch": 0.39194399037720135, "grad_norm": 0.5936835408210754, "learning_rate": 1e-05, "loss": 0.6484, "step": 3177 }, { "epoch": 0.3920673595904142, "grad_norm": 0.6315973401069641, "learning_rate": 1e-05, "loss": 0.6017, "step": 3178 }, { "epoch": 0.39219072880362704, "grad_norm": 0.6527517437934875, "learning_rate": 1e-05, "loss": 0.7768, "step": 3179 }, { "epoch": 0.3923140980168399, "grad_norm": 0.6298494935035706, "learning_rate": 1e-05, "loss": 0.6063, "step": 3180 }, { "epoch": 0.39243746723005274, "grad_norm": 0.6096734404563904, "learning_rate": 1e-05, "loss": 0.6026, "step": 3181 }, { "epoch": 0.39256083644326556, "grad_norm": 0.5694643259048462, "learning_rate": 1e-05, "loss": 0.607, "step": 3182 }, { "epoch": 0.39268420565647844, "grad_norm": 0.5795056223869324, "learning_rate": 1e-05, "loss": 0.5404, "step": 3183 }, { "epoch": 0.39280757486969126, "grad_norm": 0.57166987657547, "learning_rate": 1e-05, "loss": 0.6217, "step": 3184 }, { "epoch": 0.39293094408290413, "grad_norm": 0.5665484070777893, "learning_rate": 1e-05, "loss": 0.622, "step": 3185 }, { "epoch": 0.39305431329611695, "grad_norm": 0.6107555627822876, "learning_rate": 1e-05, "loss": 0.5372, "step": 3186 }, { "epoch": 0.3931776825093298, "grad_norm": 0.6196961998939514, "learning_rate": 1e-05, "loss": 0.5767, "step": 3187 }, { "epoch": 0.39330105172254265, "grad_norm": 0.5910053849220276, "learning_rate": 1e-05, "loss": 0.6302, "step": 3188 }, { "epoch": 0.39342442093575547, "grad_norm": 0.5458456873893738, "learning_rate": 1e-05, "loss": 0.4891, "step": 3189 }, { "epoch": 0.39354779014896835, "grad_norm": 0.5746874809265137, "learning_rate": 1e-05, "loss": 0.5615, "step": 3190 }, { "epoch": 0.39367115936218117, "grad_norm": 0.5496452450752258, "learning_rate": 1e-05, "loss": 0.5104, "step": 3191 }, { "epoch": 0.393794528575394, "grad_norm": 0.638568639755249, "learning_rate": 1e-05, "loss": 0.5492, "step": 3192 }, { "epoch": 0.39391789778860686, "grad_norm": 0.6214975118637085, "learning_rate": 1e-05, "loss": 0.6839, "step": 3193 }, { "epoch": 0.3940412670018197, "grad_norm": 0.47865647077560425, "learning_rate": 1e-05, "loss": 0.4716, "step": 3194 }, { "epoch": 0.39416463621503256, "grad_norm": 0.5747140049934387, "learning_rate": 1e-05, "loss": 0.567, "step": 3195 }, { "epoch": 0.3942880054282454, "grad_norm": 0.5831957459449768, "learning_rate": 1e-05, "loss": 0.5932, "step": 3196 }, { "epoch": 0.3944113746414582, "grad_norm": 0.5479838848114014, "learning_rate": 1e-05, "loss": 0.526, "step": 3197 }, { "epoch": 0.3945347438546711, "grad_norm": 0.5748164057731628, "learning_rate": 1e-05, "loss": 0.5445, "step": 3198 }, { "epoch": 0.3946581130678839, "grad_norm": 0.5579365491867065, "learning_rate": 1e-05, "loss": 0.5481, "step": 3199 }, { "epoch": 0.3947814822810968, "grad_norm": 0.5585082173347473, "learning_rate": 1e-05, "loss": 0.6346, "step": 3200 }, { "epoch": 0.3949048514943096, "grad_norm": 0.6571926474571228, "learning_rate": 1e-05, "loss": 0.627, "step": 3201 }, { "epoch": 0.3950282207075224, "grad_norm": 0.5029804110527039, "learning_rate": 1e-05, "loss": 0.4343, "step": 3202 }, { "epoch": 0.3951515899207353, "grad_norm": 0.565936803817749, "learning_rate": 1e-05, "loss": 0.6012, "step": 3203 }, { "epoch": 0.3952749591339481, "grad_norm": 0.6119303107261658, "learning_rate": 1e-05, "loss": 0.6407, "step": 3204 }, { "epoch": 0.395398328347161, "grad_norm": 0.5786303281784058, "learning_rate": 1e-05, "loss": 0.548, "step": 3205 }, { "epoch": 0.3955216975603738, "grad_norm": 0.5361892580986023, "learning_rate": 1e-05, "loss": 0.4777, "step": 3206 }, { "epoch": 0.39564506677358663, "grad_norm": 0.6822327375411987, "learning_rate": 1e-05, "loss": 0.5446, "step": 3207 }, { "epoch": 0.3957684359867995, "grad_norm": 0.5510512590408325, "learning_rate": 1e-05, "loss": 0.4958, "step": 3208 }, { "epoch": 0.3958918052000123, "grad_norm": 0.648247480392456, "learning_rate": 1e-05, "loss": 0.7042, "step": 3209 }, { "epoch": 0.3960151744132252, "grad_norm": 0.5226948857307434, "learning_rate": 1e-05, "loss": 0.5031, "step": 3210 }, { "epoch": 0.396138543626438, "grad_norm": 0.6518147587776184, "learning_rate": 1e-05, "loss": 0.6564, "step": 3211 }, { "epoch": 0.39626191283965084, "grad_norm": 0.5699924826622009, "learning_rate": 1e-05, "loss": 0.6116, "step": 3212 }, { "epoch": 0.3963852820528637, "grad_norm": 0.6016775965690613, "learning_rate": 1e-05, "loss": 0.5783, "step": 3213 }, { "epoch": 0.39650865126607654, "grad_norm": 0.5831443667411804, "learning_rate": 1e-05, "loss": 0.5728, "step": 3214 }, { "epoch": 0.3966320204792894, "grad_norm": 0.5651699304580688, "learning_rate": 1e-05, "loss": 0.4641, "step": 3215 }, { "epoch": 0.39675538969250224, "grad_norm": 0.5970153212547302, "learning_rate": 1e-05, "loss": 0.5405, "step": 3216 }, { "epoch": 0.39687875890571506, "grad_norm": 0.5828655362129211, "learning_rate": 1e-05, "loss": 0.5676, "step": 3217 }, { "epoch": 0.39700212811892793, "grad_norm": 0.5512237548828125, "learning_rate": 1e-05, "loss": 0.5482, "step": 3218 }, { "epoch": 0.39712549733214075, "grad_norm": 0.5804588198661804, "learning_rate": 1e-05, "loss": 0.5998, "step": 3219 }, { "epoch": 0.39724886654535363, "grad_norm": 0.6071969270706177, "learning_rate": 1e-05, "loss": 0.589, "step": 3220 }, { "epoch": 0.39737223575856645, "grad_norm": 0.595180869102478, "learning_rate": 1e-05, "loss": 0.6354, "step": 3221 }, { "epoch": 0.39749560497177927, "grad_norm": 0.7344764471054077, "learning_rate": 1e-05, "loss": 0.6869, "step": 3222 }, { "epoch": 0.39761897418499215, "grad_norm": 0.5042850971221924, "learning_rate": 1e-05, "loss": 0.5014, "step": 3223 }, { "epoch": 0.39774234339820497, "grad_norm": 0.5853902101516724, "learning_rate": 1e-05, "loss": 0.6613, "step": 3224 }, { "epoch": 0.39786571261141784, "grad_norm": 0.5210365056991577, "learning_rate": 1e-05, "loss": 0.5144, "step": 3225 }, { "epoch": 0.39798908182463066, "grad_norm": 0.5848454236984253, "learning_rate": 1e-05, "loss": 0.551, "step": 3226 }, { "epoch": 0.3981124510378435, "grad_norm": 0.5812278985977173, "learning_rate": 1e-05, "loss": 0.5989, "step": 3227 }, { "epoch": 0.39823582025105636, "grad_norm": 0.5569229125976562, "learning_rate": 1e-05, "loss": 0.5273, "step": 3228 }, { "epoch": 0.3983591894642692, "grad_norm": 0.5983976125717163, "learning_rate": 1e-05, "loss": 0.5517, "step": 3229 }, { "epoch": 0.39848255867748206, "grad_norm": 0.5471674799919128, "learning_rate": 1e-05, "loss": 0.5818, "step": 3230 }, { "epoch": 0.3986059278906949, "grad_norm": 0.6119272708892822, "learning_rate": 1e-05, "loss": 0.6557, "step": 3231 }, { "epoch": 0.3987292971039077, "grad_norm": 0.545619010925293, "learning_rate": 1e-05, "loss": 0.5892, "step": 3232 }, { "epoch": 0.3988526663171206, "grad_norm": 0.6158991456031799, "learning_rate": 1e-05, "loss": 0.6186, "step": 3233 }, { "epoch": 0.3989760355303334, "grad_norm": 0.5945494771003723, "learning_rate": 1e-05, "loss": 0.548, "step": 3234 }, { "epoch": 0.39909940474354627, "grad_norm": 0.5570152997970581, "learning_rate": 1e-05, "loss": 0.5811, "step": 3235 }, { "epoch": 0.3992227739567591, "grad_norm": 0.5415077805519104, "learning_rate": 1e-05, "loss": 0.538, "step": 3236 }, { "epoch": 0.3993461431699719, "grad_norm": 0.5734636187553406, "learning_rate": 1e-05, "loss": 0.5359, "step": 3237 }, { "epoch": 0.3994695123831848, "grad_norm": 0.6353350281715393, "learning_rate": 1e-05, "loss": 0.6335, "step": 3238 }, { "epoch": 0.3995928815963976, "grad_norm": 0.5289315581321716, "learning_rate": 1e-05, "loss": 0.5156, "step": 3239 }, { "epoch": 0.3997162508096105, "grad_norm": 0.5619062185287476, "learning_rate": 1e-05, "loss": 0.5427, "step": 3240 }, { "epoch": 0.3998396200228233, "grad_norm": 0.5804216265678406, "learning_rate": 1e-05, "loss": 0.6041, "step": 3241 }, { "epoch": 0.3999629892360361, "grad_norm": 0.5052801370620728, "learning_rate": 1e-05, "loss": 0.4751, "step": 3242 }, { "epoch": 0.400086358449249, "grad_norm": 0.5741432905197144, "learning_rate": 1e-05, "loss": 0.5585, "step": 3243 }, { "epoch": 0.4002097276624618, "grad_norm": 0.5668405890464783, "learning_rate": 1e-05, "loss": 0.6112, "step": 3244 }, { "epoch": 0.4003330968756747, "grad_norm": 0.6085237264633179, "learning_rate": 1e-05, "loss": 0.5993, "step": 3245 }, { "epoch": 0.4004564660888875, "grad_norm": 0.5506681799888611, "learning_rate": 1e-05, "loss": 0.5277, "step": 3246 }, { "epoch": 0.40057983530210034, "grad_norm": 0.5457934141159058, "learning_rate": 1e-05, "loss": 0.5218, "step": 3247 }, { "epoch": 0.4007032045153132, "grad_norm": 0.5500662326812744, "learning_rate": 1e-05, "loss": 0.5375, "step": 3248 }, { "epoch": 0.40082657372852604, "grad_norm": 0.5685248374938965, "learning_rate": 1e-05, "loss": 0.5195, "step": 3249 }, { "epoch": 0.4009499429417389, "grad_norm": 0.5347602367401123, "learning_rate": 1e-05, "loss": 0.5041, "step": 3250 }, { "epoch": 0.40107331215495173, "grad_norm": 0.7179406881332397, "learning_rate": 1e-05, "loss": 0.6331, "step": 3251 }, { "epoch": 0.40119668136816455, "grad_norm": 0.5636001825332642, "learning_rate": 1e-05, "loss": 0.52, "step": 3252 }, { "epoch": 0.40132005058137743, "grad_norm": 0.5877425074577332, "learning_rate": 1e-05, "loss": 0.5581, "step": 3253 }, { "epoch": 0.40144341979459025, "grad_norm": 0.5692852139472961, "learning_rate": 1e-05, "loss": 0.5681, "step": 3254 }, { "epoch": 0.4015667890078031, "grad_norm": 0.5112485885620117, "learning_rate": 1e-05, "loss": 0.4958, "step": 3255 }, { "epoch": 0.40169015822101595, "grad_norm": 0.6076973676681519, "learning_rate": 1e-05, "loss": 0.6356, "step": 3256 }, { "epoch": 0.40181352743422877, "grad_norm": 0.5516545176506042, "learning_rate": 1e-05, "loss": 0.5124, "step": 3257 }, { "epoch": 0.40193689664744164, "grad_norm": 0.6871640682220459, "learning_rate": 1e-05, "loss": 0.6247, "step": 3258 }, { "epoch": 0.40206026586065446, "grad_norm": 0.5811910033226013, "learning_rate": 1e-05, "loss": 0.5875, "step": 3259 }, { "epoch": 0.40218363507386734, "grad_norm": 0.6339962482452393, "learning_rate": 1e-05, "loss": 0.6501, "step": 3260 }, { "epoch": 0.40230700428708016, "grad_norm": 0.5480230450630188, "learning_rate": 1e-05, "loss": 0.5597, "step": 3261 }, { "epoch": 0.402430373500293, "grad_norm": 0.6243653893470764, "learning_rate": 1e-05, "loss": 0.6935, "step": 3262 }, { "epoch": 0.40255374271350586, "grad_norm": 0.5709531903266907, "learning_rate": 1e-05, "loss": 0.5695, "step": 3263 }, { "epoch": 0.4026771119267187, "grad_norm": 0.5439247488975525, "learning_rate": 1e-05, "loss": 0.5704, "step": 3264 }, { "epoch": 0.40280048113993155, "grad_norm": 0.5940728783607483, "learning_rate": 1e-05, "loss": 0.5875, "step": 3265 }, { "epoch": 0.4029238503531444, "grad_norm": 0.5907882452011108, "learning_rate": 1e-05, "loss": 0.4593, "step": 3266 }, { "epoch": 0.4030472195663572, "grad_norm": 0.5690090656280518, "learning_rate": 1e-05, "loss": 0.6217, "step": 3267 }, { "epoch": 0.40317058877957007, "grad_norm": 0.6248504519462585, "learning_rate": 1e-05, "loss": 0.717, "step": 3268 }, { "epoch": 0.4032939579927829, "grad_norm": 0.6151852607727051, "learning_rate": 1e-05, "loss": 0.6548, "step": 3269 }, { "epoch": 0.40341732720599577, "grad_norm": 0.6048724055290222, "learning_rate": 1e-05, "loss": 0.6206, "step": 3270 }, { "epoch": 0.4035406964192086, "grad_norm": 0.5843302011489868, "learning_rate": 1e-05, "loss": 0.6906, "step": 3271 }, { "epoch": 0.4036640656324214, "grad_norm": 0.6204142570495605, "learning_rate": 1e-05, "loss": 0.6321, "step": 3272 }, { "epoch": 0.4037874348456343, "grad_norm": 0.5077176690101624, "learning_rate": 1e-05, "loss": 0.532, "step": 3273 }, { "epoch": 0.4039108040588471, "grad_norm": 0.5816966891288757, "learning_rate": 1e-05, "loss": 0.5387, "step": 3274 }, { "epoch": 0.40403417327206, "grad_norm": 0.5773031711578369, "learning_rate": 1e-05, "loss": 0.627, "step": 3275 }, { "epoch": 0.4041575424852728, "grad_norm": 0.5998937487602234, "learning_rate": 1e-05, "loss": 0.5651, "step": 3276 }, { "epoch": 0.4042809116984856, "grad_norm": 0.5016205310821533, "learning_rate": 1e-05, "loss": 0.4605, "step": 3277 }, { "epoch": 0.4044042809116985, "grad_norm": 0.5979008078575134, "learning_rate": 1e-05, "loss": 0.6487, "step": 3278 }, { "epoch": 0.4045276501249113, "grad_norm": 0.6193057298660278, "learning_rate": 1e-05, "loss": 0.586, "step": 3279 }, { "epoch": 0.4046510193381242, "grad_norm": 0.5530032515525818, "learning_rate": 1e-05, "loss": 0.5375, "step": 3280 }, { "epoch": 0.404774388551337, "grad_norm": 0.4997609555721283, "learning_rate": 1e-05, "loss": 0.5269, "step": 3281 }, { "epoch": 0.40489775776454984, "grad_norm": 0.7150037288665771, "learning_rate": 1e-05, "loss": 0.6116, "step": 3282 }, { "epoch": 0.4050211269777627, "grad_norm": 0.5778046250343323, "learning_rate": 1e-05, "loss": 0.6092, "step": 3283 }, { "epoch": 0.40514449619097553, "grad_norm": 0.5535501837730408, "learning_rate": 1e-05, "loss": 0.5098, "step": 3284 }, { "epoch": 0.4052678654041884, "grad_norm": 0.5704671740531921, "learning_rate": 1e-05, "loss": 0.5604, "step": 3285 }, { "epoch": 0.40539123461740123, "grad_norm": 0.5931574702262878, "learning_rate": 1e-05, "loss": 0.6709, "step": 3286 }, { "epoch": 0.40551460383061405, "grad_norm": 0.5485065579414368, "learning_rate": 1e-05, "loss": 0.5413, "step": 3287 }, { "epoch": 0.4056379730438269, "grad_norm": 0.5413802266120911, "learning_rate": 1e-05, "loss": 0.5467, "step": 3288 }, { "epoch": 0.40576134225703975, "grad_norm": 0.5758828520774841, "learning_rate": 1e-05, "loss": 0.6333, "step": 3289 }, { "epoch": 0.4058847114702526, "grad_norm": 0.5976829528808594, "learning_rate": 1e-05, "loss": 0.5588, "step": 3290 }, { "epoch": 0.40600808068346544, "grad_norm": 0.6424011588096619, "learning_rate": 1e-05, "loss": 0.6594, "step": 3291 }, { "epoch": 0.40613144989667826, "grad_norm": 0.5891597867012024, "learning_rate": 1e-05, "loss": 0.5694, "step": 3292 }, { "epoch": 0.40625481910989114, "grad_norm": 0.5457882285118103, "learning_rate": 1e-05, "loss": 0.5396, "step": 3293 }, { "epoch": 0.40637818832310396, "grad_norm": 0.6231094002723694, "learning_rate": 1e-05, "loss": 0.5817, "step": 3294 }, { "epoch": 0.40650155753631684, "grad_norm": 0.535780131816864, "learning_rate": 1e-05, "loss": 0.5142, "step": 3295 }, { "epoch": 0.40662492674952966, "grad_norm": 0.5736764073371887, "learning_rate": 1e-05, "loss": 0.5652, "step": 3296 }, { "epoch": 0.4067482959627425, "grad_norm": 0.5536647439002991, "learning_rate": 1e-05, "loss": 0.5543, "step": 3297 }, { "epoch": 0.40687166517595535, "grad_norm": 0.5746094584465027, "learning_rate": 1e-05, "loss": 0.5885, "step": 3298 }, { "epoch": 0.4069950343891682, "grad_norm": 0.5156181454658508, "learning_rate": 1e-05, "loss": 0.5215, "step": 3299 }, { "epoch": 0.40711840360238105, "grad_norm": 0.5430619120597839, "learning_rate": 1e-05, "loss": 0.4984, "step": 3300 }, { "epoch": 0.40724177281559387, "grad_norm": 0.59913170337677, "learning_rate": 1e-05, "loss": 0.6257, "step": 3301 }, { "epoch": 0.4073651420288067, "grad_norm": 0.5874570608139038, "learning_rate": 1e-05, "loss": 0.5637, "step": 3302 }, { "epoch": 0.40748851124201957, "grad_norm": 0.5851806402206421, "learning_rate": 1e-05, "loss": 0.5363, "step": 3303 }, { "epoch": 0.4076118804552324, "grad_norm": 0.6198960542678833, "learning_rate": 1e-05, "loss": 0.6187, "step": 3304 }, { "epoch": 0.40773524966844527, "grad_norm": 0.5756022334098816, "learning_rate": 1e-05, "loss": 0.5731, "step": 3305 }, { "epoch": 0.4078586188816581, "grad_norm": 0.5565567016601562, "learning_rate": 1e-05, "loss": 0.5716, "step": 3306 }, { "epoch": 0.4079819880948709, "grad_norm": 0.5086644887924194, "learning_rate": 1e-05, "loss": 0.4516, "step": 3307 }, { "epoch": 0.4081053573080838, "grad_norm": 0.5192692279815674, "learning_rate": 1e-05, "loss": 0.5609, "step": 3308 }, { "epoch": 0.4082287265212966, "grad_norm": 0.6198065280914307, "learning_rate": 1e-05, "loss": 0.6146, "step": 3309 }, { "epoch": 0.4083520957345095, "grad_norm": 0.5855452418327332, "learning_rate": 1e-05, "loss": 0.6139, "step": 3310 }, { "epoch": 0.4084754649477223, "grad_norm": 0.5161016583442688, "learning_rate": 1e-05, "loss": 0.455, "step": 3311 }, { "epoch": 0.4085988341609351, "grad_norm": 0.5775180459022522, "learning_rate": 1e-05, "loss": 0.6299, "step": 3312 }, { "epoch": 0.408722203374148, "grad_norm": 0.5375611782073975, "learning_rate": 1e-05, "loss": 0.556, "step": 3313 }, { "epoch": 0.4088455725873608, "grad_norm": 0.502868115901947, "learning_rate": 1e-05, "loss": 0.4372, "step": 3314 }, { "epoch": 0.4089689418005737, "grad_norm": 0.6184990406036377, "learning_rate": 1e-05, "loss": 0.6179, "step": 3315 }, { "epoch": 0.4090923110137865, "grad_norm": 0.5464755892753601, "learning_rate": 1e-05, "loss": 0.6138, "step": 3316 }, { "epoch": 0.40921568022699933, "grad_norm": 0.5810965895652771, "learning_rate": 1e-05, "loss": 0.5615, "step": 3317 }, { "epoch": 0.4093390494402122, "grad_norm": 0.5990710258483887, "learning_rate": 1e-05, "loss": 0.5466, "step": 3318 }, { "epoch": 0.40946241865342503, "grad_norm": 0.6128894686698914, "learning_rate": 1e-05, "loss": 0.6137, "step": 3319 }, { "epoch": 0.4095857878666379, "grad_norm": 0.6580965518951416, "learning_rate": 1e-05, "loss": 0.5397, "step": 3320 }, { "epoch": 0.4097091570798507, "grad_norm": 0.5984264016151428, "learning_rate": 1e-05, "loss": 0.6237, "step": 3321 }, { "epoch": 0.40983252629306355, "grad_norm": 0.5426461100578308, "learning_rate": 1e-05, "loss": 0.5428, "step": 3322 }, { "epoch": 0.4099558955062764, "grad_norm": 0.6594126224517822, "learning_rate": 1e-05, "loss": 0.68, "step": 3323 }, { "epoch": 0.41007926471948924, "grad_norm": 0.6021159887313843, "learning_rate": 1e-05, "loss": 0.6678, "step": 3324 }, { "epoch": 0.4102026339327021, "grad_norm": 0.636104941368103, "learning_rate": 1e-05, "loss": 0.642, "step": 3325 }, { "epoch": 0.41032600314591494, "grad_norm": 0.5889078974723816, "learning_rate": 1e-05, "loss": 0.6532, "step": 3326 }, { "epoch": 0.41044937235912776, "grad_norm": 0.6560375690460205, "learning_rate": 1e-05, "loss": 0.6153, "step": 3327 }, { "epoch": 0.41057274157234064, "grad_norm": 0.5404573678970337, "learning_rate": 1e-05, "loss": 0.5447, "step": 3328 }, { "epoch": 0.41069611078555346, "grad_norm": 0.5345743894577026, "learning_rate": 1e-05, "loss": 0.4635, "step": 3329 }, { "epoch": 0.41081947999876633, "grad_norm": 0.5348767638206482, "learning_rate": 1e-05, "loss": 0.4975, "step": 3330 }, { "epoch": 0.41094284921197916, "grad_norm": 0.5519302487373352, "learning_rate": 1e-05, "loss": 0.6093, "step": 3331 }, { "epoch": 0.411066218425192, "grad_norm": 0.5708015561103821, "learning_rate": 1e-05, "loss": 0.5652, "step": 3332 }, { "epoch": 0.41118958763840485, "grad_norm": 0.5589155554771423, "learning_rate": 1e-05, "loss": 0.5065, "step": 3333 }, { "epoch": 0.4113129568516177, "grad_norm": 0.5898712873458862, "learning_rate": 1e-05, "loss": 0.6475, "step": 3334 }, { "epoch": 0.41143632606483055, "grad_norm": 0.6197591423988342, "learning_rate": 1e-05, "loss": 0.6361, "step": 3335 }, { "epoch": 0.41155969527804337, "grad_norm": 0.5912361145019531, "learning_rate": 1e-05, "loss": 0.6249, "step": 3336 }, { "epoch": 0.4116830644912562, "grad_norm": 0.5660859942436218, "learning_rate": 1e-05, "loss": 0.5519, "step": 3337 }, { "epoch": 0.41180643370446907, "grad_norm": 0.5770236849784851, "learning_rate": 1e-05, "loss": 0.5581, "step": 3338 }, { "epoch": 0.4119298029176819, "grad_norm": 0.6640840768814087, "learning_rate": 1e-05, "loss": 0.6015, "step": 3339 }, { "epoch": 0.41205317213089476, "grad_norm": 0.49997982382774353, "learning_rate": 1e-05, "loss": 0.5294, "step": 3340 }, { "epoch": 0.4121765413441076, "grad_norm": 0.5662977695465088, "learning_rate": 1e-05, "loss": 0.5882, "step": 3341 }, { "epoch": 0.4122999105573204, "grad_norm": 0.5630486011505127, "learning_rate": 1e-05, "loss": 0.5654, "step": 3342 }, { "epoch": 0.4124232797705333, "grad_norm": 0.5368142127990723, "learning_rate": 1e-05, "loss": 0.5139, "step": 3343 }, { "epoch": 0.4125466489837461, "grad_norm": 0.587110161781311, "learning_rate": 1e-05, "loss": 0.5953, "step": 3344 }, { "epoch": 0.412670018196959, "grad_norm": 0.5850639343261719, "learning_rate": 1e-05, "loss": 0.5518, "step": 3345 }, { "epoch": 0.4127933874101718, "grad_norm": 0.5447174310684204, "learning_rate": 1e-05, "loss": 0.5355, "step": 3346 }, { "epoch": 0.4129167566233846, "grad_norm": 0.5716145634651184, "learning_rate": 1e-05, "loss": 0.5024, "step": 3347 }, { "epoch": 0.4130401258365975, "grad_norm": 0.5490842461585999, "learning_rate": 1e-05, "loss": 0.4975, "step": 3348 }, { "epoch": 0.4131634950498103, "grad_norm": 0.6541851758956909, "learning_rate": 1e-05, "loss": 0.6108, "step": 3349 }, { "epoch": 0.41328686426302313, "grad_norm": 0.5725159049034119, "learning_rate": 1e-05, "loss": 0.6206, "step": 3350 }, { "epoch": 0.413410233476236, "grad_norm": 0.4740223288536072, "learning_rate": 1e-05, "loss": 0.4434, "step": 3351 }, { "epoch": 0.41353360268944883, "grad_norm": 0.6395127773284912, "learning_rate": 1e-05, "loss": 0.699, "step": 3352 }, { "epoch": 0.4136569719026617, "grad_norm": 0.5111945867538452, "learning_rate": 1e-05, "loss": 0.5138, "step": 3353 }, { "epoch": 0.41378034111587453, "grad_norm": 0.6319080591201782, "learning_rate": 1e-05, "loss": 0.7145, "step": 3354 }, { "epoch": 0.41390371032908735, "grad_norm": 0.6370152235031128, "learning_rate": 1e-05, "loss": 0.6129, "step": 3355 }, { "epoch": 0.4140270795423002, "grad_norm": 0.5722171664237976, "learning_rate": 1e-05, "loss": 0.5652, "step": 3356 }, { "epoch": 0.41415044875551305, "grad_norm": 0.5912392735481262, "learning_rate": 1e-05, "loss": 0.6139, "step": 3357 }, { "epoch": 0.4142738179687259, "grad_norm": 0.5623732209205627, "learning_rate": 1e-05, "loss": 0.5264, "step": 3358 }, { "epoch": 0.41439718718193874, "grad_norm": 0.5801467895507812, "learning_rate": 1e-05, "loss": 0.6506, "step": 3359 }, { "epoch": 0.41452055639515156, "grad_norm": 0.6673120260238647, "learning_rate": 1e-05, "loss": 0.6748, "step": 3360 }, { "epoch": 0.41464392560836444, "grad_norm": 0.5927623510360718, "learning_rate": 1e-05, "loss": 0.6227, "step": 3361 }, { "epoch": 0.41476729482157726, "grad_norm": 0.5632920265197754, "learning_rate": 1e-05, "loss": 0.5239, "step": 3362 }, { "epoch": 0.41489066403479014, "grad_norm": 0.5512875914573669, "learning_rate": 1e-05, "loss": 0.5982, "step": 3363 }, { "epoch": 0.41501403324800296, "grad_norm": 0.5657047033309937, "learning_rate": 1e-05, "loss": 0.5666, "step": 3364 }, { "epoch": 0.4151374024612158, "grad_norm": 0.5533481240272522, "learning_rate": 1e-05, "loss": 0.5872, "step": 3365 }, { "epoch": 0.41526077167442865, "grad_norm": 0.6191480755805969, "learning_rate": 1e-05, "loss": 0.7069, "step": 3366 }, { "epoch": 0.4153841408876415, "grad_norm": 0.6501120328903198, "learning_rate": 1e-05, "loss": 0.6955, "step": 3367 }, { "epoch": 0.41550751010085435, "grad_norm": 0.5779284834861755, "learning_rate": 1e-05, "loss": 0.5412, "step": 3368 }, { "epoch": 0.41563087931406717, "grad_norm": 0.666874885559082, "learning_rate": 1e-05, "loss": 0.707, "step": 3369 }, { "epoch": 0.41575424852728, "grad_norm": 0.5695516467094421, "learning_rate": 1e-05, "loss": 0.6174, "step": 3370 }, { "epoch": 0.41587761774049287, "grad_norm": 0.6305562853813171, "learning_rate": 1e-05, "loss": 0.6232, "step": 3371 }, { "epoch": 0.4160009869537057, "grad_norm": 0.6067129969596863, "learning_rate": 1e-05, "loss": 0.538, "step": 3372 }, { "epoch": 0.41612435616691856, "grad_norm": 0.5105657577514648, "learning_rate": 1e-05, "loss": 0.5077, "step": 3373 }, { "epoch": 0.4162477253801314, "grad_norm": 0.4637826383113861, "learning_rate": 1e-05, "loss": 0.4513, "step": 3374 }, { "epoch": 0.4163710945933442, "grad_norm": 0.5536416172981262, "learning_rate": 1e-05, "loss": 0.6182, "step": 3375 }, { "epoch": 0.4164944638065571, "grad_norm": 0.6716020107269287, "learning_rate": 1e-05, "loss": 0.6535, "step": 3376 }, { "epoch": 0.4166178330197699, "grad_norm": 0.6138641834259033, "learning_rate": 1e-05, "loss": 0.5434, "step": 3377 }, { "epoch": 0.4167412022329828, "grad_norm": 0.6027225852012634, "learning_rate": 1e-05, "loss": 0.5997, "step": 3378 }, { "epoch": 0.4168645714461956, "grad_norm": 0.6056876182556152, "learning_rate": 1e-05, "loss": 0.5738, "step": 3379 }, { "epoch": 0.4169879406594084, "grad_norm": 0.5925717949867249, "learning_rate": 1e-05, "loss": 0.5908, "step": 3380 }, { "epoch": 0.4171113098726213, "grad_norm": 0.5672228336334229, "learning_rate": 1e-05, "loss": 0.5942, "step": 3381 }, { "epoch": 0.4172346790858341, "grad_norm": 0.6406817436218262, "learning_rate": 1e-05, "loss": 0.6381, "step": 3382 }, { "epoch": 0.417358048299047, "grad_norm": 0.5726128220558167, "learning_rate": 1e-05, "loss": 0.5302, "step": 3383 }, { "epoch": 0.4174814175122598, "grad_norm": 0.5573223829269409, "learning_rate": 1e-05, "loss": 0.5171, "step": 3384 }, { "epoch": 0.41760478672547263, "grad_norm": 0.551387369632721, "learning_rate": 1e-05, "loss": 0.5633, "step": 3385 }, { "epoch": 0.4177281559386855, "grad_norm": 0.4917445182800293, "learning_rate": 1e-05, "loss": 0.5134, "step": 3386 }, { "epoch": 0.41785152515189833, "grad_norm": 0.5485051870346069, "learning_rate": 1e-05, "loss": 0.5247, "step": 3387 }, { "epoch": 0.4179748943651112, "grad_norm": 0.6257905960083008, "learning_rate": 1e-05, "loss": 0.6311, "step": 3388 }, { "epoch": 0.418098263578324, "grad_norm": 0.6088429093360901, "learning_rate": 1e-05, "loss": 0.5883, "step": 3389 }, { "epoch": 0.41822163279153685, "grad_norm": 0.5216710567474365, "learning_rate": 1e-05, "loss": 0.4053, "step": 3390 }, { "epoch": 0.4183450020047497, "grad_norm": 0.627972424030304, "learning_rate": 1e-05, "loss": 0.5546, "step": 3391 }, { "epoch": 0.41846837121796254, "grad_norm": 0.6931708455085754, "learning_rate": 1e-05, "loss": 0.5755, "step": 3392 }, { "epoch": 0.4185917404311754, "grad_norm": 0.5610859394073486, "learning_rate": 1e-05, "loss": 0.552, "step": 3393 }, { "epoch": 0.41871510964438824, "grad_norm": 0.5659007430076599, "learning_rate": 1e-05, "loss": 0.5897, "step": 3394 }, { "epoch": 0.41883847885760106, "grad_norm": 0.6059523224830627, "learning_rate": 1e-05, "loss": 0.6289, "step": 3395 }, { "epoch": 0.41896184807081394, "grad_norm": 0.5554589033126831, "learning_rate": 1e-05, "loss": 0.5533, "step": 3396 }, { "epoch": 0.41908521728402676, "grad_norm": 0.6091223359107971, "learning_rate": 1e-05, "loss": 0.6817, "step": 3397 }, { "epoch": 0.41920858649723963, "grad_norm": 0.6317917108535767, "learning_rate": 1e-05, "loss": 0.6563, "step": 3398 }, { "epoch": 0.41933195571045245, "grad_norm": 0.5941531658172607, "learning_rate": 1e-05, "loss": 0.6421, "step": 3399 }, { "epoch": 0.4194553249236653, "grad_norm": 0.5854235887527466, "learning_rate": 1e-05, "loss": 0.5539, "step": 3400 }, { "epoch": 0.41957869413687815, "grad_norm": 0.6766232848167419, "learning_rate": 1e-05, "loss": 0.7854, "step": 3401 }, { "epoch": 0.41970206335009097, "grad_norm": 0.5087511539459229, "learning_rate": 1e-05, "loss": 0.5014, "step": 3402 }, { "epoch": 0.41982543256330385, "grad_norm": 0.5305067300796509, "learning_rate": 1e-05, "loss": 0.5304, "step": 3403 }, { "epoch": 0.41994880177651667, "grad_norm": 0.5546717047691345, "learning_rate": 1e-05, "loss": 0.4903, "step": 3404 }, { "epoch": 0.4200721709897295, "grad_norm": 0.6614811420440674, "learning_rate": 1e-05, "loss": 0.6028, "step": 3405 }, { "epoch": 0.42019554020294236, "grad_norm": 0.5323296189308167, "learning_rate": 1e-05, "loss": 0.5095, "step": 3406 }, { "epoch": 0.4203189094161552, "grad_norm": 0.6272278428077698, "learning_rate": 1e-05, "loss": 0.6417, "step": 3407 }, { "epoch": 0.42044227862936806, "grad_norm": 0.5742906928062439, "learning_rate": 1e-05, "loss": 0.4543, "step": 3408 }, { "epoch": 0.4205656478425809, "grad_norm": 0.5646460652351379, "learning_rate": 1e-05, "loss": 0.5699, "step": 3409 }, { "epoch": 0.4206890170557937, "grad_norm": 0.5678768157958984, "learning_rate": 1e-05, "loss": 0.5618, "step": 3410 }, { "epoch": 0.4208123862690066, "grad_norm": 0.7067877054214478, "learning_rate": 1e-05, "loss": 0.6317, "step": 3411 }, { "epoch": 0.4209357554822194, "grad_norm": 0.6048859357833862, "learning_rate": 1e-05, "loss": 0.6035, "step": 3412 }, { "epoch": 0.4210591246954323, "grad_norm": 0.6384260058403015, "learning_rate": 1e-05, "loss": 0.6494, "step": 3413 }, { "epoch": 0.4211824939086451, "grad_norm": 0.6172804832458496, "learning_rate": 1e-05, "loss": 0.6597, "step": 3414 }, { "epoch": 0.4213058631218579, "grad_norm": 0.5288574695587158, "learning_rate": 1e-05, "loss": 0.5089, "step": 3415 }, { "epoch": 0.4214292323350708, "grad_norm": 1.3135806322097778, "learning_rate": 1e-05, "loss": 0.6105, "step": 3416 }, { "epoch": 0.4215526015482836, "grad_norm": 0.5462214946746826, "learning_rate": 1e-05, "loss": 0.5451, "step": 3417 }, { "epoch": 0.4216759707614965, "grad_norm": 0.5844091773033142, "learning_rate": 1e-05, "loss": 0.5849, "step": 3418 }, { "epoch": 0.4217993399747093, "grad_norm": 0.6047707200050354, "learning_rate": 1e-05, "loss": 0.6259, "step": 3419 }, { "epoch": 0.42192270918792213, "grad_norm": 0.6308290958404541, "learning_rate": 1e-05, "loss": 0.5738, "step": 3420 }, { "epoch": 0.422046078401135, "grad_norm": 0.538071870803833, "learning_rate": 1e-05, "loss": 0.5536, "step": 3421 }, { "epoch": 0.4221694476143478, "grad_norm": 0.6253432631492615, "learning_rate": 1e-05, "loss": 0.6158, "step": 3422 }, { "epoch": 0.4222928168275607, "grad_norm": 0.6170846819877625, "learning_rate": 1e-05, "loss": 0.6161, "step": 3423 }, { "epoch": 0.4224161860407735, "grad_norm": 0.5612276792526245, "learning_rate": 1e-05, "loss": 0.5116, "step": 3424 }, { "epoch": 0.42253955525398634, "grad_norm": 0.6686205267906189, "learning_rate": 1e-05, "loss": 0.7576, "step": 3425 }, { "epoch": 0.4226629244671992, "grad_norm": 0.5998547077178955, "learning_rate": 1e-05, "loss": 0.632, "step": 3426 }, { "epoch": 0.42278629368041204, "grad_norm": 0.6005386114120483, "learning_rate": 1e-05, "loss": 0.6028, "step": 3427 }, { "epoch": 0.4229096628936249, "grad_norm": 0.5497662425041199, "learning_rate": 1e-05, "loss": 0.5498, "step": 3428 }, { "epoch": 0.42303303210683774, "grad_norm": 0.6192296743392944, "learning_rate": 1e-05, "loss": 0.6512, "step": 3429 }, { "epoch": 0.42315640132005056, "grad_norm": 0.5648596286773682, "learning_rate": 1e-05, "loss": 0.5307, "step": 3430 }, { "epoch": 0.42327977053326343, "grad_norm": 0.5534960627555847, "learning_rate": 1e-05, "loss": 0.5837, "step": 3431 }, { "epoch": 0.42340313974647625, "grad_norm": 0.5770698189735413, "learning_rate": 1e-05, "loss": 0.4869, "step": 3432 }, { "epoch": 0.42352650895968913, "grad_norm": 0.6021262407302856, "learning_rate": 1e-05, "loss": 0.6374, "step": 3433 }, { "epoch": 0.42364987817290195, "grad_norm": 0.5785979628562927, "learning_rate": 1e-05, "loss": 0.6068, "step": 3434 }, { "epoch": 0.42377324738611477, "grad_norm": 0.5949110388755798, "learning_rate": 1e-05, "loss": 0.6238, "step": 3435 }, { "epoch": 0.42389661659932765, "grad_norm": 0.5489729046821594, "learning_rate": 1e-05, "loss": 0.5298, "step": 3436 }, { "epoch": 0.42401998581254047, "grad_norm": 0.6521816849708557, "learning_rate": 1e-05, "loss": 0.7077, "step": 3437 }, { "epoch": 0.42414335502575334, "grad_norm": 0.5306597948074341, "learning_rate": 1e-05, "loss": 0.55, "step": 3438 }, { "epoch": 0.42426672423896616, "grad_norm": 0.6446151733398438, "learning_rate": 1e-05, "loss": 0.5845, "step": 3439 }, { "epoch": 0.424390093452179, "grad_norm": 0.561445415019989, "learning_rate": 1e-05, "loss": 0.5676, "step": 3440 }, { "epoch": 0.42451346266539186, "grad_norm": 0.5441744923591614, "learning_rate": 1e-05, "loss": 0.5726, "step": 3441 }, { "epoch": 0.4246368318786047, "grad_norm": 0.6175549030303955, "learning_rate": 1e-05, "loss": 0.675, "step": 3442 }, { "epoch": 0.42476020109181756, "grad_norm": 0.5092188715934753, "learning_rate": 1e-05, "loss": 0.482, "step": 3443 }, { "epoch": 0.4248835703050304, "grad_norm": 0.6933817863464355, "learning_rate": 1e-05, "loss": 0.7151, "step": 3444 }, { "epoch": 0.4250069395182432, "grad_norm": 0.6158934831619263, "learning_rate": 1e-05, "loss": 0.6333, "step": 3445 }, { "epoch": 0.4251303087314561, "grad_norm": 0.5423116087913513, "learning_rate": 1e-05, "loss": 0.5385, "step": 3446 }, { "epoch": 0.4252536779446689, "grad_norm": 0.6306138634681702, "learning_rate": 1e-05, "loss": 0.5712, "step": 3447 }, { "epoch": 0.42537704715788177, "grad_norm": 0.5866636037826538, "learning_rate": 1e-05, "loss": 0.6207, "step": 3448 }, { "epoch": 0.4255004163710946, "grad_norm": 0.5421303510665894, "learning_rate": 1e-05, "loss": 0.5425, "step": 3449 }, { "epoch": 0.4256237855843074, "grad_norm": 0.5878863334655762, "learning_rate": 1e-05, "loss": 0.5174, "step": 3450 }, { "epoch": 0.4257471547975203, "grad_norm": 0.5825344920158386, "learning_rate": 1e-05, "loss": 0.5584, "step": 3451 }, { "epoch": 0.4258705240107331, "grad_norm": 0.5875107049942017, "learning_rate": 1e-05, "loss": 0.657, "step": 3452 }, { "epoch": 0.425993893223946, "grad_norm": 0.5677882432937622, "learning_rate": 1e-05, "loss": 0.5021, "step": 3453 }, { "epoch": 0.4261172624371588, "grad_norm": 0.6068867444992065, "learning_rate": 1e-05, "loss": 0.6389, "step": 3454 }, { "epoch": 0.4262406316503716, "grad_norm": 0.6114904880523682, "learning_rate": 1e-05, "loss": 0.5896, "step": 3455 }, { "epoch": 0.4263640008635845, "grad_norm": 0.6054381728172302, "learning_rate": 1e-05, "loss": 0.6781, "step": 3456 }, { "epoch": 0.4264873700767973, "grad_norm": 0.5499782562255859, "learning_rate": 1e-05, "loss": 0.5725, "step": 3457 }, { "epoch": 0.4266107392900102, "grad_norm": 0.5698041319847107, "learning_rate": 1e-05, "loss": 0.5834, "step": 3458 }, { "epoch": 0.426734108503223, "grad_norm": 0.5033005475997925, "learning_rate": 1e-05, "loss": 0.4153, "step": 3459 }, { "epoch": 0.42685747771643584, "grad_norm": 0.5082799792289734, "learning_rate": 1e-05, "loss": 0.4695, "step": 3460 }, { "epoch": 0.4269808469296487, "grad_norm": 0.5219315886497498, "learning_rate": 1e-05, "loss": 0.4356, "step": 3461 }, { "epoch": 0.42710421614286154, "grad_norm": 0.6108741760253906, "learning_rate": 1e-05, "loss": 0.5935, "step": 3462 }, { "epoch": 0.4272275853560744, "grad_norm": 0.685981273651123, "learning_rate": 1e-05, "loss": 0.6498, "step": 3463 }, { "epoch": 0.42735095456928723, "grad_norm": 0.6210673451423645, "learning_rate": 1e-05, "loss": 0.6685, "step": 3464 }, { "epoch": 0.42747432378250005, "grad_norm": 0.5644251704216003, "learning_rate": 1e-05, "loss": 0.5379, "step": 3465 }, { "epoch": 0.42759769299571293, "grad_norm": 0.6359272599220276, "learning_rate": 1e-05, "loss": 0.6507, "step": 3466 }, { "epoch": 0.42772106220892575, "grad_norm": 0.6841503381729126, "learning_rate": 1e-05, "loss": 0.7016, "step": 3467 }, { "epoch": 0.4278444314221386, "grad_norm": 0.6534481644630432, "learning_rate": 1e-05, "loss": 0.5928, "step": 3468 }, { "epoch": 0.42796780063535145, "grad_norm": 0.6350432634353638, "learning_rate": 1e-05, "loss": 0.7066, "step": 3469 }, { "epoch": 0.42809116984856427, "grad_norm": 0.5469191670417786, "learning_rate": 1e-05, "loss": 0.5059, "step": 3470 }, { "epoch": 0.42821453906177714, "grad_norm": 0.568943202495575, "learning_rate": 1e-05, "loss": 0.5567, "step": 3471 }, { "epoch": 0.42833790827498996, "grad_norm": 0.7102448344230652, "learning_rate": 1e-05, "loss": 0.6556, "step": 3472 }, { "epoch": 0.42846127748820284, "grad_norm": 0.6634898781776428, "learning_rate": 1e-05, "loss": 0.6171, "step": 3473 }, { "epoch": 0.42858464670141566, "grad_norm": 0.49438151717185974, "learning_rate": 1e-05, "loss": 0.4545, "step": 3474 }, { "epoch": 0.4287080159146285, "grad_norm": 0.541089653968811, "learning_rate": 1e-05, "loss": 0.5274, "step": 3475 }, { "epoch": 0.42883138512784136, "grad_norm": 0.5730909109115601, "learning_rate": 1e-05, "loss": 0.5389, "step": 3476 }, { "epoch": 0.4289547543410542, "grad_norm": 0.5106148719787598, "learning_rate": 1e-05, "loss": 0.511, "step": 3477 }, { "epoch": 0.42907812355426705, "grad_norm": 0.5904549956321716, "learning_rate": 1e-05, "loss": 0.6232, "step": 3478 }, { "epoch": 0.4292014927674799, "grad_norm": 0.5826467275619507, "learning_rate": 1e-05, "loss": 0.5415, "step": 3479 }, { "epoch": 0.4293248619806927, "grad_norm": 0.5149918794631958, "learning_rate": 1e-05, "loss": 0.5328, "step": 3480 }, { "epoch": 0.42944823119390557, "grad_norm": 0.568075954914093, "learning_rate": 1e-05, "loss": 0.5824, "step": 3481 }, { "epoch": 0.4295716004071184, "grad_norm": 0.5819019079208374, "learning_rate": 1e-05, "loss": 0.5217, "step": 3482 }, { "epoch": 0.42969496962033127, "grad_norm": 0.7555899024009705, "learning_rate": 1e-05, "loss": 0.7208, "step": 3483 }, { "epoch": 0.4298183388335441, "grad_norm": 0.5600507855415344, "learning_rate": 1e-05, "loss": 0.5933, "step": 3484 }, { "epoch": 0.4299417080467569, "grad_norm": 0.5987318158149719, "learning_rate": 1e-05, "loss": 0.5993, "step": 3485 }, { "epoch": 0.4300650772599698, "grad_norm": 0.5196436047554016, "learning_rate": 1e-05, "loss": 0.5388, "step": 3486 }, { "epoch": 0.4301884464731826, "grad_norm": 0.6156234741210938, "learning_rate": 1e-05, "loss": 0.5573, "step": 3487 }, { "epoch": 0.4303118156863955, "grad_norm": 0.686856210231781, "learning_rate": 1e-05, "loss": 0.6564, "step": 3488 }, { "epoch": 0.4304351848996083, "grad_norm": 0.5981041789054871, "learning_rate": 1e-05, "loss": 0.6479, "step": 3489 }, { "epoch": 0.4305585541128211, "grad_norm": 0.6383776664733887, "learning_rate": 1e-05, "loss": 0.562, "step": 3490 }, { "epoch": 0.430681923326034, "grad_norm": 0.6139704585075378, "learning_rate": 1e-05, "loss": 0.5782, "step": 3491 }, { "epoch": 0.4308052925392468, "grad_norm": 0.6042208671569824, "learning_rate": 1e-05, "loss": 0.5992, "step": 3492 }, { "epoch": 0.4309286617524597, "grad_norm": 0.5678145289421082, "learning_rate": 1e-05, "loss": 0.5518, "step": 3493 }, { "epoch": 0.4310520309656725, "grad_norm": 0.6274237036705017, "learning_rate": 1e-05, "loss": 0.6223, "step": 3494 }, { "epoch": 0.43117540017888534, "grad_norm": 0.5476399064064026, "learning_rate": 1e-05, "loss": 0.5703, "step": 3495 }, { "epoch": 0.4312987693920982, "grad_norm": 0.5634927749633789, "learning_rate": 1e-05, "loss": 0.6062, "step": 3496 }, { "epoch": 0.43142213860531103, "grad_norm": 0.614220917224884, "learning_rate": 1e-05, "loss": 0.5909, "step": 3497 }, { "epoch": 0.4315455078185239, "grad_norm": 0.56577068567276, "learning_rate": 1e-05, "loss": 0.6388, "step": 3498 }, { "epoch": 0.43166887703173673, "grad_norm": 0.5941275358200073, "learning_rate": 1e-05, "loss": 0.5814, "step": 3499 }, { "epoch": 0.43179224624494955, "grad_norm": 0.5931630730628967, "learning_rate": 1e-05, "loss": 0.534, "step": 3500 }, { "epoch": 0.4319156154581624, "grad_norm": 0.5156033635139465, "learning_rate": 1e-05, "loss": 0.5142, "step": 3501 }, { "epoch": 0.43203898467137525, "grad_norm": 0.5362243056297302, "learning_rate": 1e-05, "loss": 0.51, "step": 3502 }, { "epoch": 0.4321623538845881, "grad_norm": 0.6084906458854675, "learning_rate": 1e-05, "loss": 0.5986, "step": 3503 }, { "epoch": 0.43228572309780094, "grad_norm": 0.5920321345329285, "learning_rate": 1e-05, "loss": 0.6244, "step": 3504 }, { "epoch": 0.43240909231101377, "grad_norm": 0.676563560962677, "learning_rate": 1e-05, "loss": 0.5955, "step": 3505 }, { "epoch": 0.43253246152422664, "grad_norm": 0.5985086560249329, "learning_rate": 1e-05, "loss": 0.5874, "step": 3506 }, { "epoch": 0.43265583073743946, "grad_norm": 0.5926160216331482, "learning_rate": 1e-05, "loss": 0.6257, "step": 3507 }, { "epoch": 0.43277919995065234, "grad_norm": 0.5783392786979675, "learning_rate": 1e-05, "loss": 0.606, "step": 3508 }, { "epoch": 0.43290256916386516, "grad_norm": 0.5904390811920166, "learning_rate": 1e-05, "loss": 0.6155, "step": 3509 }, { "epoch": 0.433025938377078, "grad_norm": 0.6139599084854126, "learning_rate": 1e-05, "loss": 0.606, "step": 3510 }, { "epoch": 0.43314930759029086, "grad_norm": 0.5763165354728699, "learning_rate": 1e-05, "loss": 0.5284, "step": 3511 }, { "epoch": 0.4332726768035037, "grad_norm": 0.5731136202812195, "learning_rate": 1e-05, "loss": 0.4636, "step": 3512 }, { "epoch": 0.43339604601671655, "grad_norm": 0.6748663187026978, "learning_rate": 1e-05, "loss": 0.7127, "step": 3513 }, { "epoch": 0.4335194152299294, "grad_norm": 0.5120790004730225, "learning_rate": 1e-05, "loss": 0.5328, "step": 3514 }, { "epoch": 0.4336427844431422, "grad_norm": 0.5462766885757446, "learning_rate": 1e-05, "loss": 0.55, "step": 3515 }, { "epoch": 0.43376615365635507, "grad_norm": 0.5121601819992065, "learning_rate": 1e-05, "loss": 0.5217, "step": 3516 }, { "epoch": 0.4338895228695679, "grad_norm": 0.532611072063446, "learning_rate": 1e-05, "loss": 0.5396, "step": 3517 }, { "epoch": 0.43401289208278077, "grad_norm": 0.6560919284820557, "learning_rate": 1e-05, "loss": 0.6074, "step": 3518 }, { "epoch": 0.4341362612959936, "grad_norm": 0.5859878063201904, "learning_rate": 1e-05, "loss": 0.5081, "step": 3519 }, { "epoch": 0.4342596305092064, "grad_norm": 0.5290143489837646, "learning_rate": 1e-05, "loss": 0.5006, "step": 3520 }, { "epoch": 0.4343829997224193, "grad_norm": 0.5483124256134033, "learning_rate": 1e-05, "loss": 0.5388, "step": 3521 }, { "epoch": 0.4345063689356321, "grad_norm": 0.5783901810646057, "learning_rate": 1e-05, "loss": 0.5833, "step": 3522 }, { "epoch": 0.434629738148845, "grad_norm": 0.6050527691841125, "learning_rate": 1e-05, "loss": 0.6229, "step": 3523 }, { "epoch": 0.4347531073620578, "grad_norm": 0.5818017721176147, "learning_rate": 1e-05, "loss": 0.5895, "step": 3524 }, { "epoch": 0.4348764765752706, "grad_norm": 0.6028524041175842, "learning_rate": 1e-05, "loss": 0.725, "step": 3525 }, { "epoch": 0.4349998457884835, "grad_norm": 0.5136570930480957, "learning_rate": 1e-05, "loss": 0.509, "step": 3526 }, { "epoch": 0.4351232150016963, "grad_norm": 0.5842887163162231, "learning_rate": 1e-05, "loss": 0.6428, "step": 3527 }, { "epoch": 0.4352465842149092, "grad_norm": 0.5459843277931213, "learning_rate": 1e-05, "loss": 0.5152, "step": 3528 }, { "epoch": 0.435369953428122, "grad_norm": 0.5296088457107544, "learning_rate": 1e-05, "loss": 0.5275, "step": 3529 }, { "epoch": 0.43549332264133483, "grad_norm": 0.5041775703430176, "learning_rate": 1e-05, "loss": 0.4542, "step": 3530 }, { "epoch": 0.4356166918545477, "grad_norm": 0.5337104201316833, "learning_rate": 1e-05, "loss": 0.585, "step": 3531 }, { "epoch": 0.43574006106776053, "grad_norm": 0.6424673199653625, "learning_rate": 1e-05, "loss": 0.7048, "step": 3532 }, { "epoch": 0.4358634302809734, "grad_norm": 0.5865022540092468, "learning_rate": 1e-05, "loss": 0.5787, "step": 3533 }, { "epoch": 0.43598679949418623, "grad_norm": 0.5523676872253418, "learning_rate": 1e-05, "loss": 0.5515, "step": 3534 }, { "epoch": 0.43611016870739905, "grad_norm": 0.5358051657676697, "learning_rate": 1e-05, "loss": 0.5059, "step": 3535 }, { "epoch": 0.4362335379206119, "grad_norm": 0.563836932182312, "learning_rate": 1e-05, "loss": 0.6314, "step": 3536 }, { "epoch": 0.43635690713382475, "grad_norm": 0.630984365940094, "learning_rate": 1e-05, "loss": 0.6515, "step": 3537 }, { "epoch": 0.4364802763470376, "grad_norm": 0.533341646194458, "learning_rate": 1e-05, "loss": 0.5316, "step": 3538 }, { "epoch": 0.43660364556025044, "grad_norm": 0.5802929997444153, "learning_rate": 1e-05, "loss": 0.628, "step": 3539 }, { "epoch": 0.43672701477346326, "grad_norm": 0.5951263904571533, "learning_rate": 1e-05, "loss": 0.6897, "step": 3540 }, { "epoch": 0.43685038398667614, "grad_norm": 0.6345348954200745, "learning_rate": 1e-05, "loss": 0.6266, "step": 3541 }, { "epoch": 0.43697375319988896, "grad_norm": 0.5600700378417969, "learning_rate": 1e-05, "loss": 0.5323, "step": 3542 }, { "epoch": 0.43709712241310184, "grad_norm": 0.65964674949646, "learning_rate": 1e-05, "loss": 0.6448, "step": 3543 }, { "epoch": 0.43722049162631466, "grad_norm": 0.5470906496047974, "learning_rate": 1e-05, "loss": 0.4873, "step": 3544 }, { "epoch": 0.4373438608395275, "grad_norm": 0.5797744989395142, "learning_rate": 1e-05, "loss": 0.5924, "step": 3545 }, { "epoch": 0.43746723005274035, "grad_norm": 0.5398990511894226, "learning_rate": 1e-05, "loss": 0.5496, "step": 3546 }, { "epoch": 0.4375905992659532, "grad_norm": 0.5845136642456055, "learning_rate": 1e-05, "loss": 0.5655, "step": 3547 }, { "epoch": 0.43771396847916605, "grad_norm": 0.611324667930603, "learning_rate": 1e-05, "loss": 0.5769, "step": 3548 }, { "epoch": 0.43783733769237887, "grad_norm": 0.7457320690155029, "learning_rate": 1e-05, "loss": 0.8317, "step": 3549 }, { "epoch": 0.4379607069055917, "grad_norm": 0.6734923720359802, "learning_rate": 1e-05, "loss": 0.6189, "step": 3550 }, { "epoch": 0.43808407611880457, "grad_norm": 0.5674607157707214, "learning_rate": 1e-05, "loss": 0.5548, "step": 3551 }, { "epoch": 0.4382074453320174, "grad_norm": 0.5882092714309692, "learning_rate": 1e-05, "loss": 0.6204, "step": 3552 }, { "epoch": 0.43833081454523026, "grad_norm": 0.5126692652702332, "learning_rate": 1e-05, "loss": 0.4654, "step": 3553 }, { "epoch": 0.4384541837584431, "grad_norm": 0.604883074760437, "learning_rate": 1e-05, "loss": 0.596, "step": 3554 }, { "epoch": 0.4385775529716559, "grad_norm": 0.5600833892822266, "learning_rate": 1e-05, "loss": 0.5416, "step": 3555 }, { "epoch": 0.4387009221848688, "grad_norm": 0.6040504574775696, "learning_rate": 1e-05, "loss": 0.6404, "step": 3556 }, { "epoch": 0.4388242913980816, "grad_norm": 0.5754160284996033, "learning_rate": 1e-05, "loss": 0.6205, "step": 3557 }, { "epoch": 0.4389476606112945, "grad_norm": 0.5733497738838196, "learning_rate": 1e-05, "loss": 0.572, "step": 3558 }, { "epoch": 0.4390710298245073, "grad_norm": 0.5719031095504761, "learning_rate": 1e-05, "loss": 0.5729, "step": 3559 }, { "epoch": 0.4391943990377201, "grad_norm": 0.5479840040206909, "learning_rate": 1e-05, "loss": 0.4882, "step": 3560 }, { "epoch": 0.439317768250933, "grad_norm": 0.5680733919143677, "learning_rate": 1e-05, "loss": 0.5683, "step": 3561 }, { "epoch": 0.4394411374641458, "grad_norm": 0.5963146090507507, "learning_rate": 1e-05, "loss": 0.6256, "step": 3562 }, { "epoch": 0.4395645066773587, "grad_norm": 0.6103438138961792, "learning_rate": 1e-05, "loss": 0.6944, "step": 3563 }, { "epoch": 0.4396878758905715, "grad_norm": 0.5867921113967896, "learning_rate": 1e-05, "loss": 0.5821, "step": 3564 }, { "epoch": 0.43981124510378433, "grad_norm": 0.7088335156440735, "learning_rate": 1e-05, "loss": 0.7752, "step": 3565 }, { "epoch": 0.4399346143169972, "grad_norm": 0.6100218296051025, "learning_rate": 1e-05, "loss": 0.5797, "step": 3566 }, { "epoch": 0.44005798353021003, "grad_norm": 0.6079267859458923, "learning_rate": 1e-05, "loss": 0.6216, "step": 3567 }, { "epoch": 0.4401813527434229, "grad_norm": 0.5175748467445374, "learning_rate": 1e-05, "loss": 0.5169, "step": 3568 }, { "epoch": 0.4403047219566357, "grad_norm": 0.6413783431053162, "learning_rate": 1e-05, "loss": 0.6082, "step": 3569 }, { "epoch": 0.44042809116984855, "grad_norm": 0.5754187703132629, "learning_rate": 1e-05, "loss": 0.6099, "step": 3570 }, { "epoch": 0.4405514603830614, "grad_norm": 0.5789181590080261, "learning_rate": 1e-05, "loss": 0.5032, "step": 3571 }, { "epoch": 0.44067482959627424, "grad_norm": 0.6449918150901794, "learning_rate": 1e-05, "loss": 0.6194, "step": 3572 }, { "epoch": 0.4407981988094871, "grad_norm": 0.5848106741905212, "learning_rate": 1e-05, "loss": 0.5419, "step": 3573 }, { "epoch": 0.44092156802269994, "grad_norm": 0.5924473404884338, "learning_rate": 1e-05, "loss": 0.582, "step": 3574 }, { "epoch": 0.44104493723591276, "grad_norm": 0.6459923982620239, "learning_rate": 1e-05, "loss": 0.6496, "step": 3575 }, { "epoch": 0.44116830644912564, "grad_norm": 0.5601276159286499, "learning_rate": 1e-05, "loss": 0.5204, "step": 3576 }, { "epoch": 0.44129167566233846, "grad_norm": 0.641623854637146, "learning_rate": 1e-05, "loss": 0.6991, "step": 3577 }, { "epoch": 0.44141504487555133, "grad_norm": 0.5407400727272034, "learning_rate": 1e-05, "loss": 0.4808, "step": 3578 }, { "epoch": 0.44153841408876415, "grad_norm": 0.5680193305015564, "learning_rate": 1e-05, "loss": 0.5689, "step": 3579 }, { "epoch": 0.441661783301977, "grad_norm": 0.5577268600463867, "learning_rate": 1e-05, "loss": 0.6055, "step": 3580 }, { "epoch": 0.44178515251518985, "grad_norm": 0.6170949935913086, "learning_rate": 1e-05, "loss": 0.5423, "step": 3581 }, { "epoch": 0.44190852172840267, "grad_norm": 0.5550281405448914, "learning_rate": 1e-05, "loss": 0.5375, "step": 3582 }, { "epoch": 0.44203189094161555, "grad_norm": 0.521273136138916, "learning_rate": 1e-05, "loss": 0.4813, "step": 3583 }, { "epoch": 0.44215526015482837, "grad_norm": 0.5658755898475647, "learning_rate": 1e-05, "loss": 0.551, "step": 3584 }, { "epoch": 0.4422786293680412, "grad_norm": 0.6592357754707336, "learning_rate": 1e-05, "loss": 0.6211, "step": 3585 }, { "epoch": 0.44240199858125406, "grad_norm": 0.5408694744110107, "learning_rate": 1e-05, "loss": 0.483, "step": 3586 }, { "epoch": 0.4425253677944669, "grad_norm": 0.5853123664855957, "learning_rate": 1e-05, "loss": 0.5727, "step": 3587 }, { "epoch": 0.44264873700767976, "grad_norm": 0.5845080018043518, "learning_rate": 1e-05, "loss": 0.576, "step": 3588 }, { "epoch": 0.4427721062208926, "grad_norm": 0.6969712376594543, "learning_rate": 1e-05, "loss": 0.6548, "step": 3589 }, { "epoch": 0.4428954754341054, "grad_norm": 0.5549964308738708, "learning_rate": 1e-05, "loss": 0.5391, "step": 3590 }, { "epoch": 0.4430188446473183, "grad_norm": 0.5562686920166016, "learning_rate": 1e-05, "loss": 0.6659, "step": 3591 }, { "epoch": 0.4431422138605311, "grad_norm": 0.5828410983085632, "learning_rate": 1e-05, "loss": 0.6378, "step": 3592 }, { "epoch": 0.443265583073744, "grad_norm": 0.523670494556427, "learning_rate": 1e-05, "loss": 0.4358, "step": 3593 }, { "epoch": 0.4433889522869568, "grad_norm": 0.5675845146179199, "learning_rate": 1e-05, "loss": 0.5727, "step": 3594 }, { "epoch": 0.4435123215001696, "grad_norm": 0.5409983992576599, "learning_rate": 1e-05, "loss": 0.4773, "step": 3595 }, { "epoch": 0.4436356907133825, "grad_norm": 0.5679179430007935, "learning_rate": 1e-05, "loss": 0.6148, "step": 3596 }, { "epoch": 0.4437590599265953, "grad_norm": 0.5589921474456787, "learning_rate": 1e-05, "loss": 0.517, "step": 3597 }, { "epoch": 0.4438824291398082, "grad_norm": 0.5394036769866943, "learning_rate": 1e-05, "loss": 0.5316, "step": 3598 }, { "epoch": 0.444005798353021, "grad_norm": 0.6000076532363892, "learning_rate": 1e-05, "loss": 0.5707, "step": 3599 }, { "epoch": 0.44412916756623383, "grad_norm": 0.5527151226997375, "learning_rate": 1e-05, "loss": 0.5145, "step": 3600 }, { "epoch": 0.4442525367794467, "grad_norm": 0.5001333355903625, "learning_rate": 1e-05, "loss": 0.425, "step": 3601 }, { "epoch": 0.4443759059926595, "grad_norm": 0.614245593547821, "learning_rate": 1e-05, "loss": 0.6342, "step": 3602 }, { "epoch": 0.4444992752058724, "grad_norm": 0.6262297034263611, "learning_rate": 1e-05, "loss": 0.7177, "step": 3603 }, { "epoch": 0.4446226444190852, "grad_norm": 0.5701785087585449, "learning_rate": 1e-05, "loss": 0.5121, "step": 3604 }, { "epoch": 0.44474601363229804, "grad_norm": 0.666972815990448, "learning_rate": 1e-05, "loss": 0.6796, "step": 3605 }, { "epoch": 0.4448693828455109, "grad_norm": 0.5390801429748535, "learning_rate": 1e-05, "loss": 0.4533, "step": 3606 }, { "epoch": 0.44499275205872374, "grad_norm": 0.5269095301628113, "learning_rate": 1e-05, "loss": 0.4688, "step": 3607 }, { "epoch": 0.4451161212719366, "grad_norm": 0.49065494537353516, "learning_rate": 1e-05, "loss": 0.4969, "step": 3608 }, { "epoch": 0.44523949048514944, "grad_norm": 0.6121144890785217, "learning_rate": 1e-05, "loss": 0.6386, "step": 3609 }, { "epoch": 0.44536285969836226, "grad_norm": 0.535762369632721, "learning_rate": 1e-05, "loss": 0.4754, "step": 3610 }, { "epoch": 0.44548622891157513, "grad_norm": 0.5614601969718933, "learning_rate": 1e-05, "loss": 0.5465, "step": 3611 }, { "epoch": 0.44560959812478795, "grad_norm": 0.5582444667816162, "learning_rate": 1e-05, "loss": 0.6139, "step": 3612 }, { "epoch": 0.44573296733800083, "grad_norm": 0.600851833820343, "learning_rate": 1e-05, "loss": 0.6196, "step": 3613 }, { "epoch": 0.44585633655121365, "grad_norm": 0.6183995008468628, "learning_rate": 1e-05, "loss": 0.6397, "step": 3614 }, { "epoch": 0.44597970576442647, "grad_norm": 0.6918066740036011, "learning_rate": 1e-05, "loss": 0.6918, "step": 3615 }, { "epoch": 0.44610307497763935, "grad_norm": 0.5952332019805908, "learning_rate": 1e-05, "loss": 0.6271, "step": 3616 }, { "epoch": 0.44622644419085217, "grad_norm": 0.6045579314231873, "learning_rate": 1e-05, "loss": 0.5947, "step": 3617 }, { "epoch": 0.446349813404065, "grad_norm": 0.5664069056510925, "learning_rate": 1e-05, "loss": 0.5571, "step": 3618 }, { "epoch": 0.44647318261727786, "grad_norm": 0.6169529557228088, "learning_rate": 1e-05, "loss": 0.5953, "step": 3619 }, { "epoch": 0.4465965518304907, "grad_norm": 0.5403223633766174, "learning_rate": 1e-05, "loss": 0.5826, "step": 3620 }, { "epoch": 0.44671992104370356, "grad_norm": 0.5276821255683899, "learning_rate": 1e-05, "loss": 0.5163, "step": 3621 }, { "epoch": 0.4468432902569164, "grad_norm": 0.6963714361190796, "learning_rate": 1e-05, "loss": 0.6427, "step": 3622 }, { "epoch": 0.4469666594701292, "grad_norm": 0.5127968788146973, "learning_rate": 1e-05, "loss": 0.5434, "step": 3623 }, { "epoch": 0.4470900286833421, "grad_norm": 0.6186854243278503, "learning_rate": 1e-05, "loss": 0.5951, "step": 3624 }, { "epoch": 0.4472133978965549, "grad_norm": 0.6149842739105225, "learning_rate": 1e-05, "loss": 0.5771, "step": 3625 }, { "epoch": 0.4473367671097678, "grad_norm": 0.5464008450508118, "learning_rate": 1e-05, "loss": 0.4904, "step": 3626 }, { "epoch": 0.4474601363229806, "grad_norm": 0.5804686546325684, "learning_rate": 1e-05, "loss": 0.5697, "step": 3627 }, { "epoch": 0.4475835055361934, "grad_norm": 0.5191094875335693, "learning_rate": 1e-05, "loss": 0.5632, "step": 3628 }, { "epoch": 0.4477068747494063, "grad_norm": 0.5817518830299377, "learning_rate": 1e-05, "loss": 0.6546, "step": 3629 }, { "epoch": 0.4478302439626191, "grad_norm": 0.5674474239349365, "learning_rate": 1e-05, "loss": 0.5865, "step": 3630 }, { "epoch": 0.447953613175832, "grad_norm": 0.629401445388794, "learning_rate": 1e-05, "loss": 0.6853, "step": 3631 }, { "epoch": 0.4480769823890448, "grad_norm": 0.5124334692955017, "learning_rate": 1e-05, "loss": 0.4737, "step": 3632 }, { "epoch": 0.44820035160225763, "grad_norm": 0.6538196802139282, "learning_rate": 1e-05, "loss": 0.6631, "step": 3633 }, { "epoch": 0.4483237208154705, "grad_norm": 0.6341129541397095, "learning_rate": 1e-05, "loss": 0.5993, "step": 3634 }, { "epoch": 0.4484470900286833, "grad_norm": 0.5592570900917053, "learning_rate": 1e-05, "loss": 0.5236, "step": 3635 }, { "epoch": 0.4485704592418962, "grad_norm": 0.523341178894043, "learning_rate": 1e-05, "loss": 0.4932, "step": 3636 }, { "epoch": 0.448693828455109, "grad_norm": 0.6209279298782349, "learning_rate": 1e-05, "loss": 0.5679, "step": 3637 }, { "epoch": 0.44881719766832184, "grad_norm": 0.7518758177757263, "learning_rate": 1e-05, "loss": 0.6061, "step": 3638 }, { "epoch": 0.4489405668815347, "grad_norm": 0.5413001179695129, "learning_rate": 1e-05, "loss": 0.565, "step": 3639 }, { "epoch": 0.44906393609474754, "grad_norm": 0.620069146156311, "learning_rate": 1e-05, "loss": 0.7505, "step": 3640 }, { "epoch": 0.4491873053079604, "grad_norm": 0.6083626747131348, "learning_rate": 1e-05, "loss": 0.5813, "step": 3641 }, { "epoch": 0.44931067452117324, "grad_norm": 0.5927597880363464, "learning_rate": 1e-05, "loss": 0.607, "step": 3642 }, { "epoch": 0.44943404373438606, "grad_norm": 0.7178130149841309, "learning_rate": 1e-05, "loss": 0.5943, "step": 3643 }, { "epoch": 0.44955741294759893, "grad_norm": 0.6112367510795593, "learning_rate": 1e-05, "loss": 0.6139, "step": 3644 }, { "epoch": 0.44968078216081175, "grad_norm": 0.6266407370567322, "learning_rate": 1e-05, "loss": 0.6351, "step": 3645 }, { "epoch": 0.44980415137402463, "grad_norm": 0.5128270983695984, "learning_rate": 1e-05, "loss": 0.4092, "step": 3646 }, { "epoch": 0.44992752058723745, "grad_norm": 0.5641573667526245, "learning_rate": 1e-05, "loss": 0.577, "step": 3647 }, { "epoch": 0.45005088980045027, "grad_norm": 0.5778979063034058, "learning_rate": 1e-05, "loss": 0.6393, "step": 3648 }, { "epoch": 0.45017425901366315, "grad_norm": 0.5383800864219666, "learning_rate": 1e-05, "loss": 0.5653, "step": 3649 }, { "epoch": 0.45029762822687597, "grad_norm": 0.6117113828659058, "learning_rate": 1e-05, "loss": 0.6653, "step": 3650 }, { "epoch": 0.45042099744008884, "grad_norm": 0.5546717047691345, "learning_rate": 1e-05, "loss": 0.52, "step": 3651 }, { "epoch": 0.45054436665330166, "grad_norm": 0.5407130122184753, "learning_rate": 1e-05, "loss": 0.5163, "step": 3652 }, { "epoch": 0.4506677358665145, "grad_norm": 0.5506080985069275, "learning_rate": 1e-05, "loss": 0.508, "step": 3653 }, { "epoch": 0.45079110507972736, "grad_norm": 0.6073881387710571, "learning_rate": 1e-05, "loss": 0.6211, "step": 3654 }, { "epoch": 0.4509144742929402, "grad_norm": 0.6043562293052673, "learning_rate": 1e-05, "loss": 0.6511, "step": 3655 }, { "epoch": 0.45103784350615306, "grad_norm": 0.5233737230300903, "learning_rate": 1e-05, "loss": 0.4873, "step": 3656 }, { "epoch": 0.4511612127193659, "grad_norm": 0.5693528652191162, "learning_rate": 1e-05, "loss": 0.4998, "step": 3657 }, { "epoch": 0.4512845819325787, "grad_norm": 0.5731006264686584, "learning_rate": 1e-05, "loss": 0.5194, "step": 3658 }, { "epoch": 0.4514079511457916, "grad_norm": 0.5665209889411926, "learning_rate": 1e-05, "loss": 0.5713, "step": 3659 }, { "epoch": 0.4515313203590044, "grad_norm": 0.6433473229408264, "learning_rate": 1e-05, "loss": 0.6483, "step": 3660 }, { "epoch": 0.45165468957221727, "grad_norm": 0.5440652370452881, "learning_rate": 1e-05, "loss": 0.5362, "step": 3661 }, { "epoch": 0.4517780587854301, "grad_norm": 0.6657403111457825, "learning_rate": 1e-05, "loss": 0.607, "step": 3662 }, { "epoch": 0.4519014279986429, "grad_norm": 0.5126965045928955, "learning_rate": 1e-05, "loss": 0.5037, "step": 3663 }, { "epoch": 0.4520247972118558, "grad_norm": 0.6350241303443909, "learning_rate": 1e-05, "loss": 0.6093, "step": 3664 }, { "epoch": 0.4521481664250686, "grad_norm": 0.6157418489456177, "learning_rate": 1e-05, "loss": 0.6863, "step": 3665 }, { "epoch": 0.4522715356382815, "grad_norm": 0.6121821403503418, "learning_rate": 1e-05, "loss": 0.5829, "step": 3666 }, { "epoch": 0.4523949048514943, "grad_norm": 0.6261312365531921, "learning_rate": 1e-05, "loss": 0.6646, "step": 3667 }, { "epoch": 0.4525182740647071, "grad_norm": 0.5466595888137817, "learning_rate": 1e-05, "loss": 0.5346, "step": 3668 }, { "epoch": 0.45264164327792, "grad_norm": 0.5993540287017822, "learning_rate": 1e-05, "loss": 0.6202, "step": 3669 }, { "epoch": 0.4527650124911328, "grad_norm": 0.5592480301856995, "learning_rate": 1e-05, "loss": 0.5182, "step": 3670 }, { "epoch": 0.4528883817043457, "grad_norm": 0.5830579400062561, "learning_rate": 1e-05, "loss": 0.5782, "step": 3671 }, { "epoch": 0.4530117509175585, "grad_norm": 0.6616617441177368, "learning_rate": 1e-05, "loss": 0.6306, "step": 3672 }, { "epoch": 0.45313512013077134, "grad_norm": 0.6011826992034912, "learning_rate": 1e-05, "loss": 0.5961, "step": 3673 }, { "epoch": 0.4532584893439842, "grad_norm": 0.5769452452659607, "learning_rate": 1e-05, "loss": 0.6438, "step": 3674 }, { "epoch": 0.45338185855719704, "grad_norm": 0.5477854013442993, "learning_rate": 1e-05, "loss": 0.5808, "step": 3675 }, { "epoch": 0.4535052277704099, "grad_norm": 0.5625330805778503, "learning_rate": 1e-05, "loss": 0.5278, "step": 3676 }, { "epoch": 0.45362859698362273, "grad_norm": 0.6541531085968018, "learning_rate": 1e-05, "loss": 0.7351, "step": 3677 }, { "epoch": 0.45375196619683555, "grad_norm": 0.5527877807617188, "learning_rate": 1e-05, "loss": 0.4833, "step": 3678 }, { "epoch": 0.45387533541004843, "grad_norm": 0.649619996547699, "learning_rate": 1e-05, "loss": 0.6304, "step": 3679 }, { "epoch": 0.45399870462326125, "grad_norm": 0.6005823016166687, "learning_rate": 1e-05, "loss": 0.5813, "step": 3680 }, { "epoch": 0.4541220738364741, "grad_norm": 0.592276930809021, "learning_rate": 1e-05, "loss": 0.6022, "step": 3681 }, { "epoch": 0.45424544304968695, "grad_norm": 0.5457861423492432, "learning_rate": 1e-05, "loss": 0.5638, "step": 3682 }, { "epoch": 0.45436881226289977, "grad_norm": 0.5763013958930969, "learning_rate": 1e-05, "loss": 0.6407, "step": 3683 }, { "epoch": 0.45449218147611264, "grad_norm": 0.6229299902915955, "learning_rate": 1e-05, "loss": 0.5727, "step": 3684 }, { "epoch": 0.45461555068932546, "grad_norm": 0.5084751844406128, "learning_rate": 1e-05, "loss": 0.4677, "step": 3685 }, { "epoch": 0.45473891990253834, "grad_norm": 0.5672799944877625, "learning_rate": 1e-05, "loss": 0.668, "step": 3686 }, { "epoch": 0.45486228911575116, "grad_norm": 0.5204346179962158, "learning_rate": 1e-05, "loss": 0.5001, "step": 3687 }, { "epoch": 0.454985658328964, "grad_norm": 0.7237198948860168, "learning_rate": 1e-05, "loss": 0.7461, "step": 3688 }, { "epoch": 0.45510902754217686, "grad_norm": 0.6331814527511597, "learning_rate": 1e-05, "loss": 0.5998, "step": 3689 }, { "epoch": 0.4552323967553897, "grad_norm": 0.7374364733695984, "learning_rate": 1e-05, "loss": 0.5785, "step": 3690 }, { "epoch": 0.45535576596860255, "grad_norm": 0.5893973708152771, "learning_rate": 1e-05, "loss": 0.539, "step": 3691 }, { "epoch": 0.4554791351818154, "grad_norm": 0.5273039937019348, "learning_rate": 1e-05, "loss": 0.5382, "step": 3692 }, { "epoch": 0.4556025043950282, "grad_norm": 0.5783350467681885, "learning_rate": 1e-05, "loss": 0.6028, "step": 3693 }, { "epoch": 0.45572587360824107, "grad_norm": 0.7083237767219543, "learning_rate": 1e-05, "loss": 0.7502, "step": 3694 }, { "epoch": 0.4558492428214539, "grad_norm": 0.5596211552619934, "learning_rate": 1e-05, "loss": 0.6453, "step": 3695 }, { "epoch": 0.45597261203466677, "grad_norm": 0.6255745887756348, "learning_rate": 1e-05, "loss": 0.6429, "step": 3696 }, { "epoch": 0.4560959812478796, "grad_norm": 0.6513701677322388, "learning_rate": 1e-05, "loss": 0.5576, "step": 3697 }, { "epoch": 0.4562193504610924, "grad_norm": 0.6470292210578918, "learning_rate": 1e-05, "loss": 0.6317, "step": 3698 }, { "epoch": 0.4563427196743053, "grad_norm": 0.551861047744751, "learning_rate": 1e-05, "loss": 0.5808, "step": 3699 }, { "epoch": 0.4564660888875181, "grad_norm": 0.5205938220024109, "learning_rate": 1e-05, "loss": 0.5418, "step": 3700 }, { "epoch": 0.456589458100731, "grad_norm": 0.5691625475883484, "learning_rate": 1e-05, "loss": 0.5273, "step": 3701 }, { "epoch": 0.4567128273139438, "grad_norm": 0.58638995885849, "learning_rate": 1e-05, "loss": 0.5531, "step": 3702 }, { "epoch": 0.4568361965271566, "grad_norm": 0.5789216756820679, "learning_rate": 1e-05, "loss": 0.5893, "step": 3703 }, { "epoch": 0.4569595657403695, "grad_norm": 0.5373855233192444, "learning_rate": 1e-05, "loss": 0.4983, "step": 3704 }, { "epoch": 0.4570829349535823, "grad_norm": 0.5532498955726624, "learning_rate": 1e-05, "loss": 0.5349, "step": 3705 }, { "epoch": 0.4572063041667952, "grad_norm": 0.5478926301002502, "learning_rate": 1e-05, "loss": 0.5974, "step": 3706 }, { "epoch": 0.457329673380008, "grad_norm": 0.5582311153411865, "learning_rate": 1e-05, "loss": 0.567, "step": 3707 }, { "epoch": 0.45745304259322084, "grad_norm": 0.5242863893508911, "learning_rate": 1e-05, "loss": 0.4907, "step": 3708 }, { "epoch": 0.4575764118064337, "grad_norm": 0.5523812770843506, "learning_rate": 1e-05, "loss": 0.5264, "step": 3709 }, { "epoch": 0.45769978101964653, "grad_norm": 0.612064778804779, "learning_rate": 1e-05, "loss": 0.5598, "step": 3710 }, { "epoch": 0.4578231502328594, "grad_norm": 0.6009764075279236, "learning_rate": 1e-05, "loss": 0.5995, "step": 3711 }, { "epoch": 0.45794651944607223, "grad_norm": 0.5765559673309326, "learning_rate": 1e-05, "loss": 0.4955, "step": 3712 }, { "epoch": 0.45806988865928505, "grad_norm": 0.5983626842498779, "learning_rate": 1e-05, "loss": 0.681, "step": 3713 }, { "epoch": 0.4581932578724979, "grad_norm": 0.5910947322845459, "learning_rate": 1e-05, "loss": 0.6245, "step": 3714 }, { "epoch": 0.45831662708571075, "grad_norm": 0.6124838590621948, "learning_rate": 1e-05, "loss": 0.6808, "step": 3715 }, { "epoch": 0.4584399962989236, "grad_norm": 0.5472860336303711, "learning_rate": 1e-05, "loss": 0.5323, "step": 3716 }, { "epoch": 0.45856336551213644, "grad_norm": 0.6010318398475647, "learning_rate": 1e-05, "loss": 0.6148, "step": 3717 }, { "epoch": 0.45868673472534927, "grad_norm": 0.5058815479278564, "learning_rate": 1e-05, "loss": 0.5177, "step": 3718 }, { "epoch": 0.45881010393856214, "grad_norm": 0.5518904328346252, "learning_rate": 1e-05, "loss": 0.5397, "step": 3719 }, { "epoch": 0.45893347315177496, "grad_norm": 0.5720667839050293, "learning_rate": 1e-05, "loss": 0.5576, "step": 3720 }, { "epoch": 0.45905684236498784, "grad_norm": 0.5720611214637756, "learning_rate": 1e-05, "loss": 0.5783, "step": 3721 }, { "epoch": 0.45918021157820066, "grad_norm": 0.6006345152854919, "learning_rate": 1e-05, "loss": 0.6447, "step": 3722 }, { "epoch": 0.4593035807914135, "grad_norm": 0.5434088706970215, "learning_rate": 1e-05, "loss": 0.5467, "step": 3723 }, { "epoch": 0.45942695000462636, "grad_norm": 0.7215290665626526, "learning_rate": 1e-05, "loss": 0.684, "step": 3724 }, { "epoch": 0.4595503192178392, "grad_norm": 0.5795906186103821, "learning_rate": 1e-05, "loss": 0.5275, "step": 3725 }, { "epoch": 0.45967368843105205, "grad_norm": 0.5623164176940918, "learning_rate": 1e-05, "loss": 0.5596, "step": 3726 }, { "epoch": 0.4597970576442649, "grad_norm": 0.5169060826301575, "learning_rate": 1e-05, "loss": 0.5223, "step": 3727 }, { "epoch": 0.4599204268574777, "grad_norm": 0.5911841988563538, "learning_rate": 1e-05, "loss": 0.6073, "step": 3728 }, { "epoch": 0.46004379607069057, "grad_norm": 0.615044891834259, "learning_rate": 1e-05, "loss": 0.5483, "step": 3729 }, { "epoch": 0.4601671652839034, "grad_norm": 0.5426704287528992, "learning_rate": 1e-05, "loss": 0.5499, "step": 3730 }, { "epoch": 0.46029053449711627, "grad_norm": 0.5350459814071655, "learning_rate": 1e-05, "loss": 0.4838, "step": 3731 }, { "epoch": 0.4604139037103291, "grad_norm": 0.6595609188079834, "learning_rate": 1e-05, "loss": 0.7023, "step": 3732 }, { "epoch": 0.4605372729235419, "grad_norm": 0.6885796189308167, "learning_rate": 1e-05, "loss": 0.6344, "step": 3733 }, { "epoch": 0.4606606421367548, "grad_norm": 0.6199790239334106, "learning_rate": 1e-05, "loss": 0.5257, "step": 3734 }, { "epoch": 0.4607840113499676, "grad_norm": 0.6615414023399353, "learning_rate": 1e-05, "loss": 0.6262, "step": 3735 }, { "epoch": 0.4609073805631805, "grad_norm": 0.5889300107955933, "learning_rate": 1e-05, "loss": 0.5941, "step": 3736 }, { "epoch": 0.4610307497763933, "grad_norm": 0.524859607219696, "learning_rate": 1e-05, "loss": 0.5645, "step": 3737 }, { "epoch": 0.4611541189896061, "grad_norm": 0.6008405089378357, "learning_rate": 1e-05, "loss": 0.5774, "step": 3738 }, { "epoch": 0.461277488202819, "grad_norm": 0.5744900107383728, "learning_rate": 1e-05, "loss": 0.5758, "step": 3739 }, { "epoch": 0.4614008574160318, "grad_norm": 0.5681368112564087, "learning_rate": 1e-05, "loss": 0.5, "step": 3740 }, { "epoch": 0.4615242266292447, "grad_norm": 0.5908153653144836, "learning_rate": 1e-05, "loss": 0.6695, "step": 3741 }, { "epoch": 0.4616475958424575, "grad_norm": 0.5841366052627563, "learning_rate": 1e-05, "loss": 0.5358, "step": 3742 }, { "epoch": 0.46177096505567033, "grad_norm": 0.5794116258621216, "learning_rate": 1e-05, "loss": 0.583, "step": 3743 }, { "epoch": 0.4618943342688832, "grad_norm": 0.5587993860244751, "learning_rate": 1e-05, "loss": 0.5506, "step": 3744 }, { "epoch": 0.46201770348209603, "grad_norm": 0.5876469612121582, "learning_rate": 1e-05, "loss": 0.7004, "step": 3745 }, { "epoch": 0.4621410726953089, "grad_norm": 0.5369462370872498, "learning_rate": 1e-05, "loss": 0.5416, "step": 3746 }, { "epoch": 0.46226444190852173, "grad_norm": 0.5352222919464111, "learning_rate": 1e-05, "loss": 0.5104, "step": 3747 }, { "epoch": 0.46238781112173455, "grad_norm": 0.6999420523643494, "learning_rate": 1e-05, "loss": 0.7015, "step": 3748 }, { "epoch": 0.4625111803349474, "grad_norm": 0.5449258089065552, "learning_rate": 1e-05, "loss": 0.5544, "step": 3749 }, { "epoch": 0.46263454954816025, "grad_norm": 0.6028326153755188, "learning_rate": 1e-05, "loss": 0.6769, "step": 3750 }, { "epoch": 0.4627579187613731, "grad_norm": 0.5769385695457458, "learning_rate": 1e-05, "loss": 0.6592, "step": 3751 }, { "epoch": 0.46288128797458594, "grad_norm": 0.6062979102134705, "learning_rate": 1e-05, "loss": 0.6363, "step": 3752 }, { "epoch": 0.46300465718779876, "grad_norm": 0.5936810970306396, "learning_rate": 1e-05, "loss": 0.6253, "step": 3753 }, { "epoch": 0.46312802640101164, "grad_norm": 0.5200102925300598, "learning_rate": 1e-05, "loss": 0.4735, "step": 3754 }, { "epoch": 0.46325139561422446, "grad_norm": 0.6060130000114441, "learning_rate": 1e-05, "loss": 0.5752, "step": 3755 }, { "epoch": 0.46337476482743734, "grad_norm": 0.590488851070404, "learning_rate": 1e-05, "loss": 0.591, "step": 3756 }, { "epoch": 0.46349813404065016, "grad_norm": 0.5511004328727722, "learning_rate": 1e-05, "loss": 0.6372, "step": 3757 }, { "epoch": 0.463621503253863, "grad_norm": 0.6900985240936279, "learning_rate": 1e-05, "loss": 0.6804, "step": 3758 }, { "epoch": 0.46374487246707585, "grad_norm": 0.5627360939979553, "learning_rate": 1e-05, "loss": 0.5387, "step": 3759 }, { "epoch": 0.4638682416802887, "grad_norm": 0.5431362986564636, "learning_rate": 1e-05, "loss": 0.5378, "step": 3760 }, { "epoch": 0.46399161089350155, "grad_norm": 0.5551276803016663, "learning_rate": 1e-05, "loss": 0.6187, "step": 3761 }, { "epoch": 0.46411498010671437, "grad_norm": 0.562335729598999, "learning_rate": 1e-05, "loss": 0.5933, "step": 3762 }, { "epoch": 0.4642383493199272, "grad_norm": 0.5803765654563904, "learning_rate": 1e-05, "loss": 0.6327, "step": 3763 }, { "epoch": 0.46436171853314007, "grad_norm": 0.5930768251419067, "learning_rate": 1e-05, "loss": 0.6024, "step": 3764 }, { "epoch": 0.4644850877463529, "grad_norm": 0.5503953695297241, "learning_rate": 1e-05, "loss": 0.5933, "step": 3765 }, { "epoch": 0.46460845695956576, "grad_norm": 0.5914968252182007, "learning_rate": 1e-05, "loss": 0.5257, "step": 3766 }, { "epoch": 0.4647318261727786, "grad_norm": 0.6608415246009827, "learning_rate": 1e-05, "loss": 0.7362, "step": 3767 }, { "epoch": 0.4648551953859914, "grad_norm": 0.5491209626197815, "learning_rate": 1e-05, "loss": 0.6144, "step": 3768 }, { "epoch": 0.4649785645992043, "grad_norm": 0.5775510668754578, "learning_rate": 1e-05, "loss": 0.5834, "step": 3769 }, { "epoch": 0.4651019338124171, "grad_norm": 0.6267983317375183, "learning_rate": 1e-05, "loss": 0.7204, "step": 3770 }, { "epoch": 0.46522530302563, "grad_norm": 0.605737566947937, "learning_rate": 1e-05, "loss": 0.6739, "step": 3771 }, { "epoch": 0.4653486722388428, "grad_norm": 0.6566739082336426, "learning_rate": 1e-05, "loss": 0.5999, "step": 3772 }, { "epoch": 0.4654720414520556, "grad_norm": 0.5743463039398193, "learning_rate": 1e-05, "loss": 0.5599, "step": 3773 }, { "epoch": 0.4655954106652685, "grad_norm": 0.6036350131034851, "learning_rate": 1e-05, "loss": 0.6794, "step": 3774 }, { "epoch": 0.4657187798784813, "grad_norm": 0.5423754453659058, "learning_rate": 1e-05, "loss": 0.5072, "step": 3775 }, { "epoch": 0.4658421490916942, "grad_norm": 0.5170868039131165, "learning_rate": 1e-05, "loss": 0.4763, "step": 3776 }, { "epoch": 0.465965518304907, "grad_norm": 0.5911687612533569, "learning_rate": 1e-05, "loss": 0.5222, "step": 3777 }, { "epoch": 0.46608888751811983, "grad_norm": 0.6130036115646362, "learning_rate": 1e-05, "loss": 0.6533, "step": 3778 }, { "epoch": 0.4662122567313327, "grad_norm": 0.5488155484199524, "learning_rate": 1e-05, "loss": 0.5563, "step": 3779 }, { "epoch": 0.46633562594454553, "grad_norm": 0.6361104249954224, "learning_rate": 1e-05, "loss": 0.6933, "step": 3780 }, { "epoch": 0.4664589951577584, "grad_norm": 0.6209996342658997, "learning_rate": 1e-05, "loss": 0.624, "step": 3781 }, { "epoch": 0.4665823643709712, "grad_norm": 0.49012303352355957, "learning_rate": 1e-05, "loss": 0.4594, "step": 3782 }, { "epoch": 0.46670573358418405, "grad_norm": 0.5934959053993225, "learning_rate": 1e-05, "loss": 0.5741, "step": 3783 }, { "epoch": 0.4668291027973969, "grad_norm": 0.5283954739570618, "learning_rate": 1e-05, "loss": 0.5336, "step": 3784 }, { "epoch": 0.46695247201060974, "grad_norm": 0.6382024884223938, "learning_rate": 1e-05, "loss": 0.5881, "step": 3785 }, { "epoch": 0.4670758412238226, "grad_norm": 0.6067160964012146, "learning_rate": 1e-05, "loss": 0.6403, "step": 3786 }, { "epoch": 0.46719921043703544, "grad_norm": 0.5639182329177856, "learning_rate": 1e-05, "loss": 0.6007, "step": 3787 }, { "epoch": 0.46732257965024826, "grad_norm": 0.5812539458274841, "learning_rate": 1e-05, "loss": 0.5972, "step": 3788 }, { "epoch": 0.46744594886346114, "grad_norm": 0.6698452830314636, "learning_rate": 1e-05, "loss": 0.7273, "step": 3789 }, { "epoch": 0.46756931807667396, "grad_norm": 0.5449016690254211, "learning_rate": 1e-05, "loss": 0.5391, "step": 3790 }, { "epoch": 0.46769268728988683, "grad_norm": 0.531154990196228, "learning_rate": 1e-05, "loss": 0.5408, "step": 3791 }, { "epoch": 0.46781605650309965, "grad_norm": 0.5731769800186157, "learning_rate": 1e-05, "loss": 0.6063, "step": 3792 }, { "epoch": 0.4679394257163125, "grad_norm": 0.5601376891136169, "learning_rate": 1e-05, "loss": 0.4977, "step": 3793 }, { "epoch": 0.46806279492952535, "grad_norm": 0.5745819807052612, "learning_rate": 1e-05, "loss": 0.6065, "step": 3794 }, { "epoch": 0.46818616414273817, "grad_norm": 0.625696063041687, "learning_rate": 1e-05, "loss": 0.6986, "step": 3795 }, { "epoch": 0.46830953335595105, "grad_norm": 0.5973599553108215, "learning_rate": 1e-05, "loss": 0.5825, "step": 3796 }, { "epoch": 0.46843290256916387, "grad_norm": 0.5980522036552429, "learning_rate": 1e-05, "loss": 0.5915, "step": 3797 }, { "epoch": 0.4685562717823767, "grad_norm": 0.5966833829879761, "learning_rate": 1e-05, "loss": 0.6693, "step": 3798 }, { "epoch": 0.46867964099558956, "grad_norm": 0.6525993943214417, "learning_rate": 1e-05, "loss": 0.6986, "step": 3799 }, { "epoch": 0.4688030102088024, "grad_norm": 0.5706292986869812, "learning_rate": 1e-05, "loss": 0.5538, "step": 3800 }, { "epoch": 0.46892637942201526, "grad_norm": 0.5130251049995422, "learning_rate": 1e-05, "loss": 0.4176, "step": 3801 }, { "epoch": 0.4690497486352281, "grad_norm": 0.5872904062271118, "learning_rate": 1e-05, "loss": 0.5322, "step": 3802 }, { "epoch": 0.4691731178484409, "grad_norm": 0.516865074634552, "learning_rate": 1e-05, "loss": 0.4251, "step": 3803 }, { "epoch": 0.4692964870616538, "grad_norm": 0.60463547706604, "learning_rate": 1e-05, "loss": 0.6421, "step": 3804 }, { "epoch": 0.4694198562748666, "grad_norm": 0.5109005570411682, "learning_rate": 1e-05, "loss": 0.5044, "step": 3805 }, { "epoch": 0.4695432254880795, "grad_norm": 0.5843003988265991, "learning_rate": 1e-05, "loss": 0.6179, "step": 3806 }, { "epoch": 0.4696665947012923, "grad_norm": 0.5350116491317749, "learning_rate": 1e-05, "loss": 0.499, "step": 3807 }, { "epoch": 0.4697899639145051, "grad_norm": 0.6255543828010559, "learning_rate": 1e-05, "loss": 0.7079, "step": 3808 }, { "epoch": 0.469913333127718, "grad_norm": 0.6211764216423035, "learning_rate": 1e-05, "loss": 0.5986, "step": 3809 }, { "epoch": 0.4700367023409308, "grad_norm": 0.5975675582885742, "learning_rate": 1e-05, "loss": 0.5896, "step": 3810 }, { "epoch": 0.4701600715541437, "grad_norm": 0.5378442406654358, "learning_rate": 1e-05, "loss": 0.5304, "step": 3811 }, { "epoch": 0.4702834407673565, "grad_norm": 0.5666934251785278, "learning_rate": 1e-05, "loss": 0.4635, "step": 3812 }, { "epoch": 0.47040680998056933, "grad_norm": 0.5826770067214966, "learning_rate": 1e-05, "loss": 0.6225, "step": 3813 }, { "epoch": 0.4705301791937822, "grad_norm": 0.5041036605834961, "learning_rate": 1e-05, "loss": 0.4697, "step": 3814 }, { "epoch": 0.470653548406995, "grad_norm": 0.7333880662918091, "learning_rate": 1e-05, "loss": 0.6591, "step": 3815 }, { "epoch": 0.4707769176202079, "grad_norm": 0.6220777630805969, "learning_rate": 1e-05, "loss": 0.5775, "step": 3816 }, { "epoch": 0.4709002868334207, "grad_norm": 0.5596961975097656, "learning_rate": 1e-05, "loss": 0.5442, "step": 3817 }, { "epoch": 0.47102365604663354, "grad_norm": 0.5953522324562073, "learning_rate": 1e-05, "loss": 0.5706, "step": 3818 }, { "epoch": 0.4711470252598464, "grad_norm": 0.5745567679405212, "learning_rate": 1e-05, "loss": 0.4705, "step": 3819 }, { "epoch": 0.47127039447305924, "grad_norm": 0.5068484544754028, "learning_rate": 1e-05, "loss": 0.4733, "step": 3820 }, { "epoch": 0.4713937636862721, "grad_norm": 0.583473265171051, "learning_rate": 1e-05, "loss": 0.5353, "step": 3821 }, { "epoch": 0.47151713289948494, "grad_norm": 0.5457908511161804, "learning_rate": 1e-05, "loss": 0.5433, "step": 3822 }, { "epoch": 0.47164050211269776, "grad_norm": 0.5645642280578613, "learning_rate": 1e-05, "loss": 0.5551, "step": 3823 }, { "epoch": 0.47176387132591063, "grad_norm": 0.5619671940803528, "learning_rate": 1e-05, "loss": 0.5589, "step": 3824 }, { "epoch": 0.47188724053912345, "grad_norm": 0.5797988772392273, "learning_rate": 1e-05, "loss": 0.5292, "step": 3825 }, { "epoch": 0.47201060975233633, "grad_norm": 0.5472936630249023, "learning_rate": 1e-05, "loss": 0.5506, "step": 3826 }, { "epoch": 0.47213397896554915, "grad_norm": 0.5417957305908203, "learning_rate": 1e-05, "loss": 0.5103, "step": 3827 }, { "epoch": 0.47225734817876197, "grad_norm": 0.5454949140548706, "learning_rate": 1e-05, "loss": 0.5052, "step": 3828 }, { "epoch": 0.47238071739197485, "grad_norm": 0.6731436252593994, "learning_rate": 1e-05, "loss": 0.7363, "step": 3829 }, { "epoch": 0.47250408660518767, "grad_norm": 0.5855904221534729, "learning_rate": 1e-05, "loss": 0.6507, "step": 3830 }, { "epoch": 0.47262745581840054, "grad_norm": 0.6334226727485657, "learning_rate": 1e-05, "loss": 0.647, "step": 3831 }, { "epoch": 0.47275082503161336, "grad_norm": 0.5688008666038513, "learning_rate": 1e-05, "loss": 0.5767, "step": 3832 }, { "epoch": 0.4728741942448262, "grad_norm": 0.5449799299240112, "learning_rate": 1e-05, "loss": 0.5303, "step": 3833 }, { "epoch": 0.47299756345803906, "grad_norm": 0.512398362159729, "learning_rate": 1e-05, "loss": 0.4436, "step": 3834 }, { "epoch": 0.4731209326712519, "grad_norm": 0.641777515411377, "learning_rate": 1e-05, "loss": 0.6815, "step": 3835 }, { "epoch": 0.47324430188446476, "grad_norm": 0.6210508942604065, "learning_rate": 1e-05, "loss": 0.6064, "step": 3836 }, { "epoch": 0.4733676710976776, "grad_norm": 0.5209712386131287, "learning_rate": 1e-05, "loss": 0.4886, "step": 3837 }, { "epoch": 0.4734910403108904, "grad_norm": 0.6073447465896606, "learning_rate": 1e-05, "loss": 0.5598, "step": 3838 }, { "epoch": 0.4736144095241033, "grad_norm": 0.5315760970115662, "learning_rate": 1e-05, "loss": 0.491, "step": 3839 }, { "epoch": 0.4737377787373161, "grad_norm": 0.5229793787002563, "learning_rate": 1e-05, "loss": 0.5272, "step": 3840 }, { "epoch": 0.47386114795052897, "grad_norm": 0.5352762341499329, "learning_rate": 1e-05, "loss": 0.5141, "step": 3841 }, { "epoch": 0.4739845171637418, "grad_norm": 0.5442870259284973, "learning_rate": 1e-05, "loss": 0.454, "step": 3842 }, { "epoch": 0.4741078863769546, "grad_norm": 0.5912232398986816, "learning_rate": 1e-05, "loss": 0.5631, "step": 3843 }, { "epoch": 0.4742312555901675, "grad_norm": 0.5634068846702576, "learning_rate": 1e-05, "loss": 0.5775, "step": 3844 }, { "epoch": 0.4743546248033803, "grad_norm": 0.5629028081893921, "learning_rate": 1e-05, "loss": 0.5939, "step": 3845 }, { "epoch": 0.4744779940165932, "grad_norm": 0.5576089024543762, "learning_rate": 1e-05, "loss": 0.6012, "step": 3846 }, { "epoch": 0.474601363229806, "grad_norm": 0.6119677424430847, "learning_rate": 1e-05, "loss": 0.5948, "step": 3847 }, { "epoch": 0.4747247324430188, "grad_norm": 0.5968068242073059, "learning_rate": 1e-05, "loss": 0.6021, "step": 3848 }, { "epoch": 0.4748481016562317, "grad_norm": 0.5891293883323669, "learning_rate": 1e-05, "loss": 0.6783, "step": 3849 }, { "epoch": 0.4749714708694445, "grad_norm": 0.6478825211524963, "learning_rate": 1e-05, "loss": 0.6456, "step": 3850 }, { "epoch": 0.4750948400826574, "grad_norm": 0.5932230353355408, "learning_rate": 1e-05, "loss": 0.5741, "step": 3851 }, { "epoch": 0.4752182092958702, "grad_norm": 0.62232506275177, "learning_rate": 1e-05, "loss": 0.6466, "step": 3852 }, { "epoch": 0.47534157850908304, "grad_norm": 0.48648831248283386, "learning_rate": 1e-05, "loss": 0.4627, "step": 3853 }, { "epoch": 0.4754649477222959, "grad_norm": 0.6061046719551086, "learning_rate": 1e-05, "loss": 0.5981, "step": 3854 }, { "epoch": 0.47558831693550874, "grad_norm": 0.5180764198303223, "learning_rate": 1e-05, "loss": 0.5393, "step": 3855 }, { "epoch": 0.4757116861487216, "grad_norm": 0.4993349015712738, "learning_rate": 1e-05, "loss": 0.4559, "step": 3856 }, { "epoch": 0.47583505536193443, "grad_norm": 0.6272590756416321, "learning_rate": 1e-05, "loss": 0.5778, "step": 3857 }, { "epoch": 0.47595842457514725, "grad_norm": 0.6603074669837952, "learning_rate": 1e-05, "loss": 0.5879, "step": 3858 }, { "epoch": 0.47608179378836013, "grad_norm": 0.6249369382858276, "learning_rate": 1e-05, "loss": 0.591, "step": 3859 }, { "epoch": 0.47620516300157295, "grad_norm": 0.6081451773643494, "learning_rate": 1e-05, "loss": 0.6772, "step": 3860 }, { "epoch": 0.4763285322147858, "grad_norm": 0.5915789604187012, "learning_rate": 1e-05, "loss": 0.6729, "step": 3861 }, { "epoch": 0.47645190142799865, "grad_norm": 0.5578958988189697, "learning_rate": 1e-05, "loss": 0.4731, "step": 3862 }, { "epoch": 0.47657527064121147, "grad_norm": 0.5815126895904541, "learning_rate": 1e-05, "loss": 0.646, "step": 3863 }, { "epoch": 0.47669863985442434, "grad_norm": 0.5778455138206482, "learning_rate": 1e-05, "loss": 0.5536, "step": 3864 }, { "epoch": 0.47682200906763716, "grad_norm": 0.5244335532188416, "learning_rate": 1e-05, "loss": 0.4497, "step": 3865 }, { "epoch": 0.47694537828085004, "grad_norm": 0.6451833248138428, "learning_rate": 1e-05, "loss": 0.6537, "step": 3866 }, { "epoch": 0.47706874749406286, "grad_norm": 0.6081408858299255, "learning_rate": 1e-05, "loss": 0.6274, "step": 3867 }, { "epoch": 0.4771921167072757, "grad_norm": 0.5855705142021179, "learning_rate": 1e-05, "loss": 0.5337, "step": 3868 }, { "epoch": 0.47731548592048856, "grad_norm": 0.6594080924987793, "learning_rate": 1e-05, "loss": 0.7162, "step": 3869 }, { "epoch": 0.4774388551337014, "grad_norm": 0.5590198636054993, "learning_rate": 1e-05, "loss": 0.5824, "step": 3870 }, { "epoch": 0.47756222434691425, "grad_norm": 0.5306087732315063, "learning_rate": 1e-05, "loss": 0.4769, "step": 3871 }, { "epoch": 0.4776855935601271, "grad_norm": 0.5886354446411133, "learning_rate": 1e-05, "loss": 0.5501, "step": 3872 }, { "epoch": 0.4778089627733399, "grad_norm": 0.5770297646522522, "learning_rate": 1e-05, "loss": 0.5594, "step": 3873 }, { "epoch": 0.47793233198655277, "grad_norm": 0.6015908122062683, "learning_rate": 1e-05, "loss": 0.5859, "step": 3874 }, { "epoch": 0.4780557011997656, "grad_norm": 0.4878849685192108, "learning_rate": 1e-05, "loss": 0.4272, "step": 3875 }, { "epoch": 0.47817907041297847, "grad_norm": 0.6121713519096375, "learning_rate": 1e-05, "loss": 0.5503, "step": 3876 }, { "epoch": 0.4783024396261913, "grad_norm": 0.5855767726898193, "learning_rate": 1e-05, "loss": 0.5933, "step": 3877 }, { "epoch": 0.4784258088394041, "grad_norm": 0.5999166965484619, "learning_rate": 1e-05, "loss": 0.619, "step": 3878 }, { "epoch": 0.478549178052617, "grad_norm": 0.618480920791626, "learning_rate": 1e-05, "loss": 0.6863, "step": 3879 }, { "epoch": 0.4786725472658298, "grad_norm": 0.5582593083381653, "learning_rate": 1e-05, "loss": 0.4675, "step": 3880 }, { "epoch": 0.4787959164790427, "grad_norm": 0.4778631925582886, "learning_rate": 1e-05, "loss": 0.4193, "step": 3881 }, { "epoch": 0.4789192856922555, "grad_norm": 0.6587344408035278, "learning_rate": 1e-05, "loss": 0.5624, "step": 3882 }, { "epoch": 0.4790426549054683, "grad_norm": 0.5639616847038269, "learning_rate": 1e-05, "loss": 0.6074, "step": 3883 }, { "epoch": 0.4791660241186812, "grad_norm": 0.5836836695671082, "learning_rate": 1e-05, "loss": 0.5245, "step": 3884 }, { "epoch": 0.479289393331894, "grad_norm": 0.6254863739013672, "learning_rate": 1e-05, "loss": 0.5752, "step": 3885 }, { "epoch": 0.47941276254510684, "grad_norm": 0.5690606236457825, "learning_rate": 1e-05, "loss": 0.5286, "step": 3886 }, { "epoch": 0.4795361317583197, "grad_norm": 0.6255609393119812, "learning_rate": 1e-05, "loss": 0.5397, "step": 3887 }, { "epoch": 0.47965950097153254, "grad_norm": 0.5117272734642029, "learning_rate": 1e-05, "loss": 0.4843, "step": 3888 }, { "epoch": 0.4797828701847454, "grad_norm": 0.5470507144927979, "learning_rate": 1e-05, "loss": 0.529, "step": 3889 }, { "epoch": 0.47990623939795823, "grad_norm": 0.5568363666534424, "learning_rate": 1e-05, "loss": 0.4889, "step": 3890 }, { "epoch": 0.48002960861117105, "grad_norm": 0.678314208984375, "learning_rate": 1e-05, "loss": 0.6672, "step": 3891 }, { "epoch": 0.48015297782438393, "grad_norm": 0.5195672512054443, "learning_rate": 1e-05, "loss": 0.4905, "step": 3892 }, { "epoch": 0.48027634703759675, "grad_norm": 0.5729072690010071, "learning_rate": 1e-05, "loss": 0.6042, "step": 3893 }, { "epoch": 0.4803997162508096, "grad_norm": 0.5698903799057007, "learning_rate": 1e-05, "loss": 0.4905, "step": 3894 }, { "epoch": 0.48052308546402245, "grad_norm": 0.5922335982322693, "learning_rate": 1e-05, "loss": 0.6403, "step": 3895 }, { "epoch": 0.48064645467723527, "grad_norm": 0.5964649319648743, "learning_rate": 1e-05, "loss": 0.5539, "step": 3896 }, { "epoch": 0.48076982389044814, "grad_norm": 0.5661703944206238, "learning_rate": 1e-05, "loss": 0.5523, "step": 3897 }, { "epoch": 0.48089319310366097, "grad_norm": 0.6081989407539368, "learning_rate": 1e-05, "loss": 0.6781, "step": 3898 }, { "epoch": 0.48101656231687384, "grad_norm": 0.6055293679237366, "learning_rate": 1e-05, "loss": 0.6686, "step": 3899 }, { "epoch": 0.48113993153008666, "grad_norm": 0.5308582186698914, "learning_rate": 1e-05, "loss": 0.4728, "step": 3900 }, { "epoch": 0.4812633007432995, "grad_norm": 0.5539143085479736, "learning_rate": 1e-05, "loss": 0.5201, "step": 3901 }, { "epoch": 0.48138666995651236, "grad_norm": 0.55086350440979, "learning_rate": 1e-05, "loss": 0.4792, "step": 3902 }, { "epoch": 0.4815100391697252, "grad_norm": 0.5677441954612732, "learning_rate": 1e-05, "loss": 0.5504, "step": 3903 }, { "epoch": 0.48163340838293806, "grad_norm": 0.5490809679031372, "learning_rate": 1e-05, "loss": 0.552, "step": 3904 }, { "epoch": 0.4817567775961509, "grad_norm": 0.587868332862854, "learning_rate": 1e-05, "loss": 0.6015, "step": 3905 }, { "epoch": 0.4818801468093637, "grad_norm": 0.594247043132782, "learning_rate": 1e-05, "loss": 0.6053, "step": 3906 }, { "epoch": 0.4820035160225766, "grad_norm": 0.5411868691444397, "learning_rate": 1e-05, "loss": 0.5338, "step": 3907 }, { "epoch": 0.4821268852357894, "grad_norm": 0.5301688313484192, "learning_rate": 1e-05, "loss": 0.5723, "step": 3908 }, { "epoch": 0.48225025444900227, "grad_norm": 0.5713765025138855, "learning_rate": 1e-05, "loss": 0.608, "step": 3909 }, { "epoch": 0.4823736236622151, "grad_norm": 0.5648901462554932, "learning_rate": 1e-05, "loss": 0.556, "step": 3910 }, { "epoch": 0.4824969928754279, "grad_norm": 0.5474717020988464, "learning_rate": 1e-05, "loss": 0.5525, "step": 3911 }, { "epoch": 0.4826203620886408, "grad_norm": 0.5631597638130188, "learning_rate": 1e-05, "loss": 0.5915, "step": 3912 }, { "epoch": 0.4827437313018536, "grad_norm": 0.5340496301651001, "learning_rate": 1e-05, "loss": 0.4808, "step": 3913 }, { "epoch": 0.4828671005150665, "grad_norm": 0.5501688122749329, "learning_rate": 1e-05, "loss": 0.5793, "step": 3914 }, { "epoch": 0.4829904697282793, "grad_norm": 0.9822888970375061, "learning_rate": 1e-05, "loss": 0.5801, "step": 3915 }, { "epoch": 0.4831138389414921, "grad_norm": 0.5755778551101685, "learning_rate": 1e-05, "loss": 0.5801, "step": 3916 }, { "epoch": 0.483237208154705, "grad_norm": 0.5793763399124146, "learning_rate": 1e-05, "loss": 0.5723, "step": 3917 }, { "epoch": 0.4833605773679178, "grad_norm": 0.6422014832496643, "learning_rate": 1e-05, "loss": 0.5731, "step": 3918 }, { "epoch": 0.4834839465811307, "grad_norm": 0.6579854488372803, "learning_rate": 1e-05, "loss": 0.6186, "step": 3919 }, { "epoch": 0.4836073157943435, "grad_norm": 0.590503990650177, "learning_rate": 1e-05, "loss": 0.6187, "step": 3920 }, { "epoch": 0.48373068500755634, "grad_norm": 0.5377308130264282, "learning_rate": 1e-05, "loss": 0.522, "step": 3921 }, { "epoch": 0.4838540542207692, "grad_norm": 0.5852921605110168, "learning_rate": 1e-05, "loss": 0.5666, "step": 3922 }, { "epoch": 0.48397742343398203, "grad_norm": 0.5911467671394348, "learning_rate": 1e-05, "loss": 0.6018, "step": 3923 }, { "epoch": 0.4841007926471949, "grad_norm": 0.5176968574523926, "learning_rate": 1e-05, "loss": 0.5762, "step": 3924 }, { "epoch": 0.48422416186040773, "grad_norm": 0.4765843451023102, "learning_rate": 1e-05, "loss": 0.4114, "step": 3925 }, { "epoch": 0.48434753107362055, "grad_norm": 0.5068493485450745, "learning_rate": 1e-05, "loss": 0.4509, "step": 3926 }, { "epoch": 0.48447090028683343, "grad_norm": 0.6466670632362366, "learning_rate": 1e-05, "loss": 0.6717, "step": 3927 }, { "epoch": 0.48459426950004625, "grad_norm": 0.5433177351951599, "learning_rate": 1e-05, "loss": 0.4943, "step": 3928 }, { "epoch": 0.4847176387132591, "grad_norm": 0.49739545583724976, "learning_rate": 1e-05, "loss": 0.4009, "step": 3929 }, { "epoch": 0.48484100792647195, "grad_norm": 0.5725671052932739, "learning_rate": 1e-05, "loss": 0.6835, "step": 3930 }, { "epoch": 0.48496437713968477, "grad_norm": 0.5122892260551453, "learning_rate": 1e-05, "loss": 0.5213, "step": 3931 }, { "epoch": 0.48508774635289764, "grad_norm": 0.606503963470459, "learning_rate": 1e-05, "loss": 0.5613, "step": 3932 }, { "epoch": 0.48521111556611046, "grad_norm": 0.5562818050384521, "learning_rate": 1e-05, "loss": 0.6143, "step": 3933 }, { "epoch": 0.48533448477932334, "grad_norm": 0.5835887789726257, "learning_rate": 1e-05, "loss": 0.4529, "step": 3934 }, { "epoch": 0.48545785399253616, "grad_norm": 0.48635587096214294, "learning_rate": 1e-05, "loss": 0.5081, "step": 3935 }, { "epoch": 0.485581223205749, "grad_norm": 0.596322774887085, "learning_rate": 1e-05, "loss": 0.616, "step": 3936 }, { "epoch": 0.48570459241896186, "grad_norm": 0.6094297766685486, "learning_rate": 1e-05, "loss": 0.5592, "step": 3937 }, { "epoch": 0.4858279616321747, "grad_norm": 0.5521395802497864, "learning_rate": 1e-05, "loss": 0.5042, "step": 3938 }, { "epoch": 0.48595133084538755, "grad_norm": 0.5418375730514526, "learning_rate": 1e-05, "loss": 0.5427, "step": 3939 }, { "epoch": 0.4860747000586004, "grad_norm": 0.5830357670783997, "learning_rate": 1e-05, "loss": 0.5535, "step": 3940 }, { "epoch": 0.4861980692718132, "grad_norm": 0.6318268179893494, "learning_rate": 1e-05, "loss": 0.5542, "step": 3941 }, { "epoch": 0.48632143848502607, "grad_norm": 0.5620795488357544, "learning_rate": 1e-05, "loss": 0.555, "step": 3942 }, { "epoch": 0.4864448076982389, "grad_norm": 0.5848623514175415, "learning_rate": 1e-05, "loss": 0.5887, "step": 3943 }, { "epoch": 0.48656817691145177, "grad_norm": 0.6372314095497131, "learning_rate": 1e-05, "loss": 0.6002, "step": 3944 }, { "epoch": 0.4866915461246646, "grad_norm": 0.625250518321991, "learning_rate": 1e-05, "loss": 0.5893, "step": 3945 }, { "epoch": 0.4868149153378774, "grad_norm": 0.5552360415458679, "learning_rate": 1e-05, "loss": 0.5381, "step": 3946 }, { "epoch": 0.4869382845510903, "grad_norm": 0.5717198848724365, "learning_rate": 1e-05, "loss": 0.6134, "step": 3947 }, { "epoch": 0.4870616537643031, "grad_norm": 0.5741493105888367, "learning_rate": 1e-05, "loss": 0.5731, "step": 3948 }, { "epoch": 0.487185022977516, "grad_norm": 0.6172621250152588, "learning_rate": 1e-05, "loss": 0.6419, "step": 3949 }, { "epoch": 0.4873083921907288, "grad_norm": 0.5633589029312134, "learning_rate": 1e-05, "loss": 0.5564, "step": 3950 }, { "epoch": 0.4874317614039416, "grad_norm": 0.5322867631912231, "learning_rate": 1e-05, "loss": 0.5604, "step": 3951 }, { "epoch": 0.4875551306171545, "grad_norm": 0.6702197194099426, "learning_rate": 1e-05, "loss": 0.668, "step": 3952 }, { "epoch": 0.4876784998303673, "grad_norm": 0.566273033618927, "learning_rate": 1e-05, "loss": 0.6136, "step": 3953 }, { "epoch": 0.4878018690435802, "grad_norm": 0.5483869910240173, "learning_rate": 1e-05, "loss": 0.5058, "step": 3954 }, { "epoch": 0.487925238256793, "grad_norm": 0.6721545457839966, "learning_rate": 1e-05, "loss": 0.6279, "step": 3955 }, { "epoch": 0.48804860747000584, "grad_norm": 0.5607666969299316, "learning_rate": 1e-05, "loss": 0.6128, "step": 3956 }, { "epoch": 0.4881719766832187, "grad_norm": 0.588797390460968, "learning_rate": 1e-05, "loss": 0.5972, "step": 3957 }, { "epoch": 0.48829534589643153, "grad_norm": 0.5137364268302917, "learning_rate": 1e-05, "loss": 0.4851, "step": 3958 }, { "epoch": 0.4884187151096444, "grad_norm": 0.572638213634491, "learning_rate": 1e-05, "loss": 0.6026, "step": 3959 }, { "epoch": 0.48854208432285723, "grad_norm": 0.5634315609931946, "learning_rate": 1e-05, "loss": 0.6081, "step": 3960 }, { "epoch": 0.48866545353607005, "grad_norm": 0.643621027469635, "learning_rate": 1e-05, "loss": 0.5466, "step": 3961 }, { "epoch": 0.4887888227492829, "grad_norm": 0.644517183303833, "learning_rate": 1e-05, "loss": 0.6022, "step": 3962 }, { "epoch": 0.48891219196249575, "grad_norm": 0.5785015821456909, "learning_rate": 1e-05, "loss": 0.4716, "step": 3963 }, { "epoch": 0.4890355611757086, "grad_norm": 0.5731491446495056, "learning_rate": 1e-05, "loss": 0.5517, "step": 3964 }, { "epoch": 0.48915893038892144, "grad_norm": 0.5405590534210205, "learning_rate": 1e-05, "loss": 0.554, "step": 3965 }, { "epoch": 0.48928229960213426, "grad_norm": 0.7509252429008484, "learning_rate": 1e-05, "loss": 0.7962, "step": 3966 }, { "epoch": 0.48940566881534714, "grad_norm": 0.5904772281646729, "learning_rate": 1e-05, "loss": 0.6342, "step": 3967 }, { "epoch": 0.48952903802855996, "grad_norm": 0.4862602949142456, "learning_rate": 1e-05, "loss": 0.4155, "step": 3968 }, { "epoch": 0.48965240724177284, "grad_norm": 0.5372475385665894, "learning_rate": 1e-05, "loss": 0.5481, "step": 3969 }, { "epoch": 0.48977577645498566, "grad_norm": 0.5771114230155945, "learning_rate": 1e-05, "loss": 0.658, "step": 3970 }, { "epoch": 0.4898991456681985, "grad_norm": 0.5110951662063599, "learning_rate": 1e-05, "loss": 0.4574, "step": 3971 }, { "epoch": 0.49002251488141135, "grad_norm": 0.5611343383789062, "learning_rate": 1e-05, "loss": 0.618, "step": 3972 }, { "epoch": 0.4901458840946242, "grad_norm": 0.5808870196342468, "learning_rate": 1e-05, "loss": 0.5985, "step": 3973 }, { "epoch": 0.49026925330783705, "grad_norm": 0.531268298625946, "learning_rate": 1e-05, "loss": 0.5414, "step": 3974 }, { "epoch": 0.49039262252104987, "grad_norm": 0.4897632300853729, "learning_rate": 1e-05, "loss": 0.4726, "step": 3975 }, { "epoch": 0.4905159917342627, "grad_norm": 0.5818015336990356, "learning_rate": 1e-05, "loss": 0.5669, "step": 3976 }, { "epoch": 0.49063936094747557, "grad_norm": 0.5318160057067871, "learning_rate": 1e-05, "loss": 0.5367, "step": 3977 }, { "epoch": 0.4907627301606884, "grad_norm": 0.6185823082923889, "learning_rate": 1e-05, "loss": 0.6869, "step": 3978 }, { "epoch": 0.49088609937390126, "grad_norm": 0.5890626311302185, "learning_rate": 1e-05, "loss": 0.6713, "step": 3979 }, { "epoch": 0.4910094685871141, "grad_norm": 0.49003639817237854, "learning_rate": 1e-05, "loss": 0.4803, "step": 3980 }, { "epoch": 0.4911328378003269, "grad_norm": 0.5622575879096985, "learning_rate": 1e-05, "loss": 0.5786, "step": 3981 }, { "epoch": 0.4912562070135398, "grad_norm": 0.6334102153778076, "learning_rate": 1e-05, "loss": 0.6013, "step": 3982 }, { "epoch": 0.4913795762267526, "grad_norm": 0.5786693096160889, "learning_rate": 1e-05, "loss": 0.5874, "step": 3983 }, { "epoch": 0.4915029454399655, "grad_norm": 0.6235138177871704, "learning_rate": 1e-05, "loss": 0.6149, "step": 3984 }, { "epoch": 0.4916263146531783, "grad_norm": 0.5736632943153381, "learning_rate": 1e-05, "loss": 0.6133, "step": 3985 }, { "epoch": 0.4917496838663911, "grad_norm": 0.549576461315155, "learning_rate": 1e-05, "loss": 0.5158, "step": 3986 }, { "epoch": 0.491873053079604, "grad_norm": 0.6159964799880981, "learning_rate": 1e-05, "loss": 0.6642, "step": 3987 }, { "epoch": 0.4919964222928168, "grad_norm": 0.5639529824256897, "learning_rate": 1e-05, "loss": 0.5121, "step": 3988 }, { "epoch": 0.4921197915060297, "grad_norm": 0.5953075885772705, "learning_rate": 1e-05, "loss": 0.5672, "step": 3989 }, { "epoch": 0.4922431607192425, "grad_norm": 0.5094324350357056, "learning_rate": 1e-05, "loss": 0.4805, "step": 3990 }, { "epoch": 0.49236652993245533, "grad_norm": 0.49990588426589966, "learning_rate": 1e-05, "loss": 0.4411, "step": 3991 }, { "epoch": 0.4924898991456682, "grad_norm": 0.509099006652832, "learning_rate": 1e-05, "loss": 0.4815, "step": 3992 }, { "epoch": 0.49261326835888103, "grad_norm": 0.6317893862724304, "learning_rate": 1e-05, "loss": 0.6485, "step": 3993 }, { "epoch": 0.4927366375720939, "grad_norm": 0.5473777055740356, "learning_rate": 1e-05, "loss": 0.5405, "step": 3994 }, { "epoch": 0.4928600067853067, "grad_norm": 0.5261601209640503, "learning_rate": 1e-05, "loss": 0.5324, "step": 3995 }, { "epoch": 0.49298337599851955, "grad_norm": 0.585757851600647, "learning_rate": 1e-05, "loss": 0.5947, "step": 3996 }, { "epoch": 0.4931067452117324, "grad_norm": 0.6224164366722107, "learning_rate": 1e-05, "loss": 0.6433, "step": 3997 }, { "epoch": 0.49323011442494524, "grad_norm": 0.5795688629150391, "learning_rate": 1e-05, "loss": 0.5724, "step": 3998 }, { "epoch": 0.4933534836381581, "grad_norm": 0.588766872882843, "learning_rate": 1e-05, "loss": 0.5865, "step": 3999 }, { "epoch": 0.49347685285137094, "grad_norm": 0.6049856543540955, "learning_rate": 1e-05, "loss": 0.691, "step": 4000 }, { "epoch": 0.49360022206458376, "grad_norm": 0.5678289532661438, "learning_rate": 1e-05, "loss": 0.6022, "step": 4001 }, { "epoch": 0.49372359127779664, "grad_norm": 0.5551800727844238, "learning_rate": 1e-05, "loss": 0.5832, "step": 4002 }, { "epoch": 0.49384696049100946, "grad_norm": 0.5698062777519226, "learning_rate": 1e-05, "loss": 0.6749, "step": 4003 }, { "epoch": 0.49397032970422233, "grad_norm": 0.6104639768600464, "learning_rate": 1e-05, "loss": 0.5984, "step": 4004 }, { "epoch": 0.49409369891743515, "grad_norm": 0.5954513549804688, "learning_rate": 1e-05, "loss": 0.6055, "step": 4005 }, { "epoch": 0.494217068130648, "grad_norm": 0.4902303218841553, "learning_rate": 1e-05, "loss": 0.5145, "step": 4006 }, { "epoch": 0.49434043734386085, "grad_norm": 0.5680736303329468, "learning_rate": 1e-05, "loss": 0.6584, "step": 4007 }, { "epoch": 0.49446380655707367, "grad_norm": 0.5557247996330261, "learning_rate": 1e-05, "loss": 0.586, "step": 4008 }, { "epoch": 0.49458717577028655, "grad_norm": 0.5839394927024841, "learning_rate": 1e-05, "loss": 0.6176, "step": 4009 }, { "epoch": 0.49471054498349937, "grad_norm": 0.5597246289253235, "learning_rate": 1e-05, "loss": 0.508, "step": 4010 }, { "epoch": 0.4948339141967122, "grad_norm": 0.6512753367424011, "learning_rate": 1e-05, "loss": 0.7043, "step": 4011 }, { "epoch": 0.49495728340992506, "grad_norm": 0.5736011862754822, "learning_rate": 1e-05, "loss": 0.669, "step": 4012 }, { "epoch": 0.4950806526231379, "grad_norm": 0.508765459060669, "learning_rate": 1e-05, "loss": 0.5143, "step": 4013 }, { "epoch": 0.49520402183635076, "grad_norm": 0.6495784521102905, "learning_rate": 1e-05, "loss": 0.5715, "step": 4014 }, { "epoch": 0.4953273910495636, "grad_norm": 0.5555331110954285, "learning_rate": 1e-05, "loss": 0.492, "step": 4015 }, { "epoch": 0.4954507602627764, "grad_norm": 0.5815754532814026, "learning_rate": 1e-05, "loss": 0.489, "step": 4016 }, { "epoch": 0.4955741294759893, "grad_norm": 0.5280928611755371, "learning_rate": 1e-05, "loss": 0.5083, "step": 4017 }, { "epoch": 0.4956974986892021, "grad_norm": 0.6446611881256104, "learning_rate": 1e-05, "loss": 0.679, "step": 4018 }, { "epoch": 0.495820867902415, "grad_norm": 0.6615819334983826, "learning_rate": 1e-05, "loss": 0.5039, "step": 4019 }, { "epoch": 0.4959442371156278, "grad_norm": 0.5584143400192261, "learning_rate": 1e-05, "loss": 0.5089, "step": 4020 }, { "epoch": 0.4960676063288406, "grad_norm": 0.5481278300285339, "learning_rate": 1e-05, "loss": 0.5361, "step": 4021 }, { "epoch": 0.4961909755420535, "grad_norm": 0.5700264573097229, "learning_rate": 1e-05, "loss": 0.4929, "step": 4022 }, { "epoch": 0.4963143447552663, "grad_norm": 0.6371064186096191, "learning_rate": 1e-05, "loss": 0.6162, "step": 4023 }, { "epoch": 0.4964377139684792, "grad_norm": 0.6325159072875977, "learning_rate": 1e-05, "loss": 0.7247, "step": 4024 }, { "epoch": 0.496561083181692, "grad_norm": 0.5606503486633301, "learning_rate": 1e-05, "loss": 0.6078, "step": 4025 }, { "epoch": 0.49668445239490483, "grad_norm": 0.5506722331047058, "learning_rate": 1e-05, "loss": 0.562, "step": 4026 }, { "epoch": 0.4968078216081177, "grad_norm": 0.528823971748352, "learning_rate": 1e-05, "loss": 0.5059, "step": 4027 }, { "epoch": 0.4969311908213305, "grad_norm": 0.600772500038147, "learning_rate": 1e-05, "loss": 0.5307, "step": 4028 }, { "epoch": 0.4970545600345434, "grad_norm": 0.664779543876648, "learning_rate": 1e-05, "loss": 0.5954, "step": 4029 }, { "epoch": 0.4971779292477562, "grad_norm": 0.5453622937202454, "learning_rate": 1e-05, "loss": 0.4922, "step": 4030 }, { "epoch": 0.49730129846096904, "grad_norm": 0.561044454574585, "learning_rate": 1e-05, "loss": 0.6456, "step": 4031 }, { "epoch": 0.4974246676741819, "grad_norm": 0.7705190181732178, "learning_rate": 1e-05, "loss": 0.6844, "step": 4032 }, { "epoch": 0.49754803688739474, "grad_norm": 0.5785367488861084, "learning_rate": 1e-05, "loss": 0.6456, "step": 4033 }, { "epoch": 0.4976714061006076, "grad_norm": 0.5590353012084961, "learning_rate": 1e-05, "loss": 0.574, "step": 4034 }, { "epoch": 0.49779477531382044, "grad_norm": 0.6209882497787476, "learning_rate": 1e-05, "loss": 0.7038, "step": 4035 }, { "epoch": 0.49791814452703326, "grad_norm": 0.5958069562911987, "learning_rate": 1e-05, "loss": 0.5687, "step": 4036 }, { "epoch": 0.49804151374024613, "grad_norm": 0.5695993304252625, "learning_rate": 1e-05, "loss": 0.6065, "step": 4037 }, { "epoch": 0.49816488295345895, "grad_norm": 0.5563250780105591, "learning_rate": 1e-05, "loss": 0.6141, "step": 4038 }, { "epoch": 0.49828825216667183, "grad_norm": 0.5136759281158447, "learning_rate": 1e-05, "loss": 0.5359, "step": 4039 }, { "epoch": 0.49841162137988465, "grad_norm": 0.5283939838409424, "learning_rate": 1e-05, "loss": 0.5229, "step": 4040 }, { "epoch": 0.49853499059309747, "grad_norm": 0.6041221618652344, "learning_rate": 1e-05, "loss": 0.652, "step": 4041 }, { "epoch": 0.49865835980631035, "grad_norm": 0.6217100024223328, "learning_rate": 1e-05, "loss": 0.6105, "step": 4042 }, { "epoch": 0.49878172901952317, "grad_norm": 0.5270769000053406, "learning_rate": 1e-05, "loss": 0.571, "step": 4043 }, { "epoch": 0.49890509823273604, "grad_norm": 0.5667478442192078, "learning_rate": 1e-05, "loss": 0.532, "step": 4044 }, { "epoch": 0.49902846744594886, "grad_norm": 0.576387882232666, "learning_rate": 1e-05, "loss": 0.5563, "step": 4045 }, { "epoch": 0.4991518366591617, "grad_norm": 0.5933818221092224, "learning_rate": 1e-05, "loss": 0.5585, "step": 4046 }, { "epoch": 0.49927520587237456, "grad_norm": 0.5220391750335693, "learning_rate": 1e-05, "loss": 0.4911, "step": 4047 }, { "epoch": 0.4993985750855874, "grad_norm": 0.6556411981582642, "learning_rate": 1e-05, "loss": 0.6942, "step": 4048 }, { "epoch": 0.49952194429880026, "grad_norm": 0.5293809771537781, "learning_rate": 1e-05, "loss": 0.5143, "step": 4049 }, { "epoch": 0.4996453135120131, "grad_norm": 0.6150904893875122, "learning_rate": 1e-05, "loss": 0.6045, "step": 4050 }, { "epoch": 0.4997686827252259, "grad_norm": 0.5558447241783142, "learning_rate": 1e-05, "loss": 0.5701, "step": 4051 }, { "epoch": 0.4998920519384388, "grad_norm": 0.6183029413223267, "learning_rate": 1e-05, "loss": 0.6064, "step": 4052 }, { "epoch": 0.5000154211516517, "grad_norm": 0.6348970532417297, "learning_rate": 1e-05, "loss": 0.7744, "step": 4053 } ], "logging_steps": 1, "max_steps": 8105, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4053, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.930358073545877e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }